mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-08 17:51:20 +02:00
improve redaction process handling
Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
parent
a0bdf605b0
commit
5dc7358219
@ -41,6 +41,7 @@ import org.springframework.web.multipart.MultipartFile;
|
|||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
import stirling.software.SPDF.model.PDFText;
|
import stirling.software.SPDF.model.PDFText;
|
||||||
import stirling.software.SPDF.model.api.security.ManualRedactPdfRequest;
|
import stirling.software.SPDF.model.api.security.ManualRedactPdfRequest;
|
||||||
@ -55,6 +56,7 @@ import stirling.software.common.service.CustomPDFDocumentFactory;
|
|||||||
import stirling.software.common.util.PdfUtils;
|
import stirling.software.common.util.PdfUtils;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
|
@Slf4j
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
public class RedactionService {
|
public class RedactionService {
|
||||||
|
|
||||||
@ -1383,18 +1385,32 @@ public class RedactionService {
|
|||||||
|
|
||||||
private float safeGetStringWidth(PDFont font, String text) {
|
private float safeGetStringWidth(PDFont font, String text) {
|
||||||
if (font == null || text == null || text.isEmpty()) {
|
if (font == null || text == null || text.isEmpty()) {
|
||||||
return 0;
|
return 0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
if (!WidthCalculator.isWidthCalculationReliable(font)) {
|
if (!WidthCalculator.isWidthCalculationReliable(font)) {
|
||||||
|
log.debug("Font width calculation not reliable, using conservative approach");
|
||||||
return calculateConservativeWidth(font, text);
|
return calculateConservativeWidth(font, text);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!TextEncodingHelper.canEncodeCharacters(font, text)) {
|
if (!TextEncodingHelper.canEncodeCharacters(font, text)) {
|
||||||
|
log.debug("Font cannot encode characters, using character-based calculation");
|
||||||
return calculateCharacterBasedWidth(font, text);
|
return calculateCharacterBasedWidth(font, text);
|
||||||
}
|
}
|
||||||
try {
|
|
||||||
return font.getStringWidth(text);
|
return font.getStringWidth(text);
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
log.debug("Standard width calculation failed, trying fallback: {}", e.getMessage());
|
||||||
|
try {
|
||||||
return calculateFallbackWidth(font, text);
|
return calculateFallbackWidth(font, text);
|
||||||
|
} catch (Exception e2) {
|
||||||
|
log.debug(
|
||||||
|
"Fallback width calculation failed, using conservative: {}",
|
||||||
|
e2.getMessage());
|
||||||
|
return calculateConservativeWidth(font, text);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1488,36 +1504,125 @@ public class RedactionService {
|
|||||||
|
|
||||||
private COSArray createRedactedTJArray(
|
private COSArray createRedactedTJArray(
|
||||||
COSArray originalArray, TextSegment segment, List<MatchRange> matches) {
|
COSArray originalArray, TextSegment segment, List<MatchRange> matches) {
|
||||||
|
|
||||||
|
if (originalArray == null || segment == null || matches == null) {
|
||||||
|
log.warn("Invalid input parameters for redaction");
|
||||||
|
return originalArray != null ? originalArray : new COSArray();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate critical inputs
|
||||||
|
if (matches.isEmpty()) {
|
||||||
|
return originalArray;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (segment.getStartPos() < 0) {
|
||||||
|
log.warn("Invalid segment start position: {}", segment.getStartPos());
|
||||||
|
return originalArray;
|
||||||
|
}
|
||||||
|
|
||||||
|
log.debug(
|
||||||
|
"Processing redaction: segment={}, matches={}, aggressive={}",
|
||||||
|
segment,
|
||||||
|
matches.size(),
|
||||||
|
AGGRESSIVE_MODE.get());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
COSArray newArray = new COSArray();
|
COSArray newArray = new COSArray();
|
||||||
int textOffsetInSegment = 0;
|
int textOffsetInSegment = 0;
|
||||||
|
|
||||||
for (COSBase element : originalArray) {
|
for (COSBase element : originalArray) {
|
||||||
if (element instanceof COSString cosString) {
|
if (element instanceof COSString cosString) {
|
||||||
String originalText = cosString.getString();
|
try {
|
||||||
|
processStringElement(
|
||||||
|
cosString, segment, matches, newArray, textOffsetInSegment);
|
||||||
|
textOffsetInSegment += getActualStringLength(cosString, segment.getFont());
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Failed to process string element: {}", e.getMessage(), e);
|
||||||
|
newArray.add(element);
|
||||||
|
textOffsetInSegment += getActualStringLength(cosString, segment.getFont());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
newArray.add(element);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return newArray;
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Critical failure in redaction process: {}", e.getMessage(), e);
|
||||||
|
return originalArray;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getActualStringLength(COSString cosString, PDFont font) {
|
||||||
|
try {
|
||||||
|
if (font == null) {
|
||||||
|
return cosString.getString().length();
|
||||||
|
}
|
||||||
|
|
||||||
|
String decodedText = TextDecodingHelper.tryDecodeWithFont(font, cosString);
|
||||||
|
if (decodedText != null) {
|
||||||
|
return decodedText.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
return cosString.getString().length();
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("Failed to get actual string length, using basic length: {}", e.getMessage());
|
||||||
|
return cosString.getString().length();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void processStringElement(
|
||||||
|
COSString cosString,
|
||||||
|
TextSegment segment,
|
||||||
|
List<MatchRange> matches,
|
||||||
|
COSArray newArray,
|
||||||
|
int textOffsetInSegment)
|
||||||
|
throws Exception {
|
||||||
|
|
||||||
|
String originalText = getDecodedString(cosString, segment.getFont());
|
||||||
|
|
||||||
if (!Boolean.TRUE.equals(AGGRESSIVE_MODE.get())
|
if (!Boolean.TRUE.equals(AGGRESSIVE_MODE.get())
|
||||||
&& segment.getFont() != null
|
&& segment.getFont() != null
|
||||||
&& !TextEncodingHelper.isTextSegmentRemovable(
|
&& !TextEncodingHelper.isTextSegmentRemovable(segment.getFont(), originalText)) {
|
||||||
segment.getFont(), originalText)) {
|
newArray.add(cosString); // Keep original COSString to preserve encoding
|
||||||
newArray.add(element);
|
return;
|
||||||
textOffsetInSegment += originalText.length();
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
StringBuilder newText = new StringBuilder(originalText);
|
StringBuilder newText = new StringBuilder(originalText);
|
||||||
boolean modified = false;
|
boolean modified = false;
|
||||||
for (MatchRange match : matches) {
|
|
||||||
|
// Sort matches by start position to process them in order
|
||||||
|
List<MatchRange> sortedMatches =
|
||||||
|
matches.stream()
|
||||||
|
.sorted(Comparator.comparingInt(MatchRange::getStartPos))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
int cumulativeOffset = 0; // Track cumulative text changes
|
||||||
|
|
||||||
|
for (MatchRange match : sortedMatches) {
|
||||||
int stringStartInPage = segment.getStartPos() + textOffsetInSegment;
|
int stringStartInPage = segment.getStartPos() + textOffsetInSegment;
|
||||||
int stringEndInPage = stringStartInPage + originalText.length();
|
int stringEndInPage = stringStartInPage + originalText.length();
|
||||||
int overlapStart = Math.max(match.getStartPos(), stringStartInPage);
|
int overlapStart = Math.max(match.getStartPos(), stringStartInPage);
|
||||||
int overlapEnd = Math.min(match.getEndPos(), stringEndInPage);
|
int overlapEnd = Math.min(match.getEndPos(), stringEndInPage);
|
||||||
|
|
||||||
if (overlapStart < overlapEnd) {
|
if (overlapStart < overlapEnd) {
|
||||||
int redactionStartInString = overlapStart - stringStartInPage;
|
int redactionStartInString =
|
||||||
int redactionEndInString = overlapEnd - stringStartInPage;
|
Math.max(0, overlapStart - stringStartInPage - cumulativeOffset);
|
||||||
|
int redactionEndInString =
|
||||||
|
Math.min(
|
||||||
|
newText.length(),
|
||||||
|
overlapEnd - stringStartInPage - cumulativeOffset);
|
||||||
|
|
||||||
if (redactionStartInString >= 0
|
if (redactionStartInString >= 0
|
||||||
&& redactionEndInString <= originalText.length()) {
|
&& redactionEndInString <= newText.length()
|
||||||
|
&& redactionStartInString < redactionEndInString) {
|
||||||
|
|
||||||
String originalPart =
|
String originalPart =
|
||||||
originalText.substring(
|
originalText.substring(
|
||||||
redactionStartInString, redactionEndInString);
|
overlapStart - stringStartInPage,
|
||||||
|
overlapEnd - stringStartInPage);
|
||||||
|
|
||||||
if (!Boolean.TRUE.equals(AGGRESSIVE_MODE.get())
|
if (!Boolean.TRUE.equals(AGGRESSIVE_MODE.get())
|
||||||
&& segment.getFont() != null
|
&& segment.getFont() != null
|
||||||
&& !TextEncodingHelper.isTextSegmentRemovable(
|
&& !TextEncodingHelper.isTextSegmentRemovable(
|
||||||
@ -1526,73 +1631,123 @@ public class RedactionService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
modified = true;
|
modified = true;
|
||||||
if (Boolean.TRUE.equals(AGGRESSIVE_MODE.get())) {
|
String replacement = "";
|
||||||
newText.replace(
|
|
||||||
redactionStartInString, redactionEndInString, "");
|
if (!Boolean.TRUE.equals(AGGRESSIVE_MODE.get())) {
|
||||||
} else {
|
replacement = createSafeReplacement(originalPart, segment);
|
||||||
float originalWidth = 0;
|
|
||||||
if (segment.getFont() != null && segment.getFontSize() > 0) {
|
|
||||||
try {
|
|
||||||
originalWidth =
|
|
||||||
safeGetStringWidth(
|
|
||||||
segment.getFont(), originalPart)
|
|
||||||
/ FONT_SCALE_FACTOR
|
|
||||||
* segment.getFontSize();
|
|
||||||
} catch (Exception ignored) {
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
newText.replace(redactionStartInString, redactionEndInString, replacement);
|
||||||
|
cumulativeOffset +=
|
||||||
|
(redactionEndInString - redactionStartInString) - replacement.length();
|
||||||
}
|
}
|
||||||
String placeholder =
|
|
||||||
(originalWidth > 0)
|
|
||||||
? createPlaceholderWithWidth(
|
|
||||||
originalPart,
|
|
||||||
originalWidth,
|
|
||||||
segment.getFont(),
|
|
||||||
segment.getFontSize())
|
|
||||||
: createPlaceholderWithFont(
|
|
||||||
originalPart, segment.getFont());
|
|
||||||
newText.replace(
|
|
||||||
redactionStartInString,
|
|
||||||
redactionEndInString,
|
|
||||||
placeholder);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
String modifiedString = newText.toString();
|
|
||||||
newArray.add(new COSString(modifiedString));
|
|
||||||
if (!Boolean.TRUE.equals(AGGRESSIVE_MODE.get())
|
|
||||||
&& modified
|
|
||||||
&& segment.getFont() != null
|
|
||||||
&& segment.getFontSize() > 0) {
|
|
||||||
try {
|
|
||||||
float originalWidth =
|
|
||||||
safeGetStringWidth(segment.getFont(), originalText)
|
|
||||||
/ FONT_SCALE_FACTOR
|
|
||||||
* segment.getFontSize();
|
|
||||||
float modifiedWidth =
|
|
||||||
safeGetStringWidth(segment.getFont(), modifiedString)
|
|
||||||
/ FONT_SCALE_FACTOR
|
|
||||||
* segment.getFontSize();
|
|
||||||
float adjustment = originalWidth - modifiedWidth;
|
|
||||||
if (Math.abs(adjustment) > PRECISION_THRESHOLD) {
|
|
||||||
float kerning =
|
|
||||||
(-adjustment / segment.getFontSize())
|
|
||||||
* FONT_SCALE_FACTOR
|
|
||||||
* 1.10f;
|
|
||||||
newArray.add(new COSFloat(kerning));
|
|
||||||
}
|
|
||||||
} catch (Exception ignored) {
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
textOffsetInSegment += originalText.length();
|
String modifiedString = newText.toString();
|
||||||
} else {
|
COSString newCosString = createCompatibleCOSString(modifiedString, cosString);
|
||||||
newArray.add(element);
|
newArray.add(newCosString);
|
||||||
|
|
||||||
|
if (modified && !Boolean.TRUE.equals(AGGRESSIVE_MODE.get())) {
|
||||||
|
addSpacingAdjustment(newArray, segment, originalText, modifiedString);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return newArray;
|
|
||||||
|
private String getDecodedString(COSString cosString, PDFont font) {
|
||||||
|
try {
|
||||||
|
String decoded = TextDecodingHelper.tryDecodeWithFont(font, cosString);
|
||||||
|
if (decoded != null && !decoded.trim().isEmpty()) {
|
||||||
|
return decoded;
|
||||||
|
}
|
||||||
|
|
||||||
|
return cosString.getString();
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
return originalArray;
|
log.debug("Failed to decode string, using raw string: {}", e.getMessage());
|
||||||
|
return cosString.getString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String createSafeReplacement(String originalPart, TextSegment segment) {
|
||||||
|
try {
|
||||||
|
if (segment.getFont() != null && segment.getFontSize() > 0) {
|
||||||
|
float originalWidth =
|
||||||
|
calculateSafeWidth(originalPart, segment.getFont(), segment.getFontSize());
|
||||||
|
|
||||||
|
if (originalWidth > 0) {
|
||||||
|
return createPlaceholderWithWidth(
|
||||||
|
originalPart, originalWidth, segment.getFont(), segment.getFontSize());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return createPlaceholderWithFont(originalPart, segment.getFont());
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("Failed to create replacement, using simple placeholder: {}", e.getMessage());
|
||||||
|
return " ".repeat(Math.max(1, originalPart.length()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private float calculateSafeWidth(String text, PDFont font, float fontSize) {
|
||||||
|
try {
|
||||||
|
if (font != null && fontSize > 0) {
|
||||||
|
float width = safeGetStringWidth(font, text);
|
||||||
|
return (width / FONT_SCALE_FACTOR) * fontSize;
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("Width calculation failed: {}", e.getMessage());
|
||||||
|
}
|
||||||
|
return 0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
private COSString createCompatibleCOSString(String text, COSString original) {
|
||||||
|
try {
|
||||||
|
COSString newString = new COSString(text);
|
||||||
|
|
||||||
|
if (original.getBytes().length != original.getString().length()) {
|
||||||
|
try {
|
||||||
|
byte[] originalBytes = original.getBytes();
|
||||||
|
if (originalBytes.length > 0) {
|
||||||
|
newString =
|
||||||
|
new COSString(
|
||||||
|
text.getBytes(java.nio.charset.StandardCharsets.UTF_8));
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("Failed to preserve encoding: {}", e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return newString;
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("Failed to create compatible COSString: {}", e.getMessage());
|
||||||
|
return new COSString(text);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addSpacingAdjustment(
|
||||||
|
COSArray newArray, TextSegment segment, String originalText, String modifiedText) {
|
||||||
|
try {
|
||||||
|
if (segment.getFont() == null || segment.getFontSize() <= 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
float originalWidth =
|
||||||
|
calculateSafeWidth(originalText, segment.getFont(), segment.getFontSize());
|
||||||
|
float modifiedWidth =
|
||||||
|
calculateSafeWidth(modifiedText, segment.getFont(), segment.getFontSize());
|
||||||
|
float adjustment = originalWidth - modifiedWidth;
|
||||||
|
|
||||||
|
if (Math.abs(adjustment) > PRECISION_THRESHOLD) {
|
||||||
|
float kerning = (-adjustment / segment.getFontSize()) * FONT_SCALE_FACTOR * 1.10f;
|
||||||
|
|
||||||
|
if (Math.abs(kerning) < 1000) { // Reasonable threshold
|
||||||
|
newArray.add(new COSFloat(kerning));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("Failed to add spacing adjustment: {}", e.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user