diff --git a/app/core/src/main/java/stirling/software/SPDF/service/RedactionService.java b/app/core/src/main/java/stirling/software/SPDF/service/RedactionService.java index ce2cc513e..e9c68f36c 100644 --- a/app/core/src/main/java/stirling/software/SPDF/service/RedactionService.java +++ b/app/core/src/main/java/stirling/software/SPDF/service/RedactionService.java @@ -1473,13 +1473,12 @@ public class RedactionService { String originalPart = originalText.substring( redactionStartInString, redactionEndInString); - if (!Boolean.TRUE.equals(AGGRESSIVE_MODE.get())) { - if (segment.getFont() != null + if (!Boolean.TRUE.equals(AGGRESSIVE_MODE.get()) && segment.getFont() != null && !TextEncodingHelper.isTextSegmentRemovable( segment.getFont(), originalPart)) { continue; } - } + modified = true; if (Boolean.TRUE.equals(AGGRESSIVE_MODE.get())) { newText.replace( @@ -1515,8 +1514,7 @@ public class RedactionService { } String modifiedString = newText.toString(); newArray.add(new COSString(modifiedString)); - if (!Boolean.TRUE.equals(AGGRESSIVE_MODE.get())) { - if (modified && segment.getFont() != null && segment.getFontSize() > 0) { + if (!Boolean.TRUE.equals(AGGRESSIVE_MODE.get()) && modified && segment.getFont() != null && segment.getFontSize() > 0) { try { float originalWidth = safeGetStringWidth(segment.getFont(), originalText) @@ -1537,7 +1535,7 @@ public class RedactionService { } catch (Exception ignored) { } } - } + textOffsetInSegment += originalText.length(); } else { newArray.add(element); diff --git a/app/core/src/main/java/stirling/software/SPDF/utils/text/TextDecodingHelper.java b/app/core/src/main/java/stirling/software/SPDF/utils/text/TextDecodingHelper.java index 80e7267f6..985b892ec 100644 --- a/app/core/src/main/java/stirling/software/SPDF/utils/text/TextDecodingHelper.java +++ b/app/core/src/main/java/stirling/software/SPDF/utils/text/TextDecodingHelper.java @@ -1,20 +1,27 @@ package stirling.software.SPDF.utils.text; +import lombok.experimental.UtilityClass; import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDType0Font; import lombok.extern.slf4j.Slf4j; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.StandardCharsets; + @Slf4j +@UtilityClass public class TextDecodingHelper { - private static final int ASCII_LOWER_BOUND = 32; - private static final int ASCII_UPPER_BOUND = 126; - private static final int EXTENDED_ASCII_LOWER_BOUND = 160; - private static final int EXTENDED_ASCII_UPPER_BOUND = 255; + private final int ASCII_LOWER_BOUND = 32; + private final int ASCII_UPPER_BOUND = 126; + private final int EXTENDED_ASCII_LOWER_BOUND = 160; + private final int EXTENDED_ASCII_UPPER_BOUND = 255; - public static void tryDecodeWithFontEnhanced(PDFont font, COSString cosString) { + public void tryDecodeWithFontEnhanced(PDFont font, COSString cosString) { if (font == null || cosString == null) { return; } @@ -39,12 +46,11 @@ public class TextDecodingHelper { try { tryDecodeWithFont(font, cosString); } catch (Exception fallbackException) { - // Ultimate fallback: return hex representation for analysis } } } - public static String decodeCharactersEnhanced(PDFont font, byte[] bytes) { + public String decodeCharactersEnhanced(PDFont font, byte[] bytes) { StringBuilder out = new StringBuilder(); boolean hasValidCharacters = false; int i = 0; @@ -52,7 +58,6 @@ public class TextDecodingHelper { int code = bytes[i] & 0xFF; String charStr = decodeSingleCharacter(font, code, bytes); - // Heuristic for multi-byte: if high byte, try combining with next if (charStr == null && code >= 128 && i + 1 < bytes.length) { int combinedCode = (code << 8) | (bytes[i + 1] & 0xFF); charStr = decodeSingleCharacter(font, combinedCode, bytes); @@ -76,7 +81,7 @@ public class TextDecodingHelper { return hasValidCharacters ? result : null; } - public static String decodeSingleCharacter(PDFont font, int code, byte[] bytes) { + public String decodeSingleCharacter(PDFont font, int code, byte[] bytes) { String charStr = null; try { @@ -84,11 +89,9 @@ public class TextDecodingHelper { } catch (Exception ignored) { } - // Enhanced CID Font and Composite Font Handling if (charStr == null - && font instanceof org.apache.pdfbox.pdmodel.font.PDType0Font type0Font) { + && font instanceof PDType0Font type0Font) { try { - // Attempt CID-specific decoding for multi-byte codes int cid = (bytes.length > 1) ? ((bytes[0] & 0xFF) << 8) | (bytes[1] & 0xFF) : code; charStr = type0Font.toUnicode(cid); log.debug("CID decoding successful for code {}: {}", cid, charStr); @@ -108,7 +111,7 @@ public class TextDecodingHelper { return charStr; } - public static String fallbackCharacterMapping(int code, byte[] bytes, PDFont font) { + public String fallbackCharacterMapping(int code, byte[] bytes, PDFont font) { try { if (font instanceof PDType0Font && bytes.length > 1) { return null; @@ -139,10 +142,10 @@ public class TextDecodingHelper { // Fallback to UTF-8/16 decoding attempt for unknown encodings try { if (bytes.length >= 2) { - java.nio.ByteBuffer buffer = java.nio.ByteBuffer.wrap(bytes); - java.nio.charset.CharsetDecoder decoder = - java.nio.charset.StandardCharsets.UTF_16BE.newDecoder(); - java.nio.CharBuffer charBuffer = decoder.decode(buffer); + ByteBuffer buffer = ByteBuffer.wrap(bytes); + CharsetDecoder decoder = + StandardCharsets.UTF_16BE.newDecoder(); + CharBuffer charBuffer = decoder.decode(buffer); return charBuffer.toString(); } } catch (Exception e) { @@ -155,7 +158,7 @@ public class TextDecodingHelper { } } - public static String mapSubsetCharacter(int code) { + public String mapSubsetCharacter(int code) { if (code >= ASCII_LOWER_BOUND && code <= ASCII_UPPER_BOUND) { return String.valueOf((char) code); } @@ -165,7 +168,7 @@ public class TextDecodingHelper { return null; } - public static String tryDecodeWithFont(PDFont font, COSString cosString) { + public String tryDecodeWithFont(PDFont font, COSString cosString) { try { if (font == null || cosString == null) { return null; @@ -194,7 +197,6 @@ public class TextDecodingHelper { return out.toString(); } out.setLength(0); - anyMapped = false; for (int i = 0; i < bytes.length; ) { int b1 = bytes[i] & 0xFF; String u1 = null; diff --git a/app/core/src/main/java/stirling/software/SPDF/utils/text/TextEncodingHelper.java b/app/core/src/main/java/stirling/software/SPDF/utils/text/TextEncodingHelper.java index 4292e6c52..44a7fb32e 100644 --- a/app/core/src/main/java/stirling/software/SPDF/utils/text/TextEncodingHelper.java +++ b/app/core/src/main/java/stirling/software/SPDF/utils/text/TextEncodingHelper.java @@ -2,6 +2,7 @@ package stirling.software.SPDF.utils.text; import java.io.IOException; +import lombok.experimental.UtilityClass; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDSimpleFont; import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding; @@ -10,9 +11,10 @@ import org.apache.pdfbox.pdmodel.font.encoding.Encoding; import lombok.extern.slf4j.Slf4j; @Slf4j +@UtilityClass public class TextEncodingHelper { - public static boolean canEncodeCharacters(PDFont font, String text) { + public boolean canEncodeCharacters(PDFont font, String text) { if (font == null || text == null || text.isEmpty()) { return false; } @@ -51,7 +53,7 @@ public class TextEncodingHelper { } } - private static boolean validateAsCodePointArray(PDFont font, String text) { + private boolean validateAsCodePointArray(PDFont font, String text) { int totalCodePoints = 0; int successfulCodePoints = 0; @@ -112,7 +114,7 @@ public class TextEncodingHelper { return isAcceptable; } - public static boolean isTextSegmentRemovable(PDFont font, String text) { + public boolean isTextSegmentRemovable(PDFont font, String text) { if (font == null || text == null || text.isEmpty()) { return false; } @@ -145,7 +147,7 @@ public class TextEncodingHelper { return isTextFullyRemovable(font, text); } - public static boolean isTextFullyRemovable(PDFont font, String text) { + public boolean isTextFullyRemovable(PDFont font, String text) { if (font == null || text == null || text.isEmpty()) { return false; } @@ -213,7 +215,7 @@ public class TextEncodingHelper { } } - private static boolean isSimpleCharacter(String text) { + private boolean isSimpleCharacter(String text) { if (text == null || text.isEmpty()) { return false; } @@ -241,7 +243,7 @@ public class TextEncodingHelper { return true; } - public static boolean hasCustomEncoding(PDFont font) { + public boolean hasCustomEncoding(PDFont font) { try { if (font instanceof PDSimpleFont simpleFont) { try { @@ -294,7 +296,7 @@ public class TextEncodingHelper { } } - public static boolean fontSupportsCharacter(PDFont font, String character) { + public boolean fontSupportsCharacter(PDFont font, String character) { if (font == null || character == null || character.isEmpty()) { return false; } @@ -318,14 +320,14 @@ public class TextEncodingHelper { } } - public static boolean isFontSubset(String fontName) { + public boolean isFontSubset(String fontName) { if (fontName == null) { return false; } return fontName.matches("^[A-Z]{6}\\+.*"); } - public static boolean canCalculateBasicWidths(PDFont font) { + public boolean canCalculateBasicWidths(PDFont font) { try { float spaceWidth = font.getStringWidth(" "); if (spaceWidth <= 0) { diff --git a/app/core/src/main/java/stirling/software/SPDF/utils/text/TextFinderUtils.java b/app/core/src/main/java/stirling/software/SPDF/utils/text/TextFinderUtils.java index 4c7d86abd..2977b1832 100644 --- a/app/core/src/main/java/stirling/software/SPDF/utils/text/TextFinderUtils.java +++ b/app/core/src/main/java/stirling/software/SPDF/utils/text/TextFinderUtils.java @@ -5,15 +5,18 @@ import java.util.List; import java.util.Set; import java.util.regex.Pattern; +import lombok.experimental.UtilityClass; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDResources; import lombok.extern.slf4j.Slf4j; +import org.apache.pdfbox.pdmodel.font.PDFont; @Slf4j +@UtilityClass public class TextFinderUtils { - public static boolean validateFontReliability(org.apache.pdfbox.pdmodel.font.PDFont font) { + public boolean validateFontReliability(PDFont font) { if (font == null) { return false; } @@ -53,8 +56,8 @@ public class TextFinderUtils { return false; } - public static List createOptimizedSearchPatterns( - Set searchTerms, boolean useRegex, boolean wholeWordSearch) { + public List createOptimizedSearchPatterns( + Set searchTerms, boolean useRegex, boolean wholeWordSearch) { List patterns = new ArrayList<>(); for (String term : searchTerms) { @@ -84,7 +87,7 @@ public class TextFinderUtils { return patterns; } - private static String applyWordBoundaries(String originalTerm, String patternString) { + private String applyWordBoundaries(String originalTerm, String patternString) { if (originalTerm.length() == 1 && Character.isDigit(originalTerm.charAt(0))) { return "(?