Refactor utility classes to use @UtilityClass and drop explicit static modifiers

- Applied `@UtilityClass` annotation to utility classes for cleaner, consistent usage patterns.
- Removed the explicit `static` modifier from methods and constants in utility classes; Lombok's `@UtilityClass` marks every member static automatically, so call sites are unchanged.
- Replaced fully-qualified class names with imports and removed redundant comments for better readability.
- Merged nested `if` blocks in `RedactionService` into single combined conditions to streamline text redaction logic and improve maintainability.

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
Balázs Szücs 2025-08-20 22:40:23 +02:00
parent f9d2d9bbe5
commit 8f19369c58
5 changed files with 53 additions and 46 deletions

View File

@ -1473,13 +1473,12 @@ public class RedactionService {
String originalPart = String originalPart =
originalText.substring( originalText.substring(
redactionStartInString, redactionEndInString); redactionStartInString, redactionEndInString);
if (!Boolean.TRUE.equals(AGGRESSIVE_MODE.get())) { if (!Boolean.TRUE.equals(AGGRESSIVE_MODE.get()) && segment.getFont() != null
if (segment.getFont() != null
&& !TextEncodingHelper.isTextSegmentRemovable( && !TextEncodingHelper.isTextSegmentRemovable(
segment.getFont(), originalPart)) { segment.getFont(), originalPart)) {
continue; continue;
} }
}
modified = true; modified = true;
if (Boolean.TRUE.equals(AGGRESSIVE_MODE.get())) { if (Boolean.TRUE.equals(AGGRESSIVE_MODE.get())) {
newText.replace( newText.replace(
@ -1515,8 +1514,7 @@ public class RedactionService {
} }
String modifiedString = newText.toString(); String modifiedString = newText.toString();
newArray.add(new COSString(modifiedString)); newArray.add(new COSString(modifiedString));
if (!Boolean.TRUE.equals(AGGRESSIVE_MODE.get())) { if (!Boolean.TRUE.equals(AGGRESSIVE_MODE.get()) && modified && segment.getFont() != null && segment.getFontSize() > 0) {
if (modified && segment.getFont() != null && segment.getFontSize() > 0) {
try { try {
float originalWidth = float originalWidth =
safeGetStringWidth(segment.getFont(), originalText) safeGetStringWidth(segment.getFont(), originalText)
@ -1537,7 +1535,7 @@ public class RedactionService {
} catch (Exception ignored) { } catch (Exception ignored) {
} }
} }
}
textOffsetInSegment += originalText.length(); textOffsetInSegment += originalText.length();
} else { } else {
newArray.add(element); newArray.add(element);

View File

@ -1,20 +1,27 @@
package stirling.software.SPDF.utils.text; package stirling.software.SPDF.utils.text;
import lombok.experimental.UtilityClass;
import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType0Font; import org.apache.pdfbox.pdmodel.font.PDType0Font;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.StandardCharsets;
@Slf4j @Slf4j
@UtilityClass
public class TextDecodingHelper { public class TextDecodingHelper {
private static final int ASCII_LOWER_BOUND = 32; private final int ASCII_LOWER_BOUND = 32;
private static final int ASCII_UPPER_BOUND = 126; private final int ASCII_UPPER_BOUND = 126;
private static final int EXTENDED_ASCII_LOWER_BOUND = 160; private final int EXTENDED_ASCII_LOWER_BOUND = 160;
private static final int EXTENDED_ASCII_UPPER_BOUND = 255; private final int EXTENDED_ASCII_UPPER_BOUND = 255;
public static void tryDecodeWithFontEnhanced(PDFont font, COSString cosString) { public void tryDecodeWithFontEnhanced(PDFont font, COSString cosString) {
if (font == null || cosString == null) { if (font == null || cosString == null) {
return; return;
} }
@ -39,12 +46,11 @@ public class TextDecodingHelper {
try { try {
tryDecodeWithFont(font, cosString); tryDecodeWithFont(font, cosString);
} catch (Exception fallbackException) { } catch (Exception fallbackException) {
// Ultimate fallback: return hex representation for analysis
} }
} }
} }
public static String decodeCharactersEnhanced(PDFont font, byte[] bytes) { public String decodeCharactersEnhanced(PDFont font, byte[] bytes) {
StringBuilder out = new StringBuilder(); StringBuilder out = new StringBuilder();
boolean hasValidCharacters = false; boolean hasValidCharacters = false;
int i = 0; int i = 0;
@ -52,7 +58,6 @@ public class TextDecodingHelper {
int code = bytes[i] & 0xFF; int code = bytes[i] & 0xFF;
String charStr = decodeSingleCharacter(font, code, bytes); String charStr = decodeSingleCharacter(font, code, bytes);
// Heuristic for multi-byte: if high byte, try combining with next
if (charStr == null && code >= 128 && i + 1 < bytes.length) { if (charStr == null && code >= 128 && i + 1 < bytes.length) {
int combinedCode = (code << 8) | (bytes[i + 1] & 0xFF); int combinedCode = (code << 8) | (bytes[i + 1] & 0xFF);
charStr = decodeSingleCharacter(font, combinedCode, bytes); charStr = decodeSingleCharacter(font, combinedCode, bytes);
@ -76,7 +81,7 @@ public class TextDecodingHelper {
return hasValidCharacters ? result : null; return hasValidCharacters ? result : null;
} }
public static String decodeSingleCharacter(PDFont font, int code, byte[] bytes) { public String decodeSingleCharacter(PDFont font, int code, byte[] bytes) {
String charStr = null; String charStr = null;
try { try {
@ -84,11 +89,9 @@ public class TextDecodingHelper {
} catch (Exception ignored) { } catch (Exception ignored) {
} }
// Enhanced CID Font and Composite Font Handling
if (charStr == null if (charStr == null
&& font instanceof org.apache.pdfbox.pdmodel.font.PDType0Font type0Font) { && font instanceof PDType0Font type0Font) {
try { try {
// Attempt CID-specific decoding for multi-byte codes
int cid = (bytes.length > 1) ? ((bytes[0] & 0xFF) << 8) | (bytes[1] & 0xFF) : code; int cid = (bytes.length > 1) ? ((bytes[0] & 0xFF) << 8) | (bytes[1] & 0xFF) : code;
charStr = type0Font.toUnicode(cid); charStr = type0Font.toUnicode(cid);
log.debug("CID decoding successful for code {}: {}", cid, charStr); log.debug("CID decoding successful for code {}: {}", cid, charStr);
@ -108,7 +111,7 @@ public class TextDecodingHelper {
return charStr; return charStr;
} }
public static String fallbackCharacterMapping(int code, byte[] bytes, PDFont font) { public String fallbackCharacterMapping(int code, byte[] bytes, PDFont font) {
try { try {
if (font instanceof PDType0Font && bytes.length > 1) { if (font instanceof PDType0Font && bytes.length > 1) {
return null; return null;
@ -139,10 +142,10 @@ public class TextDecodingHelper {
// Fallback to UTF-8/16 decoding attempt for unknown encodings // Fallback to UTF-8/16 decoding attempt for unknown encodings
try { try {
if (bytes.length >= 2) { if (bytes.length >= 2) {
java.nio.ByteBuffer buffer = java.nio.ByteBuffer.wrap(bytes); ByteBuffer buffer = ByteBuffer.wrap(bytes);
java.nio.charset.CharsetDecoder decoder = CharsetDecoder decoder =
java.nio.charset.StandardCharsets.UTF_16BE.newDecoder(); StandardCharsets.UTF_16BE.newDecoder();
java.nio.CharBuffer charBuffer = decoder.decode(buffer); CharBuffer charBuffer = decoder.decode(buffer);
return charBuffer.toString(); return charBuffer.toString();
} }
} catch (Exception e) { } catch (Exception e) {
@ -155,7 +158,7 @@ public class TextDecodingHelper {
} }
} }
public static String mapSubsetCharacter(int code) { public String mapSubsetCharacter(int code) {
if (code >= ASCII_LOWER_BOUND && code <= ASCII_UPPER_BOUND) { if (code >= ASCII_LOWER_BOUND && code <= ASCII_UPPER_BOUND) {
return String.valueOf((char) code); return String.valueOf((char) code);
} }
@ -165,7 +168,7 @@ public class TextDecodingHelper {
return null; return null;
} }
public static String tryDecodeWithFont(PDFont font, COSString cosString) { public String tryDecodeWithFont(PDFont font, COSString cosString) {
try { try {
if (font == null || cosString == null) { if (font == null || cosString == null) {
return null; return null;
@ -194,7 +197,6 @@ public class TextDecodingHelper {
return out.toString(); return out.toString();
} }
out.setLength(0); out.setLength(0);
anyMapped = false;
for (int i = 0; i < bytes.length; ) { for (int i = 0; i < bytes.length; ) {
int b1 = bytes[i] & 0xFF; int b1 = bytes[i] & 0xFF;
String u1 = null; String u1 = null;

View File

@ -2,6 +2,7 @@ package stirling.software.SPDF.utils.text;
import java.io.IOException; import java.io.IOException;
import lombok.experimental.UtilityClass;
import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDSimpleFont; import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding; import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding;
@ -10,9 +11,10 @@ import org.apache.pdfbox.pdmodel.font.encoding.Encoding;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
@Slf4j @Slf4j
@UtilityClass
public class TextEncodingHelper { public class TextEncodingHelper {
public static boolean canEncodeCharacters(PDFont font, String text) { public boolean canEncodeCharacters(PDFont font, String text) {
if (font == null || text == null || text.isEmpty()) { if (font == null || text == null || text.isEmpty()) {
return false; return false;
} }
@ -51,7 +53,7 @@ public class TextEncodingHelper {
} }
} }
private static boolean validateAsCodePointArray(PDFont font, String text) { private boolean validateAsCodePointArray(PDFont font, String text) {
int totalCodePoints = 0; int totalCodePoints = 0;
int successfulCodePoints = 0; int successfulCodePoints = 0;
@ -112,7 +114,7 @@ public class TextEncodingHelper {
return isAcceptable; return isAcceptable;
} }
public static boolean isTextSegmentRemovable(PDFont font, String text) { public boolean isTextSegmentRemovable(PDFont font, String text) {
if (font == null || text == null || text.isEmpty()) { if (font == null || text == null || text.isEmpty()) {
return false; return false;
} }
@ -145,7 +147,7 @@ public class TextEncodingHelper {
return isTextFullyRemovable(font, text); return isTextFullyRemovable(font, text);
} }
public static boolean isTextFullyRemovable(PDFont font, String text) { public boolean isTextFullyRemovable(PDFont font, String text) {
if (font == null || text == null || text.isEmpty()) { if (font == null || text == null || text.isEmpty()) {
return false; return false;
} }
@ -213,7 +215,7 @@ public class TextEncodingHelper {
} }
} }
private static boolean isSimpleCharacter(String text) { private boolean isSimpleCharacter(String text) {
if (text == null || text.isEmpty()) { if (text == null || text.isEmpty()) {
return false; return false;
} }
@ -241,7 +243,7 @@ public class TextEncodingHelper {
return true; return true;
} }
public static boolean hasCustomEncoding(PDFont font) { public boolean hasCustomEncoding(PDFont font) {
try { try {
if (font instanceof PDSimpleFont simpleFont) { if (font instanceof PDSimpleFont simpleFont) {
try { try {
@ -294,7 +296,7 @@ public class TextEncodingHelper {
} }
} }
public static boolean fontSupportsCharacter(PDFont font, String character) { public boolean fontSupportsCharacter(PDFont font, String character) {
if (font == null || character == null || character.isEmpty()) { if (font == null || character == null || character.isEmpty()) {
return false; return false;
} }
@ -318,14 +320,14 @@ public class TextEncodingHelper {
} }
} }
public static boolean isFontSubset(String fontName) { public boolean isFontSubset(String fontName) {
if (fontName == null) { if (fontName == null) {
return false; return false;
} }
return fontName.matches("^[A-Z]{6}\\+.*"); return fontName.matches("^[A-Z]{6}\\+.*");
} }
public static boolean canCalculateBasicWidths(PDFont font) { public boolean canCalculateBasicWidths(PDFont font) {
try { try {
float spaceWidth = font.getStringWidth(" "); float spaceWidth = font.getStringWidth(" ");
if (spaceWidth <= 0) { if (spaceWidth <= 0) {

View File

@ -5,15 +5,18 @@ import java.util.List;
import java.util.Set; import java.util.Set;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import lombok.experimental.UtilityClass;
import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.PDResources;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.pdmodel.font.PDFont;
@Slf4j @Slf4j
@UtilityClass
public class TextFinderUtils { public class TextFinderUtils {
public static boolean validateFontReliability(org.apache.pdfbox.pdmodel.font.PDFont font) { public boolean validateFontReliability(PDFont font) {
if (font == null) { if (font == null) {
return false; return false;
} }
@ -53,8 +56,8 @@ public class TextFinderUtils {
return false; return false;
} }
public static List<Pattern> createOptimizedSearchPatterns( public List<Pattern> createOptimizedSearchPatterns(
Set<String> searchTerms, boolean useRegex, boolean wholeWordSearch) { Set<String> searchTerms, boolean useRegex, boolean wholeWordSearch) {
List<Pattern> patterns = new ArrayList<>(); List<Pattern> patterns = new ArrayList<>();
for (String term : searchTerms) { for (String term : searchTerms) {
@ -84,7 +87,7 @@ public class TextFinderUtils {
return patterns; return patterns;
} }
private static String applyWordBoundaries(String originalTerm, String patternString) { private String applyWordBoundaries(String originalTerm, String patternString) {
if (originalTerm.length() == 1 && Character.isDigit(originalTerm.charAt(0))) { if (originalTerm.length() == 1 && Character.isDigit(originalTerm.charAt(0))) {
return "(?<![\\w])" + patternString + "(?![\\w])"; return "(?<![\\w])" + patternString + "(?![\\w])";
} else if (originalTerm.length() == 1) { } else if (originalTerm.length() == 1) {
@ -94,7 +97,7 @@ public class TextFinderUtils {
} }
} }
public static boolean hasProblematicFonts(PDPage page) { public boolean hasProblematicFonts(PDPage page) {
if (page == null) { if (page == null) {
return false; return false;
} }

View File

@ -1,16 +1,18 @@
package stirling.software.SPDF.utils.text; package stirling.software.SPDF.utils.text;
import lombok.experimental.UtilityClass;
import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDFont;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
@Slf4j @Slf4j
@UtilityClass
public class WidthCalculator { public class WidthCalculator {
private static final int FONT_SCALE_FACTOR = 1000; private final int FONT_SCALE_FACTOR = 1000;
public static float calculateAccurateWidth(PDFont font, String text, float fontSize) { public float calculateAccurateWidth(PDFont font, String text, float fontSize) {
if (font == null || text == null || text.isEmpty() || fontSize <= 0) { if (font == null || text == null || text.isEmpty() || fontSize <= 0) {
return 0; return 0;
} }
@ -42,8 +44,8 @@ public class WidthCalculator {
} }
} }
private static float calculateWidthWithCharacterIteration( private float calculateWidthWithCharacterIteration(
PDFont font, String text, float fontSize) { PDFont font, String text, float fontSize) {
try { try {
float totalWidth = 0; float totalWidth = 0;
@ -81,7 +83,7 @@ public class WidthCalculator {
} }
} }
private static float calculateFallbackWidth(PDFont font, String text, float fontSize) { private float calculateFallbackWidth(PDFont font, String text, float fontSize) {
try { try {
if (font.getFontDescriptor() != null if (font.getFontDescriptor() != null
&& font.getFontDescriptor().getFontBoundingBox() != null) { && font.getFontDescriptor().getFontBoundingBox() != null) {
@ -111,7 +113,7 @@ public class WidthCalculator {
} }
} }
public static boolean isWidthCalculationReliable(PDFont font) { public boolean isWidthCalculationReliable(PDFont font) {
if (font == null) { if (font == null) {
return false; return false;
} }