mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-08 17:51:20 +02:00
Refactor utility classes to use @UtilityClass
and switch to instance methods
- Applied the `@UtilityClass` annotation to utility classes for cleaner, more consistent usage patterns.
- Dropped the explicit `static` modifier from methods and constants in those classes; `@UtilityClass` makes every member static automatically, so the methods are still invoked statically (e.g. `TextEncodingHelper.isTextSegmentRemovable(...)`) rather than on an instance.
- Simplified imports and removed redundant comments for better readability.
- Minor updates in `RedactionService` to streamline text redaction logic and improve maintainability.

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
parent
f9d2d9bbe5
commit
8f19369c58
@ -1473,13 +1473,12 @@ public class RedactionService {
|
|||||||
String originalPart =
|
String originalPart =
|
||||||
originalText.substring(
|
originalText.substring(
|
||||||
redactionStartInString, redactionEndInString);
|
redactionStartInString, redactionEndInString);
|
||||||
if (!Boolean.TRUE.equals(AGGRESSIVE_MODE.get())) {
|
if (!Boolean.TRUE.equals(AGGRESSIVE_MODE.get()) && segment.getFont() != null
|
||||||
if (segment.getFont() != null
|
|
||||||
&& !TextEncodingHelper.isTextSegmentRemovable(
|
&& !TextEncodingHelper.isTextSegmentRemovable(
|
||||||
segment.getFont(), originalPart)) {
|
segment.getFont(), originalPart)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
modified = true;
|
modified = true;
|
||||||
if (Boolean.TRUE.equals(AGGRESSIVE_MODE.get())) {
|
if (Boolean.TRUE.equals(AGGRESSIVE_MODE.get())) {
|
||||||
newText.replace(
|
newText.replace(
|
||||||
@ -1515,8 +1514,7 @@ public class RedactionService {
|
|||||||
}
|
}
|
||||||
String modifiedString = newText.toString();
|
String modifiedString = newText.toString();
|
||||||
newArray.add(new COSString(modifiedString));
|
newArray.add(new COSString(modifiedString));
|
||||||
if (!Boolean.TRUE.equals(AGGRESSIVE_MODE.get())) {
|
if (!Boolean.TRUE.equals(AGGRESSIVE_MODE.get()) && modified && segment.getFont() != null && segment.getFontSize() > 0) {
|
||||||
if (modified && segment.getFont() != null && segment.getFontSize() > 0) {
|
|
||||||
try {
|
try {
|
||||||
float originalWidth =
|
float originalWidth =
|
||||||
safeGetStringWidth(segment.getFont(), originalText)
|
safeGetStringWidth(segment.getFont(), originalText)
|
||||||
@ -1537,7 +1535,7 @@ public class RedactionService {
|
|||||||
} catch (Exception ignored) {
|
} catch (Exception ignored) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
textOffsetInSegment += originalText.length();
|
textOffsetInSegment += originalText.length();
|
||||||
} else {
|
} else {
|
||||||
newArray.add(element);
|
newArray.add(element);
|
||||||
|
@ -1,20 +1,27 @@
|
|||||||
package stirling.software.SPDF.utils.text;
|
package stirling.software.SPDF.utils.text;
|
||||||
|
|
||||||
|
import lombok.experimental.UtilityClass;
|
||||||
import org.apache.pdfbox.cos.COSString;
|
import org.apache.pdfbox.cos.COSString;
|
||||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||||
import org.apache.pdfbox.pdmodel.font.PDType0Font;
|
import org.apache.pdfbox.pdmodel.font.PDType0Font;
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.CharBuffer;
|
||||||
|
import java.nio.charset.CharsetDecoder;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
|
@UtilityClass
|
||||||
public class TextDecodingHelper {
|
public class TextDecodingHelper {
|
||||||
|
|
||||||
private static final int ASCII_LOWER_BOUND = 32;
|
private final int ASCII_LOWER_BOUND = 32;
|
||||||
private static final int ASCII_UPPER_BOUND = 126;
|
private final int ASCII_UPPER_BOUND = 126;
|
||||||
private static final int EXTENDED_ASCII_LOWER_BOUND = 160;
|
private final int EXTENDED_ASCII_LOWER_BOUND = 160;
|
||||||
private static final int EXTENDED_ASCII_UPPER_BOUND = 255;
|
private final int EXTENDED_ASCII_UPPER_BOUND = 255;
|
||||||
|
|
||||||
public static void tryDecodeWithFontEnhanced(PDFont font, COSString cosString) {
|
public void tryDecodeWithFontEnhanced(PDFont font, COSString cosString) {
|
||||||
if (font == null || cosString == null) {
|
if (font == null || cosString == null) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -39,12 +46,11 @@ public class TextDecodingHelper {
|
|||||||
try {
|
try {
|
||||||
tryDecodeWithFont(font, cosString);
|
tryDecodeWithFont(font, cosString);
|
||||||
} catch (Exception fallbackException) {
|
} catch (Exception fallbackException) {
|
||||||
// Ultimate fallback: return hex representation for analysis
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String decodeCharactersEnhanced(PDFont font, byte[] bytes) {
|
public String decodeCharactersEnhanced(PDFont font, byte[] bytes) {
|
||||||
StringBuilder out = new StringBuilder();
|
StringBuilder out = new StringBuilder();
|
||||||
boolean hasValidCharacters = false;
|
boolean hasValidCharacters = false;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
@ -52,7 +58,6 @@ public class TextDecodingHelper {
|
|||||||
int code = bytes[i] & 0xFF;
|
int code = bytes[i] & 0xFF;
|
||||||
String charStr = decodeSingleCharacter(font, code, bytes);
|
String charStr = decodeSingleCharacter(font, code, bytes);
|
||||||
|
|
||||||
// Heuristic for multi-byte: if high byte, try combining with next
|
|
||||||
if (charStr == null && code >= 128 && i + 1 < bytes.length) {
|
if (charStr == null && code >= 128 && i + 1 < bytes.length) {
|
||||||
int combinedCode = (code << 8) | (bytes[i + 1] & 0xFF);
|
int combinedCode = (code << 8) | (bytes[i + 1] & 0xFF);
|
||||||
charStr = decodeSingleCharacter(font, combinedCode, bytes);
|
charStr = decodeSingleCharacter(font, combinedCode, bytes);
|
||||||
@ -76,7 +81,7 @@ public class TextDecodingHelper {
|
|||||||
return hasValidCharacters ? result : null;
|
return hasValidCharacters ? result : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String decodeSingleCharacter(PDFont font, int code, byte[] bytes) {
|
public String decodeSingleCharacter(PDFont font, int code, byte[] bytes) {
|
||||||
String charStr = null;
|
String charStr = null;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@ -84,11 +89,9 @@ public class TextDecodingHelper {
|
|||||||
} catch (Exception ignored) {
|
} catch (Exception ignored) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enhanced CID Font and Composite Font Handling
|
|
||||||
if (charStr == null
|
if (charStr == null
|
||||||
&& font instanceof org.apache.pdfbox.pdmodel.font.PDType0Font type0Font) {
|
&& font instanceof PDType0Font type0Font) {
|
||||||
try {
|
try {
|
||||||
// Attempt CID-specific decoding for multi-byte codes
|
|
||||||
int cid = (bytes.length > 1) ? ((bytes[0] & 0xFF) << 8) | (bytes[1] & 0xFF) : code;
|
int cid = (bytes.length > 1) ? ((bytes[0] & 0xFF) << 8) | (bytes[1] & 0xFF) : code;
|
||||||
charStr = type0Font.toUnicode(cid);
|
charStr = type0Font.toUnicode(cid);
|
||||||
log.debug("CID decoding successful for code {}: {}", cid, charStr);
|
log.debug("CID decoding successful for code {}: {}", cid, charStr);
|
||||||
@ -108,7 +111,7 @@ public class TextDecodingHelper {
|
|||||||
return charStr;
|
return charStr;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String fallbackCharacterMapping(int code, byte[] bytes, PDFont font) {
|
public String fallbackCharacterMapping(int code, byte[] bytes, PDFont font) {
|
||||||
try {
|
try {
|
||||||
if (font instanceof PDType0Font && bytes.length > 1) {
|
if (font instanceof PDType0Font && bytes.length > 1) {
|
||||||
return null;
|
return null;
|
||||||
@ -139,10 +142,10 @@ public class TextDecodingHelper {
|
|||||||
// Fallback to UTF-8/16 decoding attempt for unknown encodings
|
// Fallback to UTF-8/16 decoding attempt for unknown encodings
|
||||||
try {
|
try {
|
||||||
if (bytes.length >= 2) {
|
if (bytes.length >= 2) {
|
||||||
java.nio.ByteBuffer buffer = java.nio.ByteBuffer.wrap(bytes);
|
ByteBuffer buffer = ByteBuffer.wrap(bytes);
|
||||||
java.nio.charset.CharsetDecoder decoder =
|
CharsetDecoder decoder =
|
||||||
java.nio.charset.StandardCharsets.UTF_16BE.newDecoder();
|
StandardCharsets.UTF_16BE.newDecoder();
|
||||||
java.nio.CharBuffer charBuffer = decoder.decode(buffer);
|
CharBuffer charBuffer = decoder.decode(buffer);
|
||||||
return charBuffer.toString();
|
return charBuffer.toString();
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
@ -155,7 +158,7 @@ public class TextDecodingHelper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String mapSubsetCharacter(int code) {
|
public String mapSubsetCharacter(int code) {
|
||||||
if (code >= ASCII_LOWER_BOUND && code <= ASCII_UPPER_BOUND) {
|
if (code >= ASCII_LOWER_BOUND && code <= ASCII_UPPER_BOUND) {
|
||||||
return String.valueOf((char) code);
|
return String.valueOf((char) code);
|
||||||
}
|
}
|
||||||
@ -165,7 +168,7 @@ public class TextDecodingHelper {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String tryDecodeWithFont(PDFont font, COSString cosString) {
|
public String tryDecodeWithFont(PDFont font, COSString cosString) {
|
||||||
try {
|
try {
|
||||||
if (font == null || cosString == null) {
|
if (font == null || cosString == null) {
|
||||||
return null;
|
return null;
|
||||||
@ -194,7 +197,6 @@ public class TextDecodingHelper {
|
|||||||
return out.toString();
|
return out.toString();
|
||||||
}
|
}
|
||||||
out.setLength(0);
|
out.setLength(0);
|
||||||
anyMapped = false;
|
|
||||||
for (int i = 0; i < bytes.length; ) {
|
for (int i = 0; i < bytes.length; ) {
|
||||||
int b1 = bytes[i] & 0xFF;
|
int b1 = bytes[i] & 0xFF;
|
||||||
String u1 = null;
|
String u1 = null;
|
||||||
|
@ -2,6 +2,7 @@ package stirling.software.SPDF.utils.text;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import lombok.experimental.UtilityClass;
|
||||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||||
import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
|
import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
|
||||||
import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding;
|
import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding;
|
||||||
@ -10,9 +11,10 @@ import org.apache.pdfbox.pdmodel.font.encoding.Encoding;
|
|||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
|
@UtilityClass
|
||||||
public class TextEncodingHelper {
|
public class TextEncodingHelper {
|
||||||
|
|
||||||
public static boolean canEncodeCharacters(PDFont font, String text) {
|
public boolean canEncodeCharacters(PDFont font, String text) {
|
||||||
if (font == null || text == null || text.isEmpty()) {
|
if (font == null || text == null || text.isEmpty()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -51,7 +53,7 @@ public class TextEncodingHelper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean validateAsCodePointArray(PDFont font, String text) {
|
private boolean validateAsCodePointArray(PDFont font, String text) {
|
||||||
int totalCodePoints = 0;
|
int totalCodePoints = 0;
|
||||||
int successfulCodePoints = 0;
|
int successfulCodePoints = 0;
|
||||||
|
|
||||||
@ -112,7 +114,7 @@ public class TextEncodingHelper {
|
|||||||
return isAcceptable;
|
return isAcceptable;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isTextSegmentRemovable(PDFont font, String text) {
|
public boolean isTextSegmentRemovable(PDFont font, String text) {
|
||||||
if (font == null || text == null || text.isEmpty()) {
|
if (font == null || text == null || text.isEmpty()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -145,7 +147,7 @@ public class TextEncodingHelper {
|
|||||||
return isTextFullyRemovable(font, text);
|
return isTextFullyRemovable(font, text);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isTextFullyRemovable(PDFont font, String text) {
|
public boolean isTextFullyRemovable(PDFont font, String text) {
|
||||||
if (font == null || text == null || text.isEmpty()) {
|
if (font == null || text == null || text.isEmpty()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -213,7 +215,7 @@ public class TextEncodingHelper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean isSimpleCharacter(String text) {
|
private boolean isSimpleCharacter(String text) {
|
||||||
if (text == null || text.isEmpty()) {
|
if (text == null || text.isEmpty()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -241,7 +243,7 @@ public class TextEncodingHelper {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean hasCustomEncoding(PDFont font) {
|
public boolean hasCustomEncoding(PDFont font) {
|
||||||
try {
|
try {
|
||||||
if (font instanceof PDSimpleFont simpleFont) {
|
if (font instanceof PDSimpleFont simpleFont) {
|
||||||
try {
|
try {
|
||||||
@ -294,7 +296,7 @@ public class TextEncodingHelper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean fontSupportsCharacter(PDFont font, String character) {
|
public boolean fontSupportsCharacter(PDFont font, String character) {
|
||||||
if (font == null || character == null || character.isEmpty()) {
|
if (font == null || character == null || character.isEmpty()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -318,14 +320,14 @@ public class TextEncodingHelper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isFontSubset(String fontName) {
|
public boolean isFontSubset(String fontName) {
|
||||||
if (fontName == null) {
|
if (fontName == null) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return fontName.matches("^[A-Z]{6}\\+.*");
|
return fontName.matches("^[A-Z]{6}\\+.*");
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean canCalculateBasicWidths(PDFont font) {
|
public boolean canCalculateBasicWidths(PDFont font) {
|
||||||
try {
|
try {
|
||||||
float spaceWidth = font.getStringWidth(" ");
|
float spaceWidth = font.getStringWidth(" ");
|
||||||
if (spaceWidth <= 0) {
|
if (spaceWidth <= 0) {
|
||||||
|
@ -5,15 +5,18 @@ import java.util.List;
|
|||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import lombok.experimental.UtilityClass;
|
||||||
import org.apache.pdfbox.pdmodel.PDPage;
|
import org.apache.pdfbox.pdmodel.PDPage;
|
||||||
import org.apache.pdfbox.pdmodel.PDResources;
|
import org.apache.pdfbox.pdmodel.PDResources;
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
|
@UtilityClass
|
||||||
public class TextFinderUtils {
|
public class TextFinderUtils {
|
||||||
|
|
||||||
public static boolean validateFontReliability(org.apache.pdfbox.pdmodel.font.PDFont font) {
|
public boolean validateFontReliability(PDFont font) {
|
||||||
if (font == null) {
|
if (font == null) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -53,8 +56,8 @@ public class TextFinderUtils {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<Pattern> createOptimizedSearchPatterns(
|
public List<Pattern> createOptimizedSearchPatterns(
|
||||||
Set<String> searchTerms, boolean useRegex, boolean wholeWordSearch) {
|
Set<String> searchTerms, boolean useRegex, boolean wholeWordSearch) {
|
||||||
List<Pattern> patterns = new ArrayList<>();
|
List<Pattern> patterns = new ArrayList<>();
|
||||||
|
|
||||||
for (String term : searchTerms) {
|
for (String term : searchTerms) {
|
||||||
@ -84,7 +87,7 @@ public class TextFinderUtils {
|
|||||||
return patterns;
|
return patterns;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String applyWordBoundaries(String originalTerm, String patternString) {
|
private String applyWordBoundaries(String originalTerm, String patternString) {
|
||||||
if (originalTerm.length() == 1 && Character.isDigit(originalTerm.charAt(0))) {
|
if (originalTerm.length() == 1 && Character.isDigit(originalTerm.charAt(0))) {
|
||||||
return "(?<![\\w])" + patternString + "(?![\\w])";
|
return "(?<![\\w])" + patternString + "(?![\\w])";
|
||||||
} else if (originalTerm.length() == 1) {
|
} else if (originalTerm.length() == 1) {
|
||||||
@ -94,7 +97,7 @@ public class TextFinderUtils {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean hasProblematicFonts(PDPage page) {
|
public boolean hasProblematicFonts(PDPage page) {
|
||||||
if (page == null) {
|
if (page == null) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -1,16 +1,18 @@
|
|||||||
package stirling.software.SPDF.utils.text;
|
package stirling.software.SPDF.utils.text;
|
||||||
|
|
||||||
|
import lombok.experimental.UtilityClass;
|
||||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
|
@UtilityClass
|
||||||
public class WidthCalculator {
|
public class WidthCalculator {
|
||||||
|
|
||||||
private static final int FONT_SCALE_FACTOR = 1000;
|
private final int FONT_SCALE_FACTOR = 1000;
|
||||||
|
|
||||||
public static float calculateAccurateWidth(PDFont font, String text, float fontSize) {
|
public float calculateAccurateWidth(PDFont font, String text, float fontSize) {
|
||||||
if (font == null || text == null || text.isEmpty() || fontSize <= 0) {
|
if (font == null || text == null || text.isEmpty() || fontSize <= 0) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -42,8 +44,8 @@ public class WidthCalculator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static float calculateWidthWithCharacterIteration(
|
private float calculateWidthWithCharacterIteration(
|
||||||
PDFont font, String text, float fontSize) {
|
PDFont font, String text, float fontSize) {
|
||||||
try {
|
try {
|
||||||
float totalWidth = 0;
|
float totalWidth = 0;
|
||||||
|
|
||||||
@ -81,7 +83,7 @@ public class WidthCalculator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static float calculateFallbackWidth(PDFont font, String text, float fontSize) {
|
private float calculateFallbackWidth(PDFont font, String text, float fontSize) {
|
||||||
try {
|
try {
|
||||||
if (font.getFontDescriptor() != null
|
if (font.getFontDescriptor() != null
|
||||||
&& font.getFontDescriptor().getFontBoundingBox() != null) {
|
&& font.getFontDescriptor().getFontBoundingBox() != null) {
|
||||||
@ -111,7 +113,7 @@ public class WidthCalculator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isWidthCalculationReliable(PDFont font) {
|
public boolean isWidthCalculationReliable(PDFont font) {
|
||||||
if (font == null) {
|
if (font == null) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user