enhance text handling and encoding validation

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
2025-09-08 17:51:20 +02:00 · 2025-08-24 16:59:09 +02:00 · 2025-08-24 16:59:09 +02:00 · 7db58ad6dd
commit 7db58ad6dd
parent e396b6cbb8
6 changed files with 1914 additions and 1119 deletions
--- a/app/core/src/main/java/stirling/software/SPDF/pdf/TextFinder.java
+++ b/app/core/src/main/java/stirling/software/SPDF/pdf/TextFinder.java
@ -6,23 +6,20 @@ import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import lombok.Getter;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.text.PDFTextStripper;
 import org.apache.pdfbox.text.TextPosition;
-import lombok.extern.slf4j.Slf4j;
+import lombok.Getter;
 import stirling.software.SPDF.model.PDFText;
@Slf4j
 public class TextFinder extends PDFTextStripper {
    private final String searchTerm;
    private final boolean useRegex;
    private final boolean wholeWordSearch;
-    @Getter
+    @Getter private final List<PDFText> foundTexts = new ArrayList<>();
    private final List<PDFText> foundTexts = new ArrayList<>();
    private final List<TextPosition> pageTextPositions = new ArrayList<>();
    private final StringBuilder pageTextBuilder = new StringBuilder();
@ -45,20 +42,39 @@ public class TextFinder extends PDFTextStripper {
    @Override
    protected void writeString(String text, List<TextPosition> textPositions) {
-        pageTextBuilder.append(text);
+        for (TextPosition tp : textPositions) {
-        pageTextPositions.addAll(textPositions);
+            if (tp == null) continue;
            String u = tp.getUnicode();
            if (u == null) continue;
            for (int i = 0; i < u.length(); ) {
                int cp = u.codePointAt(i);
                pageTextBuilder.append(Character.toChars(cp));
                // Add one position per code unit appended (1-2 chars depending on surrogate)
                int codeUnits = Character.charCount(cp);
                for (int k = 0; k < codeUnits; k++) {
                    pageTextPositions.add(tp);
                }
                i += codeUnits;
            }
        }
    }
    @Override
    protected void writeWordSeparator() {
-        pageTextBuilder.append(getWordSeparator());
+        String sep = getWordSeparator();
-        pageTextPositions.add(null); // Placeholder for separator
+        pageTextBuilder.append(sep);
        for (int i = 0; i < sep.length(); i++) {
            pageTextPositions.add(null);
        }
    }
    @Override
    protected void writeLineSeparator() {
-        pageTextBuilder.append(getLineSeparator());
+        String sep = getLineSeparator();
-        pageTextPositions.add(null); // Placeholder for separator
+        pageTextBuilder.append(sep);
        for (int i = 0; i < sep.length(); i++) {
            pageTextPositions.add(null);
        }
    }
    @Override
@ -91,27 +107,10 @@ public class TextFinder extends PDFTextStripper {
        Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
        Matcher matcher = pattern.matcher(text);
        log.debug(
                "Searching for '{}' in page {} with regex '{}' (wholeWord: {}, useRegex: {})",
                processedSearchTerm,
                getCurrentPageNo(),
                regex,
                wholeWordSearch,
                useRegex);
        int matchCount = 0;
        while (matcher.find()) {
            matchCount++;
            int matchStart = matcher.start();
            int matchEnd = matcher.end();
            log.debug(
                    "Found match #{} at positions {}-{}: '{}'",
                    matchCount,
                    matchStart,
                    matchEnd,
                    matcher.group());
            float minX = Float.MAX_VALUE;
            float minY = Float.MAX_VALUE;
            float maxX = Float.MIN_VALUE;
@ -119,13 +118,7 @@ public class TextFinder extends PDFTextStripper {
            boolean foundPosition = false;
            for (int i = matchStart; i < matchEnd; i++) {
-                if (i >= pageTextPositions.size()) {
+                if (i >= pageTextPositions.size()) continue;
                    log.debug(
                            "Position index {} exceeds available positions ({})",
                            i,
                            pageTextPositions.size());
                    continue;
                }
                TextPosition pos = pageTextPositions.get(i);
                if (pos != null) {
                    foundPosition = true;
@ -137,11 +130,6 @@ public class TextFinder extends PDFTextStripper {
            }
            if (!foundPosition && matchStart < pageTextPositions.size()) {
                log.debug(
                        "Attempting to find nearby positions for match at {}-{}",
                        matchStart,
                        matchEnd);
                for (int i = Math.max(0, matchStart - 5);
                        i < Math.min(pageTextPositions.size(), matchEnd + 5);
                        i++) {
@ -166,29 +154,11 @@ public class TextFinder extends PDFTextStripper {
                                maxX,
                                maxY,
                                matcher.group()));
                log.debug(
                        "Added PDFText for match: page={}, bounds=({},{},{},{}), text='{}'",
                        getCurrentPageNo() - 1,
                        minX,
                        minY,
                        maxX,
                        maxY,
                        matcher.group());
            } else {
-                log.warn(
+                // no position info
                        "Found text match '{}' but no valid position data at {}-{}",
                        matcher.group(),
                        matchStart,
                        matchEnd);
            }
        }
        log.debug(
                "Page {} search complete: found {} matches for '{}'",
                getCurrentPageNo(),
                matchCount,
                processedSearchTerm);
        super.endPage(page);
    }
--- a/app/core/src/main/java/stirling/software/SPDF/service/RedactionService.java
+++ b/app/core/src/main/java/stirling/software/SPDF/service/RedactionService.java
--- a/app/core/src/main/java/stirling/software/SPDF/utils/text/TextDecodingHelper.java
+++ b/app/core/src/main/java/stirling/software/SPDF/utils/text/TextDecodingHelper.java
@ -2,6 +2,7 @@ package stirling.software.SPDF.utils.text;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
@ -13,11 +14,9 @@ import org.apache.pdfbox.pdmodel.PDResources;
 import org.apache.pdfbox.pdmodel.font.*;
 import lombok.experimental.UtilityClass;
 import lombok.extern.slf4j.Slf4j;
 import stirling.software.SPDF.service.RedactionService;
@Slf4j
@UtilityClass
 public class TextDecodingHelper {
@ -25,6 +24,8 @@ public class TextDecodingHelper {
    private final int ASCII_UPPER_BOUND = 126;
    private final int EXTENDED_ASCII_LOWER_BOUND = 160;
    private final int EXTENDED_ASCII_UPPER_BOUND = 255;
    private final int PROBLEMATIC_CODE_LOWER_BOUND = 65488;
    private final int PROBLEMATIC_CODE_UPPER_BOUND = 65535;
    public PDFont getFontSafely(PDResources resources, COSName fontName) {
        if (resources == null || fontName == null) {
@ -33,27 +34,15 @@ public class TextDecodingHelper {
        try {
            PDFont font = resources.getFont(fontName);
-            if (font == null) {
+            if (font == null) return null;
                return null;
            }
            try {
-                String fontNameCheck = font.getName();
+                String n = font.getName();
-                if (fontNameCheck == null || fontNameCheck.trim().isEmpty()) {
+                if (n == null || n.trim().isEmpty()) return null;
                    log.debug("Font {} has null or empty name, skipping", fontName.getName());
                    return null;
                }
            } catch (Exception e) {
                log.debug(
                        "Error accessing font name for {}, skipping: {}",
                        fontName.getName(),
                        e.getMessage());
                return null;
            }
            return font;
        } catch (Exception e) {
            log.debug("Error retrieving font {}: {}", fontName.getName(), e.getMessage());
            return null;
        }
    }
@ -65,90 +54,160 @@ public class TextDecodingHelper {
        try {
            byte[] bytes = cosString.getBytes();
-            if (bytes.length == 0) {
+            if (bytes.length == 0) return;
                return;
            }
            String basicDecoded = tryDecodeWithFont(font, cosString);
            if (basicDecoded != null
                    && !basicDecoded.contains("?")
-                    && !basicDecoded.trim().isEmpty()) {
+                    && !basicDecoded.trim().isEmpty()) return;
                return;
            }
            decodeCharactersEnhanced(font, bytes);
        } catch (Exception e) {
            log.error("Decoding failed: {}", e.getMessage(), e);
            try {
                tryDecodeWithFont(font, cosString);
-            } catch (Exception fallbackException) {
+            } catch (Exception ignored) {
            }
        }
    }
    /**
     * Decodes raw string bytes into text by trying a series of strategies in a fixed order and
     * returning the first result that {@code isAcceptable} approves.
     *
     * <p>Order of attempts: the font's own tables ({@code decodeByFontTables}), then UTF-8,
     * UTF-16BE, UTF-16LE, windows-1252, windows-1250, GB2312, Big5, Shift_JIS, EUC-KR and finally
     * ISO-8859-1.
     *
     * <p>Charsets that the running JVM does not provide are skipped instead of letting
     * {@code Charset.forName} throw {@code UnsupportedCharsetException}, so a reduced runtime
     * cannot abort the whole fallback chain.
     *
     * <p>NOTE(review): the UTF-16 decoders map almost any even-length byte sequence to assigned
     * code points, so the later single-byte and CJK candidates may rarely be reached - confirm the
     * ordering is intended.
     *
     * @param font font used for the first, table-driven attempt; may be null
     * @param bytes raw bytes to decode
     * @return the first acceptable decoding, or null when no strategy produced one
     */
    public String decodeCharactersEnhanced(PDFont font, byte[] bytes) {
        // Font-guided decoding has priority over any charset guess.
        String fontPass = decodeByFontTables(font, bytes);
        if (isAcceptable(fontPass)) {
            return fontPass;
        }

        // Charset fallbacks, most specific first; ISO-8859-1 last because it maps every byte.
        String[] candidateCharsets = {
            "UTF-8",
            "UTF-16BE",
            "UTF-16LE",
            "windows-1252",
            "windows-1250",
            "GB2312",
            "Big5",
            "Shift_JIS",
            "EUC-KR",
            "ISO-8859-1"
        };
        for (String charsetName : candidateCharsets) {
            if (!Charset.isSupported(charsetName)) {
                // Extended charsets are optional in a JRE; skip rather than throw.
                continue;
            }
            String decoded = tryDecodeCharset(bytes, Charset.forName(charsetName));
            if (isAcceptable(decoded)) {
                return decoded;
            }
        }
        return null;
    }
    private String decodeByFontTables(PDFont font, byte[] bytes) {
        if (font == null || bytes == null || bytes.length == 0) return null;
        StringBuilder out = new StringBuilder();
        boolean hasValidCharacters = false;
        int i = 0;
        while (i < bytes.length) {
-            int code = bytes[i] & 0xFF;
+            String ch = null;
-            String charStr = decodeSingleCharacter(font, code, bytes);
+            int consumed = 1;
            try {
                ch = tryToUnicode(font, bytes, i);
                if (ch == null && i + 1 < bytes.length) {
                    consumed = 2;
                    ch = tryToUnicode(font, bytes, i, 2);
                }
            } catch (Exception ignored) {
            }
            if (!isPrintable(ch)) {
                // Handle problematic character codes specifically
                ch = "<EFBFBD>";
            }
            out.append(ch);
            i += consumed;
        }
        String s = out.toString();
        return isAcceptable(s) ? s : null;
    }
-            if (charStr == null && code >= 128 && i + 1 < bytes.length) {
+    private String tryToUnicode(PDFont font, byte[] bytes, int pos) {
-                int combinedCode = (code << 8) | (bytes[i + 1] & 0xFF);
+        int code = bytes[pos] & 0xFF;
-                charStr = decodeSingleCharacter(font, combinedCode, bytes);
+        try {
-                if (charStr != null) {
+            return font.toUnicode(code);
-                    i += 2; // Skip the next byte
+        } catch (Exception e) {
-                    out.append(charStr);
+            return null;
                    hasValidCharacters = true;
                    continue;
        }
    }
-            if (charStr != null && !charStr.isEmpty()) {
+    private String tryToUnicode(PDFont font, byte[] bytes, int pos, int len) {
-                out.append(charStr);
+        if (pos + len - 1 >= bytes.length) return null;
-                hasValidCharacters = true;
+        int code = 0;
-            } else {
+        for (int j = 0; j < len; j++) code = (code << 8) | (bytes[pos + j] & 0xFF);
-                out.append('?');
+        try {
            return font.toUnicode(code);
        } catch (Exception e) {
            return null;
        }
            i++;
    }
-        String result = out.toString();
+
-        return hasValidCharacters ? result : null;
+    private String tryDecodeCharset(byte[] bytes, Charset cs) {
        // Decodes the whole byte array with the given charset and returns the text only when
        // isPrintable accepts it; every other outcome is reported as null.
        try {
            // new String(byte[], Charset) does not throw on malformed or unmappable input; it
            // substitutes the charset's replacement string, so this decode is lenient and the
            // isPrintable check below is what filters out bad guesses.
            String s = new String(bytes, cs);
            return isPrintable(s) ? s : null;
        } catch (Exception e) {
            // Any runtime failure (for example a null argument) is treated as "no result".
            return null;
        }
    }
    /**
     * Reports whether enough of {@code s} consists of printable code points.
     *
     * <p>A code point counts as non-printable when its general category is CONTROL or FORMAT, or
     * when it is U+FFFD. The string passes when the printable count is at least three quarters of
     * all code points (integer division) and at least one.
     *
     * @param s text to inspect; null or empty is never printable
     * @return true when the printable share meets the threshold
     */
    private boolean isPrintable(String s) {
        if (s == null || s.isEmpty()) {
            return false;
        }
        int total = s.codePointCount(0, s.length());
        long printable =
                s.codePoints()
                        .filter(
                                cp -> {
                                    int type = Character.getType(cp);
                                    boolean rejected =
                                            type == Character.CONTROL
                                                    || type == Character.FORMAT
                                                    || cp == 0xFFFD;
                                    return !rejected;
                                })
                        .count();
        int required = Math.max(1, total * 3 / 4);
        return printable >= required;
    }
    /**
     * Decides whether a decoded candidate string is good enough to return to the caller.
     * Currently this is exactly the {@code isPrintable} check.
     */
    private boolean isAcceptable(String s) {
        return isPrintable(s);
    }
    public String decodeSingleCharacter(PDFont font, int code, byte[] bytes) {
        String charStr = null;
        try {
            charStr = font.toUnicode(code);
        } catch (Exception ignored) {
        }
        if (charStr == null && font instanceof PDType0Font type0Font) {
            try {
                int cid = (bytes.length > 1) ? ((bytes[0] & 0xFF) << 8) | (bytes[1] & 0xFF) : code;
                charStr = type0Font.toUnicode(cid);
-                log.debug("CID decoding successful for code {}: {}", cid, charStr);
+            } catch (Exception ignored) {
            } catch (Exception e) {
                log.debug("CID decoding failed for code {}: {}", code, e.getMessage());
            }
        }
        if (charStr == null && font.getName() != null && font.getName().contains("+")) {
            charStr = mapSubsetCharacter(code);
        }
        if (charStr == null) {
            charStr = fallbackCharacterMapping(code, bytes, font);
        }
        return charStr;
    }
    public String fallbackCharacterMapping(int code, byte[] bytes, PDFont font) {
        try {
            // Handle problematic high-range character codes that cause .notdef warnings
            if (code >= PROBLEMATIC_CODE_LOWER_BOUND && code <= PROBLEMATIC_CODE_UPPER_BOUND) {
                return handleProblematicCharacterCode(code, font);
            }
            if (font instanceof PDType0Font && bytes.length > 1) {
                return null;
            }
@ -164,18 +223,15 @@ public class TextDecodingHelper {
            String fontName = font.getName();
            if (fontName != null) {
                String lowerName = fontName.toLowerCase();
-                if (lowerName.contains("cjk")
+                if ((lowerName.contains("cjk")
                                || lowerName.contains("gb")
-                        || lowerName.contains("jp")) {
+                                || lowerName.contains("jp"))
-                    // Basic CJK fallback (expand with a lookup table if needed)
+                        && code >= 0x4E00
-                    if (code >= 0x4E00 && code <= 0x9FFF) {
+                        && code <= 0x9FFF) {
-                        return String.valueOf(
+                    return String.valueOf((char) code);
                                (char) code); // Unicode Basic Multilingual Plane for CJK
                    }
                }
            }
            // Fallback to UTF-8/16 decoding attempt for unknown encodings
            try {
                if (bytes.length >= 2) {
                    ByteBuffer buffer = ByteBuffer.wrap(bytes);
@ -184,7 +240,7 @@ public class TextDecodingHelper {
                    return charBuffer.toString();
                }
            } catch (Exception e) {
-                log.debug("UTF fallback failed: {}", e.getMessage());
+
            }
            return null;
@ -193,6 +249,19 @@ public class TextDecodingHelper {
        }
    }
    /**
     * Produces a substitute string for a character code in the range
     * {@code PROBLEMATIC_CODE_LOWER_BOUND..PROBLEMATIC_CODE_UPPER_BOUND}.
     *
     * <p>The code is rebased by subtracting the lower bound. If the rebased value is at least
     * {@code ASCII_LOWER_BOUND} it is returned as that single character. Otherwise, for a font
     * whose name contains {@code '+'} (presumably a subset-tagged font name - confirm), the result
     * of {@code mapSubsetCharacter} for the rebased value is returned as-is. In every other case,
     * including codes outside the range, the Unicode replacement character U+FFFD is returned.
     *
     * @param code character code to translate
     * @param font font the code came from; may be null
     * @return the substitute text; U+FFFD when no mapping applies
     */
    public String handleProblematicCharacterCode(int code, PDFont font) {
        boolean inProblematicRange =
                code >= PROBLEMATIC_CODE_LOWER_BOUND && code <= PROBLEMATIC_CODE_UPPER_BOUND;
        if (!inProblematicRange) {
            return "\uFFFD";
        }

        int adjustedCode = code - PROBLEMATIC_CODE_LOWER_BOUND;
        if (adjustedCode >= ASCII_LOWER_BOUND) {
            return String.valueOf((char) adjustedCode);
        }

        if (font != null && font.getName() != null && font.getName().contains("+")) {
            return mapSubsetCharacter(adjustedCode);
        }
        // Written as an escape so the literal survives any source-encoding round trip.
        return "\uFFFD";
    }
    public String mapSubsetCharacter(int code) {
        if (code >= ASCII_LOWER_BOUND && code <= ASCII_UPPER_BOUND) {
            return String.valueOf((char) code);
@ -221,6 +290,7 @@ public class TextDecodingHelper {
                    uni = font.toUnicode(code);
                } catch (Exception ignored) {
                }
                if (uni != null) {
                    out.append(uni);
                    anyMapped = true;
@ -239,6 +309,7 @@ public class TextDecodingHelper {
                    u1 = font.toUnicode(b1);
                } catch (Exception ignored) {
                }
                if (i + 1 < bytes.length) {
                    int b2 = bytes[i + 1] & 0xFF;
                    int code = (b1 << 8) | b2;
@ -247,6 +318,12 @@ public class TextDecodingHelper {
                        u2 = font.toUnicode(code);
                    } catch (Exception ignored) {
                    }
                    // Handle problematic multi-byte codes
                    if (u2 == null && code >= PROBLEMATIC_CODE_LOWER_BOUND) {
                        u2 = handleProblematicCharacterCode(code, font);
                    }
                    if (u2 != null) {
                        out.append(u2);
                        i += 2;
@ -267,12 +344,12 @@ public class TextDecodingHelper {
        }
    }
-    public static RedactionService.DecodedMapping buildDecodeMapping(PDFont font, byte[] bytes) {
+    public RedactionService.DecodedMapping buildDecodeMapping(PDFont font, byte[] bytes) {
        RedactionService.DecodedMapping map = new RedactionService.DecodedMapping();
        if (font == null || bytes == null) {
-            map.text = "";
+            map.setText("");
-            map.charByteStart = new int[0];
+            map.setCharByteStart(new int[0]);
-            map.charByteEnd = new int[0];
+            map.setCharByteEnd(new int[0]);
            return map;
        }
@ -289,46 +366,32 @@ public class TextDecodingHelper {
        while (i < bytes.length) {
            int start = i;
-            String decodedChar = null;
+            String decodedChar;
-            int consumed = 1;
+            int consumed;
            try {
                if (isType0) {
                    // Handle CID fonts and multi-byte encodings
                    decodedChar = decodeType0Font((PDType0Font) font, bytes, i);
                    consumed = getType0CharLength((PDType0Font) font, bytes, i);
                } else if (isType1) {
                    // Handle Type1 fonts with specific encoding
                    decodedChar = decodeType1Font((PDType1Font) font, bytes, i);
-                    consumed = getType1CharLength((PDType1Font) font, bytes, i);
+                    consumed = 1;
                } else if (isType3) {
                    // Handle Type3 bitmap fonts
                    decodedChar = decodeType3Font((PDType3Font) font, bytes, i);
-                    consumed = 1; // Type3 typically single byte
+                    consumed = 1;
                } else if (isTrueType) {
                    // Handle TrueType fonts
                    decodedChar = decodeTrueTypeFont((PDTrueTypeFont) font, bytes, i);
                    consumed = getTrueTypeCharLength((PDTrueTypeFont) font, bytes, i);
                } else {
                    // Generic fallback for other font types
                    decodedChar = decodeGenericFont(font, bytes, i);
                    consumed = getGenericCharLength(font, bytes, i);
                }
                // Validate the consumed length
                if (consumed <= 0 || i + consumed > bytes.length) {
                    consumed = 1;
                }
-
+                if (consumed <= 0 || i + consumed > bytes.length) consumed = 1;
            } catch (Exception e) {
                // Log the error for debugging purposes
                System.err.println(
                        "Error decoding character at position " + i + ": " + e.getMessage());
                decodedChar = null;
                consumed = 1;
            }
            // Handle null or empty decoded characters
            if (decodedChar == null || decodedChar.isEmpty()) {
                decodedChar = handleUndecodableChar(bytes, i, consumed);
            }
@ -345,15 +408,14 @@ public class TextDecodingHelper {
            i += consumed;
        }
-        map.text = sb.toString();
+        map.setText(sb.toString());
-        map.charByteStart = starts.stream().mapToInt(Integer::intValue).toArray();
+        map.setCharByteStart(starts.stream().mapToInt(Integer::intValue).toArray());
-        map.charByteEnd = ends.stream().mapToInt(Integer::intValue).toArray();
+        map.setCharByteEnd(ends.stream().mapToInt(Integer::intValue).toArray());
        return map;
    }
-    private static String decodeType0Font(PDType0Font font, byte[] bytes, int position) {
+    private String decodeType0Font(PDType0Font font, byte[] bytes, int position) {
        try {
            // Try multi-byte decoding first (common for CJK fonts)
            if (position + 1 < bytes.length) {
                int b1 = bytes[position] & 0xFF;
                int b2 = bytes[position + 1] & 0xFF;
@ -372,7 +434,7 @@ public class TextDecodingHelper {
        }
    }
-    private static int getType0CharLength(PDType0Font font, byte[] bytes, int position) {
+    private int getType0CharLength(PDType0Font font, byte[] bytes, int position) {
        try {
            if (position + 1 < bytes.length) {
                int b1 = bytes[position] & 0xFF;
@ -389,7 +451,7 @@ public class TextDecodingHelper {
        }
    }
-    private static String decodeType1Font(PDType1Font font, byte[] bytes, int position) {
+    private String decodeType1Font(PDType1Font font, byte[] bytes, int position) {
        try {
            int code = bytes[position] & 0xFF;
            return font.toUnicode(code);
@ -398,11 +460,7 @@ public class TextDecodingHelper {
        }
    }
-    private static int getType1CharLength(PDType1Font font, byte[] bytes, int position) {
+    private String decodeType3Font(PDType3Font font, byte[] bytes, int position) {
        return 1; // Type1 fonts are typically single-byte
    }
    private static String decodeType3Font(PDType3Font font, byte[] bytes, int position) {
        try {
            int code = bytes[position] & 0xFF;
            return font.toUnicode(code);
@ -411,7 +469,7 @@ public class TextDecodingHelper {
        }
    }
-    private static String decodeTrueTypeFont(PDTrueTypeFont font, byte[] bytes, int position) {
+    private String decodeTrueTypeFont(PDTrueTypeFont font, byte[] bytes, int position) {
        try {
            int code = bytes[position] & 0xFF;
            String unicode = font.toUnicode(code);
@ -429,7 +487,7 @@ public class TextDecodingHelper {
        }
    }
-    private static int getTrueTypeCharLength(PDTrueTypeFont font, byte[] bytes, int position) {
+    private int getTrueTypeCharLength(PDTrueTypeFont font, byte[] bytes, int position) {
        try {
            // First try single byte
            int code = bytes[position] & 0xFF;
@ -454,7 +512,7 @@ public class TextDecodingHelper {
        }
    }
-    private static String decodeGenericFont(PDFont font, byte[] bytes, int position) {
+    private String decodeGenericFont(PDFont font, byte[] bytes, int position) {
        try {
            int code = bytes[position] & 0xFF;
            return font.toUnicode(code);
@ -463,13 +521,8 @@ public class TextDecodingHelper {
        }
    }
-    private static int getGenericCharLength(PDFont font, byte[] bytes, int position) {
+    private String handleUndecodableChar(byte[] bytes, int position, int length) {
        return 1; // Default to single byte for unknown font types
    }
    private static String handleUndecodableChar(byte[] bytes, int position, int length) {
        // Or try to interpret as ISO-8859-1 (Latin-1) as fallback
        try {
            byte[] charBytes = new byte[length];
            System.arraycopy(bytes, position, charBytes, 0, length);
@ -478,9 +531,7 @@ public class TextDecodingHelper {
                return fallback;
            }
        } catch (Exception e) {
            // Ignore and fall through to default
        }
-
+        return "<EFBFBD>";
        return "<EFBFBD>"; // Unicode replacement character instead of "?"
    }
 }
--- a/app/core/src/main/java/stirling/software/SPDF/utils/text/TextEncodingHelper.java
+++ b/app/core/src/main/java/stirling/software/SPDF/utils/text/TextEncodingHelper.java
@ -1,11 +1,6 @@
 package stirling.software.SPDF.utils.text;
 import java.io.IOException;
 import org.apache.pdfbox.pdmodel.font.PDFont;
 import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
 import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding;
 import org.apache.pdfbox.pdmodel.font.encoding.Encoding;
 import lombok.experimental.UtilityClass;
 import lombok.extern.slf4j.Slf4j;
@ -15,225 +10,360 @@ import lombok.extern.slf4j.Slf4j;
 public class TextEncodingHelper {
    public boolean canEncodeCharacters(PDFont font, String text) {
-        if (font == null || text == null || text.isEmpty()) {
+        if (font == null || text == null) {
            return false;
        }
-        try {
+        if (text.isEmpty()) {
            // Step 1: Primary check - full-string encoding (permissive for "good" cases)
            byte[] encoded = font.encode(text);
            if (encoded.length > 0) {
                log.debug(
                        "Text '{}' has good full-string encoding for font {} - permissively allowing",
                        text,
                        font.getName() != null ? font.getName() : "Unknown");
            return true;
        }
-            // Step 2: Smart array-based fallback for TJ operator-style text
+        try {
-            log.debug(
+            byte[] encoded = font.encode(text);
-                    "Full encoding failed for '{}' - using array-based fallback for font {}",
+            if (encoded.length > 0) {
-                    text,
+                return true;
-                    font.getName() != null ? font.getName() : "Unknown");
+            }
-
+        } catch (Exception e) {
            return validateAsCodePointArray(font, text);
        } catch (IOException | IllegalArgumentException e) {
            log.debug(
                    "Encoding exception for text '{}' with font {} - trying array fallback: {}",
                    text,
                    font.getName() != null ? font.getName() : "Unknown",
                    e.getMessage());
            if (isFontSubset(font.getName()) || hasCustomEncoding(font)) {
                return validateAsCodePointArray(font, text);
        }
-            return false; // Non-subset fonts with encoding exceptions are likely problematic
+        return validateAsCodePointArray(font, text);
        }
    }
    private boolean validateAsCodePointArray(PDFont font, String text) {
        if (text == null || text.isEmpty()) {
            return true;
        }
        int totalCodePoints = 0;
        int successfulCodePoints = 0;
        // Iterate through code points (handles surrogates correctly per Unicode docs)
        for (int i = 0; i < text.length(); ) {
            int codePoint = text.codePointAt(i);
            String charStr = new String(Character.toChars(codePoint));
            totalCodePoints++;
            try {
                // Test encoding for this code point
                byte[] charEncoded = font.encode(charStr);
                if (charEncoded.length > 0) {
                    try {
                        float charWidth = font.getStringWidth(charStr);
                        if (charWidth >= 0) {
                            successfulCodePoints++;
-                        log.debug(
+                        }
-                                "Code point '{}' (U+{}) encoded successfully",
+                    } catch (Exception e) {
-                                charStr,
+                        try {
-                                Integer.toHexString(codePoint).toUpperCase());
+                            if (canDecodeCharacter(font, charStr)) {
-                    } else {
+                                successfulCodePoints++;
-                        log.debug(
+                            }
-                                "Code point '{}' (U+{}) has invalid width: {}",
+                        } catch (Exception e2) {
-                                charStr,
+                        }
                                Integer.toHexString(codePoint).toUpperCase(),
                                charWidth);
                    }
                } else {
-                    log.debug(
+                    try {
-                            "Code point '{}' (U+{}) encoding failed - empty result",
+                        if (canDecodeCharacter(font, charStr)) {
-                            charStr,
+                            successfulCodePoints++;
-                            Integer.toHexString(codePoint).toUpperCase());
+                        }
                    } catch (Exception e) {
                    }
                }
            } catch (Exception e) {
                try {
                    if (canDecodeCharacter(font, charStr)) {
                        successfulCodePoints++;
                    }
                } catch (Exception e2) {
                    if (isBasicCharacter(codePoint)) {
                        successfulCodePoints++;
                    }
                }
            } catch (IOException | IllegalArgumentException e) {
                log.debug(
                        "Code point '{}' (U+{}) validation failed: {}",
                        charStr,
                        Integer.toHexString(codePoint).toUpperCase(),
                        e.getMessage());
            }
-            i += Character.charCount(codePoint); // Handle surrogates properly
+            i += Character.charCount(codePoint);
        }
-        double successRate =
+        if (totalCodePoints == 0) {
-                totalCodePoints > 0 ? (double) successfulCodePoints / totalCodePoints : 0;
+            return true;
        boolean isAcceptable = successRate >= 0.95;
        log.debug(
                "Array validation for '{}': {}/{} code points successful ({:.1f}%) - {}",
                text,
                successfulCodePoints,
                totalCodePoints,
                successRate * 100,
                isAcceptable ? "ALLOWING" : "rejecting");
        return isAcceptable;
        }
-    public boolean isTextSegmentRemovable(PDFont font, String text) {
+        double successRate = (double) successfulCodePoints / totalCodePoints;
-        if (font == null || text == null || text.isEmpty()) {
+        return successRate >= 0.1;
    }
    private boolean canDecodeCharacter(PDFont font, String charStr) {
        if (font == null || charStr == null || charStr.isEmpty()) {
            return false;
        }
-        // Log the attempt
+        try {
-        log.debug(
+            for (int code = 0; code <= 0xFFFF; code++) {
-                "Evaluating text segment for removal: '{}' with font {}",
+                try {
-                text,
+                    String decoded = font.toUnicode(code);
-                font.getName() != null ? font.getName() : "Unknown Font");
+                    if (decoded != null && decoded.equals(charStr)) {
                        return true;
                    }
                } catch (Exception e) {
                }
            }
        } catch (Exception e) {
        }
        return false;
    }
    /**
     * Tells whether a code point falls into the set this helper accepts without consulting the
     * font: printable ASCII (32-126), the Latin-1 range 160-255, any whitespace, or any letter or
     * digit as classified by {@link Character}.
     *
     * @param codePoint the Unicode code point to classify
     * @return {@code true} if the code point matches any of the categories above
     */
    private boolean isBasicCharacter(int codePoint) {
        // Printable ASCII.
        if (codePoint >= 32 && codePoint <= 126) {
            return true;
        }
        // Upper half of Latin-1, starting at the no-break space.
        if (codePoint >= 160 && codePoint <= 255) {
            return true;
        }
        if (Character.isWhitespace(codePoint)) {
            return true;
        }
        return Character.isLetterOrDigit(codePoint);
    }
    public boolean isTextSegmentRemovable(PDFont font, String text) {
        if (font == null || text == null) {
            return false;
        }
        if (text.isEmpty()) {
            return true;
        }
        if (isSimpleCharacter(text)) {
            try {
                font.encode(text);
                font.getStringWidth(text);
                log.debug(
                        "Text '{}' is a simple character and passed validation - allowing removal",
                        text);
                return true;
            } catch (Exception e) {
-                log.debug(
+                try {
-                        "Simple character '{}' failed basic validation with font {}: {}",
+                    return canHandleText(font, text);
-                        text,
+                } catch (Exception e2) {
                        font.getName() != null ? font.getName() : "Unknown",
                        e.getMessage());
                    return false;
                }
            }
        }
        // For complex text, require comprehensive validation
        return isTextFullyRemovable(font, text);
    }
    /**
     * Checks, code point by code point, whether every character of {@code text} can be handled
     * with the given font. A character counts as handled when the font encodes it to a non-empty
     * byte sequence, or {@code canDecodeCharacter} accepts it, or {@code isBasicCharacter} accepts
     * its code point. Exceptions raised by the font probes are treated as "not handled" for that
     * probe only.
     *
     * @param font the font to test against; {@code null} yields {@code false}
     * @param text the text to examine; {@code null} yields {@code false}, empty yields {@code true}
     * @return {@code true} only if every code point passed at least one of the checks
     */
    private boolean canHandleText(PDFont font, String text) {
        if (font == null || text == null) {
            return false;
        }

        final int length = text.length();
        int offset = 0;
        // An empty string never enters the loop and is therefore accepted.
        while (offset < length) {
            final int cp = text.codePointAt(offset);
            // Advance by one or two chars so surrogate pairs are consumed as a unit.
            offset += Character.charCount(cp);
            final String glyph = new String(Character.toChars(cp));

            boolean handled;
            try {
                handled = font.encode(glyph).length > 0;
            } catch (Exception ignored) {
                // Encoding failure is expected for some fonts; fall through to the next probe.
                handled = false;
            }

            if (!handled) {
                try {
                    handled = canDecodeCharacter(font, glyph);
                } catch (Exception ignored) {
                    handled = false;
                }
            }

            // Last resort: accept basic characters even when the font probes failed.
            if (!handled && !isBasicCharacter(cp)) {
                return false;
            }
        }
        return true;
    }
    public boolean isTextFullyRemovable(PDFont font, String text) {
-        if (font == null || text == null || text.isEmpty()) {
+        if (font == null || text == null) {
            return false;
        }
        if (text.isEmpty()) {
            return true;
        }
        try {
            if (!canEncodeCharacters(font, text)) {
                return false;
            }
            try {
            // Check 1: Verify encoding capability using new smart approach
            if (!canEncodeCharacters(font, text)) {
                log.debug(
                        "Text '{}' failed encoding validation for font {}",
                        text,
                        font.getName() != null ? font.getName() : "Unknown");
                return false;
            }
            // Check 2: Validate width calculation capability
                float width = font.getStringWidth(text);
-            if (width < 0) { // Allow zero width (invisible chars) but reject negative (invalid)
+                if (width < 0) {
                log.debug(
                        "Text '{}' has invalid width {} for font {}",
                        text,
                        width,
                        font.getName() != null ? font.getName() : "Unknown");
                return false; // Invalid metrics prevent accurate removal
            }
            // Check 3: Verify font descriptor completeness for redaction area calculation
            if (font.getFontDescriptor() == null) {
                log.debug(
                        "Missing font descriptor for font {}",
                        font.getName() != null ? font.getName() : "Unknown");
                    return false;
                }
            } catch (Exception e) {
                try {
                    if (!canCalculateTextWidth(font, text)) {
                        return false;
                    }
                } catch (Exception e2) {
                    return false;
                }
            }
            try {
                if (font.getFontDescriptor() == null) {
                    try {
                        return canHandleWithoutDescriptor(font, text);
                    } catch (Exception e) {
                        return false;
                    }
                }
            } catch (Exception e) {
                try {
                    return canHandleWithoutDescriptor(font, text);
                } catch (Exception e2) {
                    return false;
                }
            }
            // Check 4: Test bounding box calculation for redaction area
            try {
                font.getFontDescriptor().getFontBoundingBox();
-            } catch (IllegalArgumentException e) {
+            } catch (Exception e) {
-                log.debug(
+                try {
-                        "Font bounding box unavailable for font {}: {}",
+                    return canHandleWithoutBoundingBox(font, text);
-                        font.getName() != null ? font.getName() : "Unknown",
+                } catch (Exception e2) {
-                        e.getMessage());
+                    return false;
                }
            }
            return true;
        } catch (Exception e) {
            try {
                return canHandleText(font, text);
            } catch (Exception e2) {
                return false;
            }
        }
    }
    private boolean canCalculateTextWidth(PDFont font, String text) {
        if (font == null || text == null) {
            return false;
        }
-            log.debug(
+        if (text.isEmpty()) {
                    "Text '{}' passed comprehensive validation for font {}",
                    text,
                    font.getName() != null ? font.getName() : "Unknown");
            return true;
        }
-        } catch (IOException e) {
+        for (int i = 0; i < text.length(); ) {
-            log.debug(
+            int codePoint = text.codePointAt(i);
-                    "Text '{}' failed validation for font {} due to IO error: {}",
+            String charStr = new String(Character.toChars(codePoint));
-                    text,
+
-                    font.getName() != null ? font.getName() : "Unknown",
+            boolean hasWidth = false;
-                    e.getMessage());
+            try {
-            return false;
+                float charWidth = font.getStringWidth(charStr);
-        } catch (IllegalArgumentException e) {
+                if (charWidth >= 0) {
-            log.debug(
+                    hasWidth = true;
-                    "Text '{}' failed validation for font {} due to argument error: {}",
+                }
-                    text,
+            } catch (Exception e) {
-                    font.getName() != null ? font.getName() : "Unknown",
+                try {
-                    e.getMessage());
+                    float defaultWidth = getDefaultCharWidth(font);
                    if (defaultWidth > 0) {
                        hasWidth = true;
                    }
                } catch (Exception e2) {
                }
            }
            if (!hasWidth && isBasicCharacter(codePoint)) {
                hasWidth = true;
            }
            if (!hasWidth) {
                return false;
            }
            i += Character.charCount(codePoint);
        }
        return true;
    }
    /**
     * Produces a stand-in character width by measuring a fixed list of common characters with the
     * font and returning the first strictly positive result.
     *
     * @param font the font to measure with
     * @return the first positive width reported by {@code font.getStringWidth}, or {@code 500} when
     *     every probe fails or reports a non-positive width
     */
    private float getDefaultCharWidth(PDFont font) {
        final String[] probes = {" ", "a", "A", "0", ".", "e", "!", "i", "l", "I"};
        for (int idx = 0; idx < probes.length; idx++) {
            float measured;
            try {
                measured = font.getStringWidth(probes[idx]);
            } catch (Exception ignored) {
                // This probe is unusable with the font; try the next one.
                continue;
            }
            if (measured > 0) {
                return measured;
            }
        }
        // No probe produced a usable width.
        return 500;
    }
    /**
     * Fallback check used when the font descriptor is not available: the text is acceptable if its
     * widths can be calculated, and if that check itself throws, if {@code canHandleText} accepts
     * it.
     *
     * @param font the font to test against
     * @param text the text to examine
     * @return the result of {@code canCalculateTextWidth}, or of {@code canHandleText} when the
     *     former throws
     */
    private boolean canHandleWithoutDescriptor(PDFont font, String text) {
        boolean acceptable;
        try {
            acceptable = canCalculateTextWidth(font, text);
        } catch (Exception ignored) {
            acceptable = canHandleText(font, text);
        }
        return acceptable;
    }
    /**
     * Fallback check used when the font bounding box cannot be obtained: the text is acceptable if
     * its widths can be calculated, and if that check itself throws, if {@code canHandleText}
     * accepts it.
     *
     * @param font the font to test against
     * @param text the text to examine
     * @return the result of {@code canCalculateTextWidth}, or of {@code canHandleText} when the
     *     former throws
     */
    private boolean canHandleWithoutBoundingBox(PDFont font, String text) {
        boolean acceptable;
        try {
            acceptable = canCalculateTextWidth(font, text);
        } catch (Exception ignored) {
            acceptable = canHandleText(font, text);
        }
        return acceptable;
    }
    private boolean isSimpleCharacter(String text) {
-        if (text == null || text.isEmpty()) {
+        if (text == null) {
            return false;
        }
-        if (text.length() > 20) {
+        if (text.isEmpty()) {
            return true;
        }
        if (text.length() > 50) {
            return false;
        }
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            // Allow letters, digits, and whitespace (most common cases)
            if (Character.isLetterOrDigit(c) || Character.isWhitespace(c)) {
                continue;
            }
-            // Allow common ASCII punctuation
+            if (c >= 32 && c <= 126) {
-            if (c >= 32 && c <= 126 && ".,!?;:()-[]{}\"'/@#$%&*+=<>|\\~`".indexOf(c) >= 0) {
+                continue;
            }
            if (c >= 160 && c <= 255) {
                continue;
            }
            if (Character.getType(c) == Character.OTHER_PUNCTUATION
                    || Character.getType(c) == Character.DASH_PUNCTUATION
                    || Character.getType(c) == Character.START_PUNCTUATION
                    || Character.getType(c) == Character.END_PUNCTUATION
                    || Character.getType(c) == Character.CONNECTOR_PUNCTUATION
                    || Character.getType(c) == Character.OTHER_SYMBOL
                    || Character.getType(c) == Character.MATH_SYMBOL
                    || Character.getType(c) == Character.CURRENCY_SYMBOL) {
                continue;
            }
@ -243,111 +373,205 @@ public class TextEncodingHelper {
        return true;
    }
    /**
     * Determines whether a font appears to use a custom (non-standard) encoding.
     *
     * <p>For simple fonts the encoding object is inspected: a {@link DictionaryEncoding}, or an
     * encoding whose class name contains "Custom" or "Dictionary", is reported as custom. If
     * reading the encoding throws, the font is assumed to be custom. Type0 fonts and all other
     * font types are reported as not custom.
     *
     * @param font the font to inspect; {@code null} yields {@code false}
     * @return {@code true} if the font is judged to use a custom encoding, {@code false} otherwise
     *     (including when detection fails with an {@link IllegalArgumentException})
     */
    public boolean hasCustomEncoding(PDFont font) {
        if (font == null) {
            // Without this guard a null font would skip both instanceof branches and then
            // trigger a NullPointerException in the fallback logging below.
            return false;
        }
        try {
            if (font instanceof PDSimpleFont simpleFont) {
                try {
                    Encoding encoding = simpleFont.getEncoding();
                    if (encoding != null) {
                        // Check for dictionary-based custom encodings
                        if (encoding instanceof DictionaryEncoding) {
                            log.debug("Font {} uses DictionaryEncoding (custom)", font.getName());
                            return true;
                        }
                        // Name-based check catches encoding classes that are not
                        // DictionaryEncoding instances but are named like one.
                        String encodingName = encoding.getClass().getSimpleName();
                        if (encodingName.contains("Custom")
                                || encodingName.contains("Dictionary")) {
                            log.debug(
                                    "Font {} uses custom encoding: {}",
                                    font.getName(),
                                    encodingName);
                            return true;
                        }
                    }
                } catch (Exception e) {
                    log.debug(
                            "Encoding detection failed for font {}: {}",
                            font.getName(),
                            e.getMessage());
                    return true; // Assume custom if detection fails
                }
            }
            if (font instanceof org.apache.pdfbox.pdmodel.font.PDType0Font) {
                log.debug(
                        "Font {} is Type0 (CID) - generally uses standard CMaps",
                        font.getName() != null ? font.getName() : "Unknown");
                return false;
            }
            log.debug(
                    "Font {} type {} - assuming standard encoding",
                    font.getName() != null ? font.getName() : "Unknown",
                    font.getClass().getSimpleName());
            return false;
        } catch (IllegalArgumentException e) {
            log.debug(
                    "Custom encoding detection failed for font {}: {}",
                    font.getName() != null ? font.getName() : "Unknown",
                    e.getMessage());
            return false; // Be forgiving on detection failure
        }
    }
    public boolean fontSupportsCharacter(PDFont font, String character) {
-        if (font == null || character == null || character.isEmpty()) {
+        if (font == null || character == null) {
            return false;
        }
        if (character.isEmpty()) {
            return true;
        }
        try {
            byte[] encoded = font.encode(character);
-            if (encoded.length == 0) {
+            if (encoded.length > 0) {
-                return false;
+                try {
            }
                    float width = font.getStringWidth(character);
-            return width > 0;
+                    if (width >= 0) {
                        return true;
                    }
                } catch (Exception e) {
                }
                return true;
            }
        } catch (Exception e) {
        }
-        } catch (IOException | IllegalArgumentException e) {
+        try {
-            log.debug(
+            if (canDecodeCharacter(font, character)) {
-                    "Character '{}' not supported by font {}: {}",
+                return true;
-                    character,
+            }
-                    font.getName() != null ? font.getName() : "Unknown",
+        } catch (Exception e) {
-                    e.getMessage());
+        }
        for (int i = 0; i < character.length(); ) {
            int codePoint = character.codePointAt(i);
            if (isBasicCharacter(codePoint)) {
                i += Character.charCount(codePoint);
                continue;
            }
            return false;
        }
        return true;
    }
    public boolean isFontSubset(String fontName) {
        if (fontName == null) {
            return false;
        }
-        return fontName.matches("^[A-Z]{6}\\+.*");
+
        if (fontName.matches("^[A-Z]{6}\\+.*")) {
            return true;
        }
        if (fontName.matches("^[A-Z]{5}\\+.*")) {
            return true;
        }
        if (fontName.matches("^[A-Z]{4}\\+.*")) {
            return true;
        }
        if (fontName.contains("+")) {
            String prefix = fontName.split("\\+")[0];
            if (prefix.matches("^[A-Z]+$") && prefix.length() >= 4) {
                return true;
            }
        }
    public boolean canCalculateBasicWidths(PDFont font) {
        try {
            float spaceWidth = font.getStringWidth(" ");
            if (spaceWidth <= 0) {
        return false;
    }
-            String[] testChars = {"a", "A", "0", ".", "e", "!"};
+    public boolean canCalculateBasicWidths(PDFont font) {
        if (font == null) {
            return false;
        }
        try {
            float spaceWidth = font.getStringWidth(" ");
            if (spaceWidth > 0) {
                return true;
            }
        } catch (Exception e) {
        }
        String[] testChars = {
            "a", "A", "0", ".", "e", "!", "i", "l", "I", "m", "M", "W", "w", "1", "|", "-", "_",
            "=", "+", "(", ")", "[", "]", "{", "}", "<", ">", "/", "\\", "?", ",", ";", ":", "\"",
            "'", "`", "~", "@", "#", "$", "%", "^", "&", "*"
        };
        int successCount = 0;
        for (String ch : testChars) {
            try {
                float width = font.getStringWidth(ch);
                if (width > 0) {
                    successCount++;
                    if (successCount >= 3) {
                        return true;
                    }
-                } catch (IOException | IllegalArgumentException e) {
+                }
            } catch (Exception e) {
            }
        }
-            return false; // Can't calculate width for any test characters
+        try {
-        } catch (IOException | IllegalArgumentException e) {
+            for (int code = 32; code <= 126; code++) {
-            return false; // Font failed basic width calculation
+                try {
                    String ch = String.valueOf((char) code);
                    float width = font.getStringWidth(ch);
                    if (width > 0) {
                        successCount++;
                        if (successCount >= 1) {
                            return true;
                        }
                    }
                } catch (Exception e) {
                }
            }
        } catch (Exception e) {
        }
        try {
            for (int code = 160; code <= 255; code++) {
                try {
                    String ch = String.valueOf((char) code);
                    float width = font.getStringWidth(ch);
                    if (width > 0) {
                        return true;
                    }
                } catch (Exception e) {
                }
            }
        } catch (Exception e) {
        }
        return false;
    }
    /**
     * Probes whether the font can encode anything at all. A fixed list of common characters and
     * short words is tried first; if none encodes to a non-empty byte sequence, every 100th UTF-16
     * code unit from 0 through 0xFFFF is tried as a single-char string. Exceptions thrown by
     * individual probes are ignored.
     *
     * @param font the font to probe; {@code null} yields {@code false}
     * @return {@code true} as soon as any probe encodes to at least one byte, otherwise {@code
     *     false}
     */
    public boolean canEncodeAnyCharacter(PDFont font) {
        if (font == null) {
            return false;
        }

        final String[] probes = {
            "a", "A", "0", " ", ".", "!", "e", "i", "o", "u", "n", "t", "r", "s", "l", "1", "2",
            "3", "4", "5", "6", "7", "8", "9", ",", ".", ";", ":", "?", "!", "(", ")", "[", "]",
            "{", "}", "hello", "test", "sample", "abc", "123", "ABC"
        };
        for (int idx = 0; idx < probes.length; idx++) {
            try {
                if (font.encode(probes[idx]).length > 0) {
                    return true;
                }
            } catch (Exception ignored) {
                // This probe is not encodable with the font; keep looking.
            }
        }

        // Coarse sweep over the 16-bit range in steps of 100.
        int code = 0;
        while (code <= 0xFFFF) {
            try {
                if (font.encode(String.valueOf((char) code)).length > 0) {
                    return true;
                }
            } catch (Exception ignored) {
                // Not encodable; continue the sweep.
            }
            code += 100;
        }

        return false;
    }
    /**
     * Applies three increasingly expensive checks and accepts the font as soon as one succeeds:
     * it has a non-blank name, basic widths can be calculated, or at least one character can be
     * encoded. An exception in one check only fails that check.
     *
     * @param font the font to validate; {@code null} yields {@code false}
     * @return {@code true} if any of the three checks passes
     */
    public boolean isValidFont(PDFont font) {
        if (font == null) {
            return false;
        }

        boolean named;
        try {
            String fontName = font.getName();
            named = fontName != null && !fontName.trim().isEmpty();
        } catch (Exception ignored) {
            named = false;
        }
        if (named) {
            return true;
        }

        boolean measurable;
        try {
            measurable = canCalculateBasicWidths(font);
        } catch (Exception ignored) {
            measurable = false;
        }
        if (measurable) {
            return true;
        }

        boolean encodable;
        try {
            encodable = canEncodeAnyCharacter(font);
        } catch (Exception ignored) {
            encodable = false;
        }
        return encodable;
    }
 }
--- a/app/core/src/main/java/stirling/software/SPDF/utils/text/TextFinderUtils.java
+++ b/app/core/src/main/java/stirling/software/SPDF/utils/text/TextFinderUtils.java
@ -5,10 +5,6 @@ import java.util.List;
 import java.util.Set;
 import java.util.regex.Pattern;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.PDResources;
 import org.apache.pdfbox.pdmodel.font.PDFont;
 import lombok.experimental.UtilityClass;
 import lombok.extern.slf4j.Slf4j;
@ -16,128 +12,116 @@ import lombok.extern.slf4j.Slf4j;
@UtilityClass
 public class TextFinderUtils {
    /**
     * Decides whether a font is usable enough to be considered reliable. The font passes if {@code
     * TextEncodingHelper.canCalculateBasicWidths} succeeds, or failing that, if at least one of a
     * small set of basic characters passes {@code TextEncodingHelper.canEncodeCharacters}. A font
     * flagged as damaged is only logged; it still goes through the same checks.
     *
     * @param font the font to evaluate; {@code null} yields {@code false}
     * @return {@code true} if either check passes
     */
    public boolean validateFontReliability(PDFont font) {
        if (font == null) {
            return false;
        }

        // Damage is informational only; the checks below decide the outcome.
        if (font.isDamaged()) {
            log.debug(
                    "Font {} is marked as damaged - using TextEncodingHelper validation",
                    font.getName());
        }

        if (TextEncodingHelper.canCalculateBasicWidths(font)) {
            log.debug(
                    "Font {} passed basic width calculations - considering reliable",
                    font.getName());
            return true;
        }

        final String[] probes = {"1", "2", "3", "a", "A", "e", "E", " "};
        int encodable = 0;
        for (int idx = 0; idx < probes.length; idx++) {
            if (TextEncodingHelper.canEncodeCharacters(font, probes[idx])) {
                encodable++;
            }
        }

        if (encodable == 0) {
            log.debug("Font {} failed all basic tests - considering unreliable", font.getName());
            return false;
        }

        log.debug(
                "Font {} can process {}/{} basic characters - considering reliable",
                font.getName(),
                encodable,
                probes.length);
        return true;
    }
    public List<Pattern> createOptimizedSearchPatterns(
            Set<String> searchTerms, boolean useRegex, boolean wholeWordSearch) {
        List<Pattern> patterns = new ArrayList<>();
        if (searchTerms == null) {
            return patterns;
        }
        for (String term : searchTerms) {
-            if (term == null || term.trim().isEmpty()) {
+            if (term == null) {
                continue;
            }
            String trimmedTerm = term.trim();
            if (trimmedTerm.isEmpty()) {
                continue;
            }
            try {
-                String patternString = useRegex ? term.trim() : Pattern.quote(term.trim());
+                String patternString;
-
+                if (useRegex) {
-                if (wholeWordSearch) {
+                    patternString = trimmedTerm;
-                    patternString = applyWordBoundaries(term.trim(), patternString);
+                    try {
                        Pattern.compile(patternString);
                    } catch (Exception e) {
                        patternString = Pattern.quote(trimmedTerm);
                    }
                } else {
                    patternString = Pattern.quote(trimmedTerm);
                }
-                Pattern pattern =
+                if (wholeWordSearch) {
-                        Pattern.compile(
+                    patternString = applyWordBoundaries(trimmedTerm, patternString, useRegex);
-                                patternString, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
+                }
                int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL;
                try {
                    flags |= Pattern.CANON_EQ;
                } catch (Exception e) {
                }
                Pattern pattern = Pattern.compile(patternString, flags);
                patterns.add(pattern);
                log.debug("Created search pattern: '{}' -> '{}'", term.trim(), patternString);
            } catch (Exception e) {
-                log.warn("Failed to create pattern for term '{}': {}", term, e.getMessage());
+                try {
                    String quotedTerm = Pattern.quote(trimmedTerm);
                    if (wholeWordSearch) {
                        quotedTerm = applyWordBoundaries(trimmedTerm, quotedTerm, false);
                    }
                    Pattern fallbackPattern =
                            Pattern.compile(
                                    quotedTerm, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
                    patterns.add(fallbackPattern);
                } catch (Exception e2) {
                    try {
                        Pattern simplestPattern = Pattern.compile(Pattern.quote(trimmedTerm));
                        patterns.add(simplestPattern);
                    } catch (Exception e3) {
                    }
                }
            }
        }
        return patterns;
    }
-    private String applyWordBoundaries(String originalTerm, String patternString) {
+    private String applyWordBoundaries(String originalTerm, String patternString, boolean isRegex) {
-        if (originalTerm.length() == 1 && Character.isDigit(originalTerm.charAt(0))) {
+        if (originalTerm == null || originalTerm.isEmpty()) {
-            return "(?<![\\w])" + patternString + "(?![\\w])";
+            return patternString;
-        } else if (originalTerm.length() == 1) {
+        }
-            return "(?<![\\w])" + patternString + "(?![\\w])";
+
        try {
            if (originalTerm.length() == 1) {
                char c = originalTerm.charAt(0);
                if (Character.isDigit(c)) {
                    return "(?<![\\p{L}\\p{N}])" + patternString + "(?![\\p{L}\\p{N}])";
                } else if (Character.isLetter(c)) {
                    return "(?<![\\p{L}\\p{N}])" + patternString + "(?![\\p{L}\\p{N}])";
                } else {
                    return "(?<!\\S)" + patternString + "(?!\\S)";
                }
            }
            boolean startsWithWordChar = Character.isLetterOrDigit(originalTerm.charAt(0));
            boolean endsWithWordChar =
                    Character.isLetterOrDigit(originalTerm.charAt(originalTerm.length() - 1));
            String result = patternString;
            if (startsWithWordChar) {
                result = "(?<![\\p{L}\\p{N}])" + result;
            } else {
                result = "(?<!\\S)" + result;
            }
            if (endsWithWordChar) {
                result = result + "(?![\\p{L}\\p{N}])";
            } else {
                result = result + "(?!\\S)";
            }
            return result;
        } catch (Exception e) {
            try {
                return "\\b" + patternString + "\\b";
            } catch (Exception e2) {
                return patternString;
            }
        }
    /**
     * Reports whether more than half of the fonts referenced by a page fail {@link
     * #validateFontReliability(PDFont)}. Fonts that cannot be loaded are counted towards the total
     * but not towards the unusable count. Any failure of the analysis as a whole yields {@code
     * false}.
     *
     * @param page the page whose font resources are examined; {@code null} yields {@code false}
     * @return {@code true} if the page has fonts and a strict majority of them are unusable
     */
    public boolean hasProblematicFonts(PDPage page) {
        if (page == null) {
            return false;
        }

        try {
            final PDResources pageResources = page.getResources();
            if (pageResources == null) {
                return false;
            }

            int seen = 0;
            int unusable = 0;

            for (org.apache.pdfbox.cos.COSName key : pageResources.getFontNames()) {
                try {
                    PDFont candidate = pageResources.getFont(key);
                    if (candidate == null) {
                        continue;
                    }
                    seen++;
                    if (!validateFontReliability(candidate)) {
                        unusable++;
                    }
                } catch (Exception e) {
                    log.debug("Font loading failed for {}: {}", key.getName(), e.getMessage());
                    seen++;
                }
            }

            // Strict majority: twice the unusable count must exceed the total.
            final boolean majorityUnusable = seen > 0 && (unusable * 2 > seen);
            log.debug(
                    "Page font analysis: {}/{} fonts are completely unusable - page {} problematic",
                    unusable,
                    seen,
                    majorityUnusable ? "IS" : "is NOT");

            return majorityUnusable;

        } catch (Exception e) {
            log.warn("Font analysis failed for page: {}", e.getMessage());
            return false; // Be permissive if analysis fails
        }
    }
 }
--- a/app/core/src/main/java/stirling/software/SPDF/utils/text/WidthCalculator.java
+++ b/app/core/src/main/java/stirling/software/SPDF/utils/text/WidthCalculator.java
@ -1,88 +1,69 @@
 package stirling.software.SPDF.utils.text;
 import java.nio.charset.StandardCharsets;
 import java.text.Normalizer;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
 import org.apache.pdfbox.pdmodel.font.PDFont;
 import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
 import org.apache.pdfbox.pdmodel.font.PDType0Font;
 import lombok.experimental.UtilityClass;
 import lombok.extern.slf4j.Slf4j;
@Slf4j
@UtilityClass
 public class WidthCalculator {
    // Divisor applied to raw font metrics (e.g. bounding-box or average width) before they are
    // multiplied by the font size.
    private final int FONT_SCALE_FACTOR = 1000;
    // Per-character width, as a fraction of the font size, used as the last-resort estimate
    // when no font metric could be obtained.
    private final float CONSERVATIVE_CHAR_WIDTH_RATIO = 0.55f;
    // Factor applied to a character width estimated from the font bounding box.
    private final float BBOX_CHAR_WIDTH_RATIO = 0.65f;
    // Computed text widths, keyed by createCacheKey(font, text, fontSize).
    private final Map<String, Float> widthCache = new ConcurrentHashMap<>();
    // Per-font reliability verdicts, keyed by createReliabilityCacheKey(font).
    private final Map<String, Boolean> reliabilityCache = new ConcurrentHashMap<>();
    /**
     * Builds the width-cache key for a (font, text, size) combination.
     *
     * @param font font whose name forms the first key segment
     * @param text text being measured
     * @param fontSize font size, rendered with two decimals
     * @return the three parts joined with {@code |}
     */
    private String createCacheKey(PDFont font, String text, float fontSize) {
        String fontName = font.getName();
        return String.format("%s|%s|%.2f", fontName, text, fontSize);
    }
    /**
     * Builds the key under which a font's reliability verdict is cached.
     *
     * <p>The font name is used when it is available. The key is stored in a {@link
     * ConcurrentHashMap}, which rejects {@code null} keys, so a font that reports no name gets
     * an identity-based key instead. That key is unique per font object, so unnamed fonts
     * never share a cached verdict.
     *
     * @param font the font to derive the key from; must not be {@code null}
     * @return a non-null cache key
     */
    private String createReliabilityCacheKey(PDFont font) {
        String name = font.getName();
        if (name != null) {
            return name;
        }
        return font.getClass().getName() + "@" + System.identityHashCode(font);
    }
    public float calculateAccurateWidth(PDFont font, String text, float fontSize) {
-        return calculateAccurateWidth(font, text, fontSize, true);
+        if (font == null || text == null || fontSize <= 0) {
            return 0;
        }
-    public float calculateAccurateWidth(
+        if (text.isEmpty()) {
-            PDFont font, String text, float fontSize, boolean useCache) {
+            return 0;
        if (font == null || text == null || text.isEmpty() || fontSize <= 0) return 0;
        if (useCache) {
            String cacheKey = createCacheKey(font, text, fontSize);
            Float cachedWidth = widthCache.get(cacheKey);
            if (cachedWidth != null) return cachedWidth;
        }
        String normalizedText = normalizeText(text);
        Float directWidth = calculateDirectWidth(font, normalizedText, fontSize);
        if (directWidth != null) {
            if (useCache) widthCache.put(createCacheKey(font, text, fontSize), directWidth);
            return directWidth;
        }
        Float charByCharWidth = calculateCharacterByCharacterWidth(font, normalizedText, fontSize);
        if (charByCharWidth != null) {
            if (useCache) widthCache.put(createCacheKey(font, text, fontSize), charByCharWidth);
            return charByCharWidth;
        }
        Float glyphWidth = calculateGlyphBasedWidth(font, normalizedText, fontSize);
        if (glyphWidth != null) {
            if (useCache) widthCache.put(createCacheKey(font, text, fontSize), glyphWidth);
            return glyphWidth;
        }
-        float fallbackWidth = calculateComprehensiveFallbackWidth(font, normalizedText, fontSize);
+        return calculateComprehensiveFallbackWidth(font, normalizedText, fontSize);
        if (useCache) widthCache.put(createCacheKey(font, text, fontSize), fallbackWidth);
        return fallbackWidth;
    }
    /**
     * Converts text to Unicode NFC form.
     *
     * @param text text to normalize; may be {@code null}
     * @return the NFC form, an empty string for {@code null}, or the input unchanged if
     *     normalization throws
     */
    private String normalizeText(String text) {
        if (text != null) {
            try {
                return Normalizer.normalize(text, Normalizer.Form.NFC);
            } catch (Exception normalizationFailure) {
                return text;
            }
        }
        return "";
    }
    private Float calculateDirectWidth(PDFont font, String text, float fontSize) {
        if (!TextEncodingHelper.canEncodeCharacters(font, text)) return null;
        try {
-            float rawWidth = font.getStringWidth(text);
+            if (!TextEncodingHelper.canEncodeCharacters(font, text)) {
-            float scaledWidth = (rawWidth / FONT_SCALE_FACTOR) * fontSize;
+                return null;
-            return rawWidth >= 0 && scaledWidth >= 0 ? scaledWidth : null;
+            }
            float rawWidth = font.getStringWidth(text) / 1000f;
            if (rawWidth < 0) return null;
            float scaledWidth = rawWidth * fontSize;
            return scaledWidth >= 0 ? scaledWidth : null;
        } catch (Exception e) {
            return null;
        }
@ -96,7 +77,12 @@ public class WidthCalculator {
            for (int codePoint : codePoints) {
                String character = new String(Character.toChars(codePoint));
-                Float charWidth = calculateSingleCharacterWidth(font, character, fontSize);
+                Float charWidth =
                        calculateSingleCharacterWidth(font, character, fontSize, codePoint);
                if (charWidth == null) {
                    return null;
                }
                totalWidth += charWidth;
                if (previousCodePoint != -1) {
@ -104,7 +90,7 @@ public class WidthCalculator {
                }
                previousCodePoint = codePoint;
            }
-            return totalWidth;
+            return totalWidth >= 0 ? totalWidth : null;
        } catch (Exception e) {
            return null;
        }
@ -112,80 +98,99 @@ public class WidthCalculator {
    /**
     * Splits text into its Unicode code points, in order.
     *
     * <p>Surrogate pairs become one supplementary code point; an unpaired surrogate is kept
     * as its own value.
     *
     * @param text text to split; may be {@code null}
     * @return a new mutable list of code points, empty for {@code null} or empty input
     */
    private List<Integer> getCodePoints(String text) {
        List<Integer> collected = new ArrayList<>();
        if (text != null) {
            text.codePoints().forEach(collected::add);
        }
        return collected;
    }
-    private Float calculateSingleCharacterWidth(PDFont font, String character, float fontSize) {
+    private Float calculateSingleCharacterWidth(
            PDFont font, String character, float fontSize, int codePoint) {
        try {
-            byte[] encoded = null;
+            if (TextEncodingHelper.fontSupportsCharacter(font, character)) {
                try {
                    float raw = font.getStringWidth(character) / 1000f;
                    if (raw >= 0) return raw * fontSize;
                } catch (Exception ignored) {
                }
            }
        } catch (Exception e) {
        }
        try {
-                encoded = font.encode(character);
+            float w = font.getWidth(codePoint) / 1000f;
-                if (encoded.length == 0) encoded = null;
+            if (w >= 0) return w * fontSize;
-            } catch (Exception e) {
+        } catch (Exception ignored) {
                log.debug("Direct encoding failed for '{}': {}", character, e.getMessage());
        }
            if (encoded == null && font instanceof PDType0Font) {
        try {
-                    encoded = character.getBytes(StandardCharsets.UTF_8);
+            if (codePoint >= 0 && codePoint <= 0xFFFF) {
-                } catch (Exception e) {
+                float w = font.getWidth(codePoint) / 1000f;
-                    log.debug("UTF-8 encoding failed for '{}': {}", character, e.getMessage());
+                if (w >= 0) return w * fontSize;
            }
        } catch (Exception ignored) {
        }
-            if (encoded != null && encoded.length > 0) {
+        try {
-                Float width = calculateGlyphWidth(font, encoded, fontSize);
+            byte[] encoded = font.encode(character);
-                if (width != null && width >= 0) return width;
+            if (encoded.length > 0) {
            }
            return calculateAverageCharacterWidth(font, fontSize);
        } catch (Exception e) {
            log.debug(
                    "Single character width calculation failed for '{}': {}",
                    character,
                    e.getMessage());
            return calculateAverageCharacterWidth(font, fontSize);
        }
    }
    private Float calculateGlyphWidth(PDFont font, byte[] encoded, float fontSize) {
                for (byte b : encoded) {
                    try {
                        int glyphCode = b & 0xFF;
-                float glyphWidth = font.getWidth(glyphCode);
+                        float w = font.getWidth(glyphCode) / 1000f;
-
+                        if (w >= 0) return w * fontSize;
-                if (glyphWidth > 0) {
+                    } catch (Exception ignored) {
-                    return (glyphWidth / FONT_SCALE_FACTOR) * fontSize;
+                    }
                }
            }
        } catch (Exception ignored) {
        }
-                // Try alternative width methods
+        return calculateCategoryBasedWidth(font, codePoint, fontSize);
                try {
                    glyphWidth = font.getWidthFromFont(glyphCode);
                    if (glyphWidth > 0) {
                        return (glyphWidth / FONT_SCALE_FACTOR) * fontSize;
                    }
                } catch (Exception e) {
                    log.debug(
                            "getWidthFromFont failed for glyph {}: {}", glyphCode, e.getMessage());
                }
            } catch (Exception e) {
                log.debug("Glyph width calculation failed for byte {}: {}", b, e.getMessage());
            }
        }
        return null;
    }
    private float calculateKerning(
            PDFont font, int leftCodePoint, int rightCodePoint, float fontSize) {
-        return 0;
+        try {
            if (font instanceof PDSimpleFont) {
                PDSimpleFont simpleFont = (PDSimpleFont) font;
                try {
                    java.lang.reflect.Method getKerningMethod =
                            simpleFont.getClass().getMethod("getKerning", int.class, int.class);
                    float kerningValue =
                            (Float)
                                    getKerningMethod.invoke(
                                            simpleFont, leftCodePoint, rightCodePoint);
                    return (kerningValue / 1000f) * fontSize;
                } catch (Exception e) {
                }
            }
        } catch (Exception e) {
        }
        try {
            String leftChar = new String(Character.toChars(leftCodePoint));
            String rightChar = new String(Character.toChars(rightCodePoint));
            String combined = leftChar + rightChar;
            float combinedWidth = font.getStringWidth(combined) / 1000f;
            float leftWidth = font.getStringWidth(leftChar) / 1000f;
            float rightWidth = font.getStringWidth(rightChar) / 1000f;
            float kerning = combinedWidth - leftWidth - rightWidth;
            return kerning * fontSize;
        } catch (Exception e) {
        }
        return 0f;
    }
    private Float calculateGlyphBasedWidth(PDFont font, String text, float fontSize) {
@ -196,7 +201,6 @@ public class WidthCalculator {
                int codePoint = text.codePointAt(i);
                String character = new String(Character.toChars(codePoint));
                // Try to get glyph information more comprehensively
                Float charWidth =
                        calculateGlyphWidthComprehensively(font, character, codePoint, fontSize);
                if (charWidth == null) {
@ -207,19 +211,15 @@ public class WidthCalculator {
                i += Character.charCount(codePoint);
            }
-            log.debug("Glyph-based width calculation: {}", totalWidth);
+            return totalWidth >= 0 ? totalWidth : null;
            return totalWidth;
        } catch (Exception e) {
            log.debug("Glyph-based calculation failed: {}", e.getMessage());
            return null;
        }
    }
    private Float calculateGlyphWidthComprehensively(
            PDFont font, String character, int codePoint, float fontSize) {
        try {
            // Method 1: Try standard encoding
        try {
            byte[] encoded = font.encode(character);
            if (encoded.length > 0) {
@ -229,60 +229,110 @@ public class WidthCalculator {
                }
            }
        } catch (Exception e) {
                log.debug(
                        "Standard encoding failed for U+{}: {}",
                        Integer.toHexString(codePoint),
                        e.getMessage());
        }
            // Method 2: Try Unicode code point directly
        try {
-                float glyphWidth = font.getWidth(codePoint);
+            float glyphWidth = font.getWidth(codePoint) / 1000f;
-                if (glyphWidth > 0) {
+            if (glyphWidth >= 0) {
-                    return (glyphWidth / FONT_SCALE_FACTOR) * fontSize;
+                return glyphWidth * fontSize;
            }
        } catch (Exception e) {
        }
        try {
            if (codePoint <= 0xFFFF) {
                float glyphWidth = font.getWidth(codePoint) / 1000f;
                if (glyphWidth >= 0) {
                    return glyphWidth * fontSize;
                }
            }
        } catch (Exception e) {
        }
        try {
            for (int code = 0; code <= 0xFF; code++) {
                try {
                    String decoded = font.toUnicode(code);
                    if (decoded != null && decoded.equals(character)) {
                        float glyphWidth = font.getWidth(code) / 1000f;
                        if (glyphWidth >= 0) {
                            return glyphWidth * fontSize;
                        }
                    }
                } catch (Exception e) {
                }
            }
        } catch (Exception e) {
                log.debug(
                        "Unicode code point width failed for U+{}: {}",
                        Integer.toHexString(codePoint),
                        e.getMessage());
        }
            // Method 3: Character category based estimation
        return calculateCategoryBasedWidth(font, codePoint, fontSize);
        } catch (Exception e) {
            log.debug("Comprehensive glyph width calculation failed: {}", e.getMessage());
            return calculateAverageCharacterWidth(font, fontSize);
        }
    }
    private Float calculateWidthFromEncodedBytes(PDFont font, byte[] encoded, float fontSize) {
-        // Try each byte as a potential glyph code
+        if (encoded == null || encoded.length == 0) return null;
-        for (byte b : encoded) {
+
        if (font instanceof PDType0Font && encoded.length >= 2) {
            try {
-                int glyphCode = b & 0xFF;
+                int glyphCode = ((encoded[0] & 0xFF) << 8) | (encoded[1] & 0xFF);
-                float width = font.getWidth(glyphCode);
+                float width = font.getWidth(glyphCode) / 1000f;
-                if (width > 0) {
+                if (width >= 0) {
-                    return (width / FONT_SCALE_FACTOR) * fontSize;
+                    return width * fontSize;
                }
            } catch (Exception e) {
            }
            try {
                for (int i = 0; i <= encoded.length - 2; i++) {
                    int glyphCode = ((encoded[i] & 0xFF) << 8) | (encoded[i + 1] & 0xFF);
                    float width = font.getWidth(glyphCode) / 1000f;
                    if (width >= 0) {
                        return width * fontSize;
                    }
                }
            } catch (Exception e) {
                // Continue trying other bytes
            }
        }
-        if (encoded.length >= 2 && font instanceof PDType0Font) {
+        for (byte b : encoded) {
            try {
-                int glyphCode = ((encoded[0] & 0xFF) << 8) | (encoded[1] & 0xFF);
+                int glyphCode = b & 0xFF;
-                float width = font.getWidth(glyphCode);
+                float width = font.getWidth(glyphCode) / 1000f;
-                if (width > 0) {
+                if (width >= 0) {
-                    return (width / FONT_SCALE_FACTOR) * fontSize;
+                    return width * fontSize;
                }
            } catch (Exception e) {
                log.debug("Multi-byte glyph code interpretation failed: {}", e.getMessage());
            }
        }
        try {
            if (encoded.length >= 3) {
                int glyphCode =
                        ((encoded[0] & 0xFF) << 16)
                                | ((encoded[1] & 0xFF) << 8)
                                | (encoded[2] & 0xFF);
                float width = font.getWidth(glyphCode) / 1000f;
                if (width >= 0) {
                    return width * fontSize;
                }
            }
        } catch (Exception e) {
        }
        try {
            if (encoded.length >= 4) {
                int glyphCode =
                        ((encoded[0] & 0xFF) << 24)
                                | ((encoded[1] & 0xFF) << 16)
                                | ((encoded[2] & 0xFF) << 8)
                                | (encoded[3] & 0xFF);
                float width = font.getWidth(glyphCode) / 1000f;
                if (width >= 0) {
                    return width * fontSize;
                }
            }
        } catch (Exception e) {
        }
        return null;
    }
@ -291,198 +341,237 @@ public class WidthCalculator {
            int category = Character.getType(codePoint);
            float baseWidth = calculateAverageCharacterWidth(font, fontSize);
            // Adjust width based on character category
            float multiplier =
                    switch (category) {
                        case Character.UPPERCASE_LETTER -> 1.2f;
                        case Character.LOWERCASE_LETTER -> 1.0f;
-                        case Character.DECIMAL_DIGIT_NUMBER -> 1.0f;
+                        case Character.TITLECASE_LETTER -> 1.15f;
                        case Character.SPACE_SEPARATOR -> 0.5f;
                        case Character.DASH_PUNCTUATION -> 0.8f;
                        case Character.OTHER_PUNCTUATION -> 0.6f;
                        case Character.CURRENCY_SYMBOL -> 1.1f;
                        case Character.MATH_SYMBOL -> 1.0f;
                        case Character.MODIFIER_LETTER -> 0.7f;
-                        case Character.NON_SPACING_MARK -> 0.0f; // Combining characters
+                        case Character.OTHER_LETTER -> 1.0f;
                        case Character.DECIMAL_DIGIT_NUMBER -> 1.0f;
                        case Character.LETTER_NUMBER -> 1.0f;
                        case Character.OTHER_NUMBER -> 1.0f;
                        case Character.SPACE_SEPARATOR -> 0.5f;
                        case Character.LINE_SEPARATOR -> 0.0f;
                        case Character.PARAGRAPH_SEPARATOR -> 0.0f;
                        case Character.NON_SPACING_MARK -> 0.0f;
                        case Character.ENCLOSING_MARK -> 0.0f;
                        case Character.COMBINING_SPACING_MARK -> 0.3f;
                        case Character.DASH_PUNCTUATION -> 0.8f;
                        case Character.START_PUNCTUATION -> 0.6f;
                        case Character.END_PUNCTUATION -> 0.6f;
                        case Character.CONNECTOR_PUNCTUATION -> 0.6f;
                        case Character.OTHER_PUNCTUATION -> 0.6f;
                        case Character.MATH_SYMBOL -> 1.0f;
                        case Character.CURRENCY_SYMBOL -> 1.1f;
                        case Character.MODIFIER_SYMBOL -> 0.8f;
                        case Character.OTHER_SYMBOL -> 1.0f;
                        case Character.INITIAL_QUOTE_PUNCTUATION -> 0.6f;
                        case Character.FINAL_QUOTE_PUNCTUATION -> 0.6f;
                        case Character.CONTROL -> 0.0f;
                        case Character.FORMAT -> 0.0f;
                        case Character.PRIVATE_USE -> 1.0f;
                        case Character.SURROGATE -> 0.0f;
                        case Character.UNASSIGNED -> 1.0f;
                        default -> 1.0f;
                    };
-            return baseWidth * multiplier;
+            float result = baseWidth * multiplier;
            return result >= 0 ? result : baseWidth;
        } catch (Exception e) {
            log.debug("Category-based width calculation failed: {}", e.getMessage());
            return calculateAverageCharacterWidth(font, fontSize);
        }
    }
    private float calculateAverageCharacterWidth(PDFont font, float fontSize) {
        try {
-            float avgWidth = font.getAverageFontWidth();
+            float avgWidth = font.getAverageFontWidth() / 1000f;
-            return (avgWidth / FONT_SCALE_FACTOR) * fontSize;
+            if (avgWidth > 0) {
-        } catch (Exception e) {
+                return avgWidth * fontSize;
            log.debug("Average character width calculation failed: {}", e.getMessage());
            return CONSERVATIVE_CHAR_WIDTH_RATIO * fontSize;
            }
        } catch (Exception e) {
        }
        try {
            String[] testChars = {
                "a", "A", "e", "E", "i", "I", "o", "O", "n", "N", "t", "T", "r", "R", "s", "S", "0",
                "1", "2", "3", "4", "5"
            };
            float totalWidth = 0;
            int successCount = 0;
            for (String testChar : testChars) {
                try {
                    float width = font.getStringWidth(testChar) / 1000f;
                    if (width > 0) {
                        totalWidth += width;
                        successCount++;
                    }
                } catch (Exception e) {
                }
            }
            if (successCount > 0) {
                return (totalWidth / successCount) * fontSize;
            }
        } catch (Exception e) {
        }
        try {
            for (int code = 32; code <= 126; code++) {
                try {
                    float width = font.getWidth(code) / 1000f;
                    if (width > 0) {
                        return width * fontSize;
                    }
                } catch (Exception e) {
                }
            }
        } catch (Exception e) {
        }
        try {
            if (font.getFontDescriptor() != null) {
                PDRectangle bbox = font.getFontDescriptor().getFontBoundingBox();
                if (bbox != null) {
                    float avgCharWidth = bbox.getWidth() / 2000f;
                    return avgCharWidth * fontSize;
                }
            }
        } catch (Exception e) {
        }
        return CONSERVATIVE_CHAR_WIDTH_RATIO * fontSize;
    }
    private float calculateComprehensiveFallbackWidth(PDFont font, String text, float fontSize) {
        if (text == null || text.isEmpty()) {
            return 0;
        }
        try {
-            // Strategy 1: Use font bounding box with character analysis
+            float charWidth = calculateAverageCharacterWidth(font, fontSize);
            if (font.getFontDescriptor() != null
                    && font.getFontDescriptor().getFontBoundingBox() != null) {
                PDRectangle bbox = font.getFontDescriptor().getFontBoundingBox();
                float avgCharWidth = bbox.getWidth() / FONT_SCALE_FACTOR;
                // Analyze text composition for better estimation
                float adjustedWidth = analyzeTextComposition(text, avgCharWidth, fontSize);
                log.debug("Bounding box based fallback width: {}", adjustedWidth);
                return adjustedWidth;
            }
            // Strategy 2: Enhanced average width calculation
            float enhancedAverage = calculateEnhancedAverageWidth(font, text, fontSize);
            log.debug("Enhanced average fallback width: {}", enhancedAverage);
            return enhancedAverage;
        } catch (Exception e) {
            float conservativeWidth = text.length() * CONSERVATIVE_CHAR_WIDTH_RATIO * fontSize;
            log.debug("Conservative fallback width: {}", conservativeWidth);
            return conservativeWidth;
        }
    }
    private float analyzeTextComposition(String text, float avgCharWidth, float fontSize) {
            float totalWidth = 0;
        int spaceCount = 0;
        int upperCount = 0;
        int lowerCount = 0;
        int digitCount = 0;
        int punctCount = 0;
            for (int i = 0; i < text.length(); ) {
                int codePoint = text.codePointAt(i);
-            int category = Character.getType(codePoint);
+                Float specificWidth = calculateCategoryBasedWidth(font, codePoint, fontSize);
-
+                if (specificWidth != null) {
-            switch (category) {
+                    totalWidth += specificWidth;
-                case Character.SPACE_SEPARATOR -> {
+                } else {
-                    spaceCount++;
+                    totalWidth += charWidth;
                    totalWidth += avgCharWidth * 0.5f * fontSize;
                }
                case Character.UPPERCASE_LETTER -> {
                    upperCount++;
                    totalWidth += avgCharWidth * 1.2f * fontSize;
                }
                case Character.LOWERCASE_LETTER -> {
                    lowerCount++;
                    totalWidth += avgCharWidth * 1.0f * fontSize;
                }
                case Character.DECIMAL_DIGIT_NUMBER -> {
                    digitCount++;
                    totalWidth += avgCharWidth * 1.0f * fontSize;
                }
                case Character.OTHER_PUNCTUATION, Character.DASH_PUNCTUATION -> {
                    punctCount++;
                    totalWidth += avgCharWidth * 0.7f * fontSize;
                }
                default -> totalWidth += avgCharWidth * BBOX_CHAR_WIDTH_RATIO * fontSize;
            }
                i += Character.charCount(codePoint);
            }
        log.debug(
                "Text composition analysis - Spaces: {}, Upper: {}, Lower: {}, Digits: {}, Punct: {}",
                spaceCount,
                upperCount,
                lowerCount,
                digitCount,
                punctCount);
            return totalWidth;
    }
    private float calculateEnhancedAverageWidth(PDFont font, String text, float fontSize) {
        try {
            float baseAverage = font.getAverageFontWidth();
            float capHeight = 0;
            float xHeight = 0;
            if (font.getFontDescriptor() != null) {
                capHeight = font.getFontDescriptor().getCapHeight();
                xHeight = font.getFontDescriptor().getXHeight();
            }
            float adjustmentFactor = 1.0f;
            if (capHeight > 0 && xHeight > 0) {
                adjustmentFactor = Math.max(0.8f, Math.min(1.2f, xHeight / capHeight));
            }
            float adjustedAverage = (baseAverage * adjustmentFactor / FONT_SCALE_FACTOR) * fontSize;
            return text.length() * adjustedAverage;
        } catch (Exception e) {
            log.debug("Enhanced average width calculation failed: {}", e.getMessage());
            return text.length() * CONSERVATIVE_CHAR_WIDTH_RATIO * fontSize;
        }
        try {
            if (font.getFontDescriptor() != null
                    && font.getFontDescriptor().getFontBoundingBox() != null) {
                PDRectangle bbox = font.getFontDescriptor().getFontBoundingBox();
                float avgCharWidth = bbox.getWidth() / 1000f;
                return text.length() * avgCharWidth * BBOX_CHAR_WIDTH_RATIO * fontSize;
            }
        } catch (Exception e) {
        }
        return text.length() * calculateAverageCharacterWidth(font, fontSize);
    }
    public boolean isWidthCalculationReliable(PDFont font) {
-        if (font == null) {
+        if (font == null) return false;
            return false;
        }
        String cacheKey = createReliabilityCacheKey(font);
        Boolean cachedResult = reliabilityCache.get(cacheKey);
        if (cachedResult != null) {
            log.debug(
                    "Using cached reliability result for font {}: {}",
                    font.getName(),
                    cachedResult);
            return cachedResult;
        }
        boolean result = performReliabilityCheck(font);
        reliabilityCache.put(cacheKey, result);
        return result;
    }
    private boolean performReliabilityCheck(PDFont font) {
        try {
-            if (font.isDamaged()) {
+            if (font.isDamaged()) return false;
-                log.debug("Font {} is damaged", font.getName());
+        } catch (Exception e) {
                return false;
        }
-            if (!TextEncodingHelper.canCalculateBasicWidths(font)) {
+        try {
-                log.debug("Font {} cannot perform basic width calculations", font.getName());
+            if (!TextEncodingHelper.canCalculateBasicWidths(font)) return false;
-                return false;
+        } catch (Exception e) {
        }
        try {
            font.getStringWidth("A");
            return true;
        } catch (Exception e) {
                log.debug("Font {} failed basic width test: {}", font.getName(), e.getMessage());
        }
            // Check if we can at least get average width
        try {
-                float avgWidth = font.getAverageFontWidth();
+            font.getAverageFontWidth();
-                return avgWidth > 0;
+            return true;
        } catch (Exception e) {
        }
        try {
            float width = font.getWidth(65);
            return width >= 0;
        } catch (Exception e) {
                log.debug(
                        "Font {} cannot provide average width: {}", font.getName(), e.getMessage());
        }
        return false;
    }
    public float calculateMinimumTextWidth(PDFont font, String text, float fontSize) {
        if (font == null || text == null || text.isEmpty() || fontSize <= 0) {
            return 0;
        }
        try {
            float minWidth = calculateAccurateWidth(font, text, fontSize);
            if (minWidth > 0) {
                return minWidth * 0.8f;
            }
        } catch (Exception e) {
-            log.debug("Reliability check failed for font {}: {}", font.getName(), e.getMessage());
+        }
        return text.length() * fontSize * 0.3f;
    }
    /**
     * Estimates an upper bound for the rendered width of {@code text}.
     *
     * <p>When {@code calculateAccurateWidth} yields a positive width, the bound is that width
     * plus 20%. Otherwise (non-positive result or exception) it is one font size per UTF-16
     * code unit.
     *
     * @param font font used for measuring
     * @param text text to measure
     * @param fontSize font size
     * @return the upper bound, or 0 when the font or text is {@code null}, the text is empty,
     *     or the size is not positive
     */
    public float calculateMaximumTextWidth(PDFont font, String text, float fontSize) {
        boolean unusableInput = font == null || text == null || text.isEmpty() || fontSize <= 0;
        if (unusableInput) {
            return 0;
        }
        float measured = 0f;
        try {
            measured = calculateAccurateWidth(font, text, fontSize);
        } catch (Exception ignored) {
            // Measurement is best effort; the length-based bound below is used instead.
        }
        if (measured > 0) {
            return measured * 1.2f;
        }
        return text.length() * fontSize * 1.0f;
    }
    /**
     * Reports whether a width can be produced for {@code text} with {@code font}.
     *
     * <p>Both the direct and the character-by-character measurement are probed at a fixed
     * size of 12, but the outcome of the probes does not change the result: every non-null
     * input yields {@code true}.
     *
     * @param font font used for measuring
     * @param text text to probe
     * @return {@code false} only when {@code font} or {@code text} is {@code null}
     */
    public boolean canCalculateWidthForText(PDFont font, String text) {
        if (font == null || text == null) {
            return false;
        }
        if (text.isEmpty()) {
            return true;
        }
        final float probeSize = 12f;
        try {
            if (calculateDirectWidth(font, text, probeSize) != null) {
                return true;
            }
        } catch (Exception ignored) {
            // Fall through to the next strategy.
        }
        try {
            if (calculateCharacterByCharacterWidth(font, text, probeSize) != null) {
                return true;
            }
        } catch (Exception ignored) {
            // Fall through to the unconditional result below.
        }
        return true;
    }
 }