enhance text handling and encoding validation

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
2025-09-08 17:51:20 +02:00 · 2025-08-24 16:59:09 +02:00 · 2025-08-24 16:59:09 +02:00 · 7db58ad6dd
commit 7db58ad6dd
parent e396b6cbb8
6 changed files with 1914 additions and 1119 deletions
--- a/app/core/src/main/java/stirling/software/SPDF/pdf/TextFinder.java
+++ b/app/core/src/main/java/stirling/software/SPDF/pdf/TextFinder.java
@ -6,23 +6,20 @@ import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

-import lombok.Getter;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.text.PDFTextStripper;
 import org.apache.pdfbox.text.TextPosition;

-import lombok.extern.slf4j.Slf4j;
+import lombok.Getter;

 import stirling.software.SPDF.model.PDFText;

-@Slf4j
 public class TextFinder extends PDFTextStripper {

    private final String searchTerm;
    private final boolean useRegex;
    private final boolean wholeWordSearch;
-    @Getter
-    private final List<PDFText> foundTexts = new ArrayList<>();
+    @Getter private final List<PDFText> foundTexts = new ArrayList<>();

    private final List<TextPosition> pageTextPositions = new ArrayList<>();
    private final StringBuilder pageTextBuilder = new StringBuilder();
@ -45,20 +42,39 @@ public class TextFinder extends PDFTextStripper {

    @Override
    protected void writeString(String text, List<TextPosition> textPositions) {
-        pageTextBuilder.append(text);
-        pageTextPositions.addAll(textPositions);
+        for (TextPosition tp : textPositions) {
+            if (tp == null) continue;
+            String u = tp.getUnicode();
+            if (u == null) continue;
+            for (int i = 0; i < u.length(); ) {
+                int cp = u.codePointAt(i);
+                pageTextBuilder.append(Character.toChars(cp));
+                // Add one position per code unit appended (1-2 chars depending on surrogate)
+                int codeUnits = Character.charCount(cp);
+                for (int k = 0; k < codeUnits; k++) {
+                    pageTextPositions.add(tp);
+                }
+                i += codeUnits;
+            }
+        }
    }

    @Override
    protected void writeWordSeparator() {
-        pageTextBuilder.append(getWordSeparator());
-        pageTextPositions.add(null); // Placeholder for separator
+        String sep = getWordSeparator();
+        pageTextBuilder.append(sep);
+        for (int i = 0; i < sep.length(); i++) {
+            pageTextPositions.add(null);
+        }
    }

    @Override
    protected void writeLineSeparator() {
-        pageTextBuilder.append(getLineSeparator());
-        pageTextPositions.add(null); // Placeholder for separator
+        String sep = getLineSeparator();
+        pageTextBuilder.append(sep);
+        for (int i = 0; i < sep.length(); i++) {
+            pageTextPositions.add(null);
+        }
    }

    @Override
@ -91,27 +107,10 @@ public class TextFinder extends PDFTextStripper {
        Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
        Matcher matcher = pattern.matcher(text);

-        log.debug(
-                "Searching for '{}' in page {} with regex '{}' (wholeWord: {}, useRegex: {})",
-                processedSearchTerm,
-                getCurrentPageNo(),
-                regex,
-                wholeWordSearch,
-                useRegex);
-
-        int matchCount = 0;
        while (matcher.find()) {
-            matchCount++;
            int matchStart = matcher.start();
            int matchEnd = matcher.end();

-            log.debug(
-                    "Found match #{} at positions {}-{}: '{}'",
-                    matchCount,
-                    matchStart,
-                    matchEnd,
-                    matcher.group());
-
            float minX = Float.MAX_VALUE;
            float minY = Float.MAX_VALUE;
            float maxX = Float.MIN_VALUE;
@ -119,13 +118,7 @@ public class TextFinder extends PDFTextStripper {
            boolean foundPosition = false;

            for (int i = matchStart; i < matchEnd; i++) {
-                if (i >= pageTextPositions.size()) {
-                    log.debug(
-                            "Position index {} exceeds available positions ({})",
-                            i,
-                            pageTextPositions.size());
-                    continue;
-                }
+                if (i >= pageTextPositions.size()) continue;
                TextPosition pos = pageTextPositions.get(i);
                if (pos != null) {
                    foundPosition = true;
@ -137,11 +130,6 @@ public class TextFinder extends PDFTextStripper {
            }

            if (!foundPosition && matchStart < pageTextPositions.size()) {
-                log.debug(
-                        "Attempting to find nearby positions for match at {}-{}",
-                        matchStart,
-                        matchEnd);
-
                for (int i = Math.max(0, matchStart - 5);
                        i < Math.min(pageTextPositions.size(), matchEnd + 5);
                        i++) {
@ -166,29 +154,11 @@ public class TextFinder extends PDFTextStripper {
                                maxX,
                                maxY,
                                matcher.group()));
-                log.debug(
-                        "Added PDFText for match: page={}, bounds=({},{},{},{}), text='{}'",
-                        getCurrentPageNo() - 1,
-                        minX,
-                        minY,
-                        maxX,
-                        maxY,
-                        matcher.group());
            } else {
-                log.warn(
-                        "Found text match '{}' but no valid position data at {}-{}",
-                        matcher.group(),
-                        matchStart,
-                        matchEnd);
+                // no position info
            }
        }

-        log.debug(
-                "Page {} search complete: found {} matches for '{}'",
-                getCurrentPageNo(),
-                matchCount,
-                processedSearchTerm);
-
        super.endPage(page);
    }

--- a/app/core/src/main/java/stirling/software/SPDF/service/RedactionService.java
+++ b/app/core/src/main/java/stirling/software/SPDF/service/RedactionService.java
--- a/app/core/src/main/java/stirling/software/SPDF/utils/text/TextDecodingHelper.java
+++ b/app/core/src/main/java/stirling/software/SPDF/utils/text/TextDecodingHelper.java
@ -2,6 +2,7 @@ package stirling.software.SPDF.utils.text;

 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
+import java.nio.charset.Charset;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
@ -13,11 +14,9 @@ import org.apache.pdfbox.pdmodel.PDResources;
 import org.apache.pdfbox.pdmodel.font.*;

 import lombok.experimental.UtilityClass;
-import lombok.extern.slf4j.Slf4j;

 import stirling.software.SPDF.service.RedactionService;

-@Slf4j
 @UtilityClass
 public class TextDecodingHelper {

@ -25,6 +24,8 @@ public class TextDecodingHelper {
    private final int ASCII_UPPER_BOUND = 126;
    private final int EXTENDED_ASCII_LOWER_BOUND = 160;
    private final int EXTENDED_ASCII_UPPER_BOUND = 255;
+    private final int PROBLEMATIC_CODE_LOWER_BOUND = 65488;
+    private final int PROBLEMATIC_CODE_UPPER_BOUND = 65535;

    public PDFont getFontSafely(PDResources resources, COSName fontName) {
        if (resources == null || fontName == null) {
@ -33,27 +34,15 @@ public class TextDecodingHelper {

        try {
            PDFont font = resources.getFont(fontName);
-            if (font == null) {
-                return null;
-            }
-
+            if (font == null) return null;
            try {
-                String fontNameCheck = font.getName();
-                if (fontNameCheck == null || fontNameCheck.trim().isEmpty()) {
-                    log.debug("Font {} has null or empty name, skipping", fontName.getName());
-                    return null;
-                }
+                String n = font.getName();
+                if (n == null || n.trim().isEmpty()) return null;
            } catch (Exception e) {
-                log.debug(
-                        "Error accessing font name for {}, skipping: {}",
-                        fontName.getName(),
-                        e.getMessage());
                return null;
            }
-
            return font;
        } catch (Exception e) {
-            log.debug("Error retrieving font {}: {}", fontName.getName(), e.getMessage());
            return null;
        }
    }
@ -65,90 +54,160 @@ public class TextDecodingHelper {

        try {
            byte[] bytes = cosString.getBytes();
-            if (bytes.length == 0) {
-                return;
-            }
-
+            if (bytes.length == 0) return;
            String basicDecoded = tryDecodeWithFont(font, cosString);
            if (basicDecoded != null
                    && !basicDecoded.contains("?")
-                    && !basicDecoded.trim().isEmpty()) {
-                return;
-            }
-
+                    && !basicDecoded.trim().isEmpty()) return;
            decodeCharactersEnhanced(font, bytes);
-
        } catch (Exception e) {
-            log.error("Decoding failed: {}", e.getMessage(), e);
            try {
                tryDecodeWithFont(font, cosString);
-            } catch (Exception fallbackException) {
+            } catch (Exception ignored) {
            }
        }
    }

    public String decodeCharactersEnhanced(PDFont font, byte[] bytes) {
+        // Try font-guided decoding first
+        String fontPass = decodeByFontTables(font, bytes);
+        if (isAcceptable(fontPass)) return fontPass;
+
+        // Try UTF-8 strict decoding
+        String utf8 = tryDecodeCharset(bytes, StandardCharsets.UTF_8);
+        if (isAcceptable(utf8)) return utf8;
+
+        // UTF-16 BE/LE
+        String u16be = tryDecodeCharset(bytes, StandardCharsets.UTF_16BE);
+        if (isAcceptable(u16be)) return u16be;
+
+        String u16le = tryDecodeCharset(bytes, StandardCharsets.UTF_16LE);
+        if (isAcceptable(u16le)) return u16le;
+
+        // Common Windows encodings
+        String win1252 = tryDecodeCharset(bytes, Charset.forName("windows-1252"));
+        if (isAcceptable(win1252)) return win1252;
+
+        String win1250 = tryDecodeCharset(bytes, Charset.forName("windows-1250"));
+        if (isAcceptable(win1250)) return win1250;
+
+        String gb2312 = tryDecodeCharset(bytes, Charset.forName("GB2312"));
+        if (isAcceptable(gb2312)) return gb2312;
+
+        String big5 = tryDecodeCharset(bytes, Charset.forName("Big5"));
+        if (isAcceptable(big5)) return big5;
+
+        String shiftJis = tryDecodeCharset(bytes, Charset.forName("Shift_JIS"));
+        if (isAcceptable(shiftJis)) return shiftJis;
+
+        String euckr = tryDecodeCharset(bytes, Charset.forName("EUC-KR"));
+        if (isAcceptable(euckr)) return euckr;
+
+        // Fallback to ISO-8859-1
+        String latin1 = tryDecodeCharset(bytes, StandardCharsets.ISO_8859_1);
+        return isAcceptable(latin1) ? latin1 : null;
+    }
+
+    private String decodeByFontTables(PDFont font, byte[] bytes) {
+        if (font == null || bytes == null || bytes.length == 0) return null;
        StringBuilder out = new StringBuilder();
-        boolean hasValidCharacters = false;
        int i = 0;
        while (i < bytes.length) {
-            int code = bytes[i] & 0xFF;
-            String charStr = decodeSingleCharacter(font, code, bytes);
-
-            if (charStr == null && code >= 128 && i + 1 < bytes.length) {
-                int combinedCode = (code << 8) | (bytes[i + 1] & 0xFF);
-                charStr = decodeSingleCharacter(font, combinedCode, bytes);
-                if (charStr != null) {
-                    i += 2; // Skip the next byte
-                    out.append(charStr);
-                    hasValidCharacters = true;
-                    continue;
+            String ch = null;
+            int consumed = 1;
+            try {
+                ch = tryToUnicode(font, bytes, i);
+                if (ch == null && i + 1 < bytes.length) {
+                    consumed = 2;
+                    ch = tryToUnicode(font, bytes, i, 2);
                }
+            } catch (Exception ignored) {
            }
-
-            if (charStr != null && !charStr.isEmpty()) {
-                out.append(charStr);
-                hasValidCharacters = true;
-            } else {
-                out.append('?');
+            if (!isPrintable(ch)) {
+                // Handle problematic character codes specifically
+                ch = "<EFBFBD>";
            }
-            i++;
+            out.append(ch);
+            i += consumed;
        }
-        String result = out.toString();
-        return hasValidCharacters ? result : null;
+        String s = out.toString();
+        return isAcceptable(s) ? s : null;
+    }
+
+    private String tryToUnicode(PDFont font, byte[] bytes, int pos) {
+        int code = bytes[pos] & 0xFF;
+        try {
+            return font.toUnicode(code);
+        } catch (Exception e) {
+            return null;
+        }
+    }
+
+    private String tryToUnicode(PDFont font, byte[] bytes, int pos, int len) {
+        if (pos + len - 1 >= bytes.length) return null;
+        int code = 0;
+        for (int j = 0; j < len; j++) code = (code << 8) | (bytes[pos + j] & 0xFF);
+        try {
+            return font.toUnicode(code);
+        } catch (Exception e) {
+            return null;
+        }
+    }
+
+    private String tryDecodeCharset(byte[] bytes, Charset cs) {
+        try {
+            String s = new String(bytes, cs);
+            return isPrintable(s) ? s : null;
+        } catch (Exception e) {
+            return null;
+        }
+    }
+
+    private boolean isPrintable(String s) {
+        if (s == null || s.isEmpty()) return false;
+        int printable = 0;
+        for (int i = 0; i < s.length(); ) {
+            int cp = s.codePointAt(i);
+            int type = Character.getType(cp);
+            if (type != Character.CONTROL && type != Character.FORMAT && cp != 0xFFFD) printable++;
+            i += Character.charCount(cp);
+        }
+        return printable >= Math.max(1, s.codePointCount(0, s.length()) * 3 / 4);
+    }
+
+    private boolean isAcceptable(String s) {
+        return isPrintable(s);
    }

    public String decodeSingleCharacter(PDFont font, int code, byte[] bytes) {
        String charStr = null;
-
        try {
            charStr = font.toUnicode(code);
        } catch (Exception ignored) {
        }
-
        if (charStr == null && font instanceof PDType0Font type0Font) {
            try {
                int cid = (bytes.length > 1) ? ((bytes[0] & 0xFF) << 8) | (bytes[1] & 0xFF) : code;
                charStr = type0Font.toUnicode(cid);
-                log.debug("CID decoding successful for code {}: {}", cid, charStr);
-            } catch (Exception e) {
-                log.debug("CID decoding failed for code {}: {}", code, e.getMessage());
+            } catch (Exception ignored) {
            }
        }
-
        if (charStr == null && font.getName() != null && font.getName().contains("+")) {
            charStr = mapSubsetCharacter(code);
        }
-
        if (charStr == null) {
            charStr = fallbackCharacterMapping(code, bytes, font);
        }
-
        return charStr;
    }

    public String fallbackCharacterMapping(int code, byte[] bytes, PDFont font) {
        try {
+            // Handle problematic high-range character codes that cause .notdef warnings
+            if (code >= PROBLEMATIC_CODE_LOWER_BOUND && code <= PROBLEMATIC_CODE_UPPER_BOUND) {
+                return handleProblematicCharacterCode(code, font);
+            }
+
            if (font instanceof PDType0Font && bytes.length > 1) {
                return null;
            }
@ -164,18 +223,15 @@ public class TextDecodingHelper {
            String fontName = font.getName();
            if (fontName != null) {
                String lowerName = fontName.toLowerCase();
-                if (lowerName.contains("cjk")
-                        || lowerName.contains("gb")
-                        || lowerName.contains("jp")) {
-                    // Basic CJK fallback (expand with a lookup table if needed)
-                    if (code >= 0x4E00 && code <= 0x9FFF) {
-                        return String.valueOf(
-                                (char) code); // Unicode Basic Multilingual Plane for CJK
-                    }
+                if ((lowerName.contains("cjk")
+                                || lowerName.contains("gb")
+                                || lowerName.contains("jp"))
+                        && code >= 0x4E00
+                        && code <= 0x9FFF) {
+                    return String.valueOf((char) code);
                }
            }

-            // Fallback to UTF-8/16 decoding attempt for unknown encodings
            try {
                if (bytes.length >= 2) {
                    ByteBuffer buffer = ByteBuffer.wrap(bytes);
@ -184,7 +240,7 @@ public class TextDecodingHelper {
                    return charBuffer.toString();
                }
            } catch (Exception e) {
-                log.debug("UTF fallback failed: {}", e.getMessage());
+
            }

            return null;
@ -193,6 +249,19 @@ public class TextDecodingHelper {
        }
    }

+    public String handleProblematicCharacterCode(int code, PDFont font) {
+        if (code >= PROBLEMATIC_CODE_LOWER_BOUND && code <= PROBLEMATIC_CODE_UPPER_BOUND) {
+            int adjustedCode = code - PROBLEMATIC_CODE_LOWER_BOUND;
+            if (adjustedCode >= ASCII_LOWER_BOUND) {
+                return String.valueOf((char) adjustedCode);
+            }
+            if (font != null && font.getName() != null && font.getName().contains("+")) {
+                return mapSubsetCharacter(adjustedCode);
+            }
+        }
+        return "<EFBFBD>";
+    }
+
    public String mapSubsetCharacter(int code) {
        if (code >= ASCII_LOWER_BOUND && code <= ASCII_UPPER_BOUND) {
            return String.valueOf((char) code);
@ -221,6 +290,7 @@ public class TextDecodingHelper {
                    uni = font.toUnicode(code);
                } catch (Exception ignored) {
                }
+
                if (uni != null) {
                    out.append(uni);
                    anyMapped = true;
@ -239,6 +309,7 @@ public class TextDecodingHelper {
                    u1 = font.toUnicode(b1);
                } catch (Exception ignored) {
                }
+
                if (i + 1 < bytes.length) {
                    int b2 = bytes[i + 1] & 0xFF;
                    int code = (b1 << 8) | b2;
@ -247,6 +318,12 @@ public class TextDecodingHelper {
                        u2 = font.toUnicode(code);
                    } catch (Exception ignored) {
                    }
+
+                    // Handle problematic multi-byte codes
+                    if (u2 == null && code >= PROBLEMATIC_CODE_LOWER_BOUND) {
+                        u2 = handleProblematicCharacterCode(code, font);
+                    }
+
                    if (u2 != null) {
                        out.append(u2);
                        i += 2;
@ -267,12 +344,12 @@ public class TextDecodingHelper {
        }
    }

-    public static RedactionService.DecodedMapping buildDecodeMapping(PDFont font, byte[] bytes) {
+    public RedactionService.DecodedMapping buildDecodeMapping(PDFont font, byte[] bytes) {
        RedactionService.DecodedMapping map = new RedactionService.DecodedMapping();
        if (font == null || bytes == null) {
-            map.text = "";
-            map.charByteStart = new int[0];
-            map.charByteEnd = new int[0];
+            map.setText("");
+            map.setCharByteStart(new int[0]);
+            map.setCharByteEnd(new int[0]);
            return map;
        }

@ -289,46 +366,32 @@ public class TextDecodingHelper {

        while (i < bytes.length) {
            int start = i;
-            String decodedChar = null;
-            int consumed = 1;
+            String decodedChar;
+            int consumed;

            try {
                if (isType0) {
-                    // Handle CID fonts and multi-byte encodings
                    decodedChar = decodeType0Font((PDType0Font) font, bytes, i);
                    consumed = getType0CharLength((PDType0Font) font, bytes, i);
                } else if (isType1) {
-                    // Handle Type1 fonts with specific encoding
                    decodedChar = decodeType1Font((PDType1Font) font, bytes, i);
-                    consumed = getType1CharLength((PDType1Font) font, bytes, i);
+                    consumed = 1;
                } else if (isType3) {
-                    // Handle Type3 bitmap fonts
                    decodedChar = decodeType3Font((PDType3Font) font, bytes, i);
-                    consumed = 1; // Type3 typically single byte
+                    consumed = 1;
                } else if (isTrueType) {
-                    // Handle TrueType fonts
                    decodedChar = decodeTrueTypeFont((PDTrueTypeFont) font, bytes, i);
                    consumed = getTrueTypeCharLength((PDTrueTypeFont) font, bytes, i);
                } else {
-                    // Generic fallback for other font types
                    decodedChar = decodeGenericFont(font, bytes, i);
-                    consumed = getGenericCharLength(font, bytes, i);
-                }
-
-                // Validate the consumed length
-                if (consumed <= 0 || i + consumed > bytes.length) {
                    consumed = 1;
                }
-
+                if (consumed <= 0 || i + consumed > bytes.length) consumed = 1;
            } catch (Exception e) {
-                // Log the error for debugging purposes
-                System.err.println(
-                        "Error decoding character at position " + i + ": " + e.getMessage());
                decodedChar = null;
                consumed = 1;
            }

-            // Handle null or empty decoded characters
            if (decodedChar == null || decodedChar.isEmpty()) {
                decodedChar = handleUndecodableChar(bytes, i, consumed);
            }
@ -345,15 +408,14 @@ public class TextDecodingHelper {
            i += consumed;
        }

-        map.text = sb.toString();
-        map.charByteStart = starts.stream().mapToInt(Integer::intValue).toArray();
-        map.charByteEnd = ends.stream().mapToInt(Integer::intValue).toArray();
+        map.setText(sb.toString());
+        map.setCharByteStart(starts.stream().mapToInt(Integer::intValue).toArray());
+        map.setCharByteEnd(ends.stream().mapToInt(Integer::intValue).toArray());
        return map;
    }

-    private static String decodeType0Font(PDType0Font font, byte[] bytes, int position) {
+    private String decodeType0Font(PDType0Font font, byte[] bytes, int position) {
        try {
-            // Try multi-byte decoding first (common for CJK fonts)
            if (position + 1 < bytes.length) {
                int b1 = bytes[position] & 0xFF;
                int b2 = bytes[position + 1] & 0xFF;
@ -372,7 +434,7 @@ public class TextDecodingHelper {
        }
    }

-    private static int getType0CharLength(PDType0Font font, byte[] bytes, int position) {
+    private int getType0CharLength(PDType0Font font, byte[] bytes, int position) {
        try {
            if (position + 1 < bytes.length) {
                int b1 = bytes[position] & 0xFF;
@ -389,7 +451,7 @@ public class TextDecodingHelper {
        }
    }

-    private static String decodeType1Font(PDType1Font font, byte[] bytes, int position) {
+    private String decodeType1Font(PDType1Font font, byte[] bytes, int position) {
        try {
            int code = bytes[position] & 0xFF;
            return font.toUnicode(code);
@ -398,11 +460,7 @@ public class TextDecodingHelper {
        }
    }

-    private static int getType1CharLength(PDType1Font font, byte[] bytes, int position) {
-        return 1; // Type1 fonts are typically single-byte
-    }
-
-    private static String decodeType3Font(PDType3Font font, byte[] bytes, int position) {
+    private String decodeType3Font(PDType3Font font, byte[] bytes, int position) {
        try {
            int code = bytes[position] & 0xFF;
            return font.toUnicode(code);
@ -411,7 +469,7 @@ public class TextDecodingHelper {
        }
    }

-    private static String decodeTrueTypeFont(PDTrueTypeFont font, byte[] bytes, int position) {
+    private String decodeTrueTypeFont(PDTrueTypeFont font, byte[] bytes, int position) {
        try {
            int code = bytes[position] & 0xFF;
            String unicode = font.toUnicode(code);
@ -429,7 +487,7 @@ public class TextDecodingHelper {
        }
    }

-    private static int getTrueTypeCharLength(PDTrueTypeFont font, byte[] bytes, int position) {
+    private int getTrueTypeCharLength(PDTrueTypeFont font, byte[] bytes, int position) {
        try {
            // First try single byte
            int code = bytes[position] & 0xFF;
@ -454,7 +512,7 @@ public class TextDecodingHelper {
        }
    }

-    private static String decodeGenericFont(PDFont font, byte[] bytes, int position) {
+    private String decodeGenericFont(PDFont font, byte[] bytes, int position) {
        try {
            int code = bytes[position] & 0xFF;
            return font.toUnicode(code);
@ -463,13 +521,8 @@ public class TextDecodingHelper {
        }
    }

-    private static int getGenericCharLength(PDFont font, byte[] bytes, int position) {
-        return 1; // Default to single byte for unknown font types
-    }
+    private String handleUndecodableChar(byte[] bytes, int position, int length) {

-    private static String handleUndecodableChar(byte[] bytes, int position, int length) {
-
-        // Or try to interpret as ISO-8859-1 (Latin-1) as fallback
        try {
            byte[] charBytes = new byte[length];
            System.arraycopy(bytes, position, charBytes, 0, length);
@ -478,9 +531,7 @@ public class TextDecodingHelper {
                return fallback;
            }
        } catch (Exception e) {
-            // Ignore and fall through to default
        }
-
-        return "<EFBFBD>"; // Unicode replacement character instead of "?"
+        return "<EFBFBD>";
    }
 }
--- a/app/core/src/main/java/stirling/software/SPDF/utils/text/TextEncodingHelper.java
+++ b/app/core/src/main/java/stirling/software/SPDF/utils/text/TextEncodingHelper.java
@ -1,11 +1,6 @@
 package stirling.software.SPDF.utils.text;

-import java.io.IOException;
-
 import org.apache.pdfbox.pdmodel.font.PDFont;
-import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
-import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding;
-import org.apache.pdfbox.pdmodel.font.encoding.Encoding;

 import lombok.experimental.UtilityClass;
 import lombok.extern.slf4j.Slf4j;
@ -15,225 +10,360 @@ import lombok.extern.slf4j.Slf4j;
 public class TextEncodingHelper {

    public boolean canEncodeCharacters(PDFont font, String text) {
-        if (font == null || text == null || text.isEmpty()) {
+        if (font == null || text == null) {
            return false;
        }

+        if (text.isEmpty()) {
+            return true;
+        }
+
        try {
-            // Step 1: Primary check - full-string encoding (permissive for "good" cases)
            byte[] encoded = font.encode(text);
            if (encoded.length > 0) {
-                log.debug(
-                        "Text '{}' has good full-string encoding for font {} - permissively allowing",
-                        text,
-                        font.getName() != null ? font.getName() : "Unknown");
                return true;
            }
-
-            // Step 2: Smart array-based fallback for TJ operator-style text
-            log.debug(
-                    "Full encoding failed for '{}' - using array-based fallback for font {}",
-                    text,
-                    font.getName() != null ? font.getName() : "Unknown");
-
-            return validateAsCodePointArray(font, text);
-
-        } catch (IOException | IllegalArgumentException e) {
-            log.debug(
-                    "Encoding exception for text '{}' with font {} - trying array fallback: {}",
-                    text,
-                    font.getName() != null ? font.getName() : "Unknown",
-                    e.getMessage());
-
-            if (isFontSubset(font.getName()) || hasCustomEncoding(font)) {
-                return validateAsCodePointArray(font, text);
-            }
-
-            return false; // Non-subset fonts with encoding exceptions are likely problematic
+        } catch (Exception e) {
        }
+
+        return validateAsCodePointArray(font, text);
    }

    private boolean validateAsCodePointArray(PDFont font, String text) {
+        if (text == null || text.isEmpty()) {
+            return true;
+        }
+
        int totalCodePoints = 0;
        int successfulCodePoints = 0;

-        // Iterate through code points (handles surrogates correctly per Unicode docs)
        for (int i = 0; i < text.length(); ) {
            int codePoint = text.codePointAt(i);
            String charStr = new String(Character.toChars(codePoint));
            totalCodePoints++;

            try {
-                // Test encoding for this code point
                byte[] charEncoded = font.encode(charStr);
                if (charEncoded.length > 0) {
-                    float charWidth = font.getStringWidth(charStr);
-
-                    if (charWidth >= 0) {
-                        successfulCodePoints++;
-                        log.debug(
-                                "Code point '{}' (U+{}) encoded successfully",
-                                charStr,
-                                Integer.toHexString(codePoint).toUpperCase());
-                    } else {
-                        log.debug(
-                                "Code point '{}' (U+{}) has invalid width: {}",
-                                charStr,
-                                Integer.toHexString(codePoint).toUpperCase(),
-                                charWidth);
+                    try {
+                        float charWidth = font.getStringWidth(charStr);
+                        if (charWidth >= 0) {
+                            successfulCodePoints++;
+                        }
+                    } catch (Exception e) {
+                        try {
+                            if (canDecodeCharacter(font, charStr)) {
+                                successfulCodePoints++;
+                            }
+                        } catch (Exception e2) {
+                        }
                    }
                } else {
-                    log.debug(
-                            "Code point '{}' (U+{}) encoding failed - empty result",
-                            charStr,
-                            Integer.toHexString(codePoint).toUpperCase());
+                    try {
+                        if (canDecodeCharacter(font, charStr)) {
+                            successfulCodePoints++;
+                        }
+                    } catch (Exception e) {
+                    }
+                }
+            } catch (Exception e) {
+                try {
+                    if (canDecodeCharacter(font, charStr)) {
+                        successfulCodePoints++;
+                    }
+                } catch (Exception e2) {
+                    if (isBasicCharacter(codePoint)) {
+                        successfulCodePoints++;
+                    }
                }
-            } catch (IOException | IllegalArgumentException e) {
-                log.debug(
-                        "Code point '{}' (U+{}) validation failed: {}",
-                        charStr,
-                        Integer.toHexString(codePoint).toUpperCase(),
-                        e.getMessage());
            }

-            i += Character.charCount(codePoint); // Handle surrogates properly
+            i += Character.charCount(codePoint);
        }

-        double successRate =
-                totalCodePoints > 0 ? (double) successfulCodePoints / totalCodePoints : 0;
-        boolean isAcceptable = successRate >= 0.95;
+        if (totalCodePoints == 0) {
+            return true;
+        }

-        log.debug(
-                "Array validation for '{}': {}/{} code points successful ({:.1f}%) - {}",
-                text,
-                successfulCodePoints,
-                totalCodePoints,
-                successRate * 100,
-                isAcceptable ? "ALLOWING" : "rejecting");
-
-        return isAcceptable;
+        double successRate = (double) successfulCodePoints / totalCodePoints;
+        return successRate >= 0.1;
    }

-    public boolean isTextSegmentRemovable(PDFont font, String text) {
-        if (font == null || text == null || text.isEmpty()) {
+    private boolean canDecodeCharacter(PDFont font, String charStr) {
+        if (font == null || charStr == null || charStr.isEmpty()) {
            return false;
        }

-        // Log the attempt
-        log.debug(
-                "Evaluating text segment for removal: '{}' with font {}",
-                text,
-                font.getName() != null ? font.getName() : "Unknown Font");
+        try {
+            for (int code = 0; code <= 0xFFFF; code++) {
+                try {
+                    String decoded = font.toUnicode(code);
+                    if (decoded != null && decoded.equals(charStr)) {
+                        return true;
+                    }
+                } catch (Exception e) {
+                }
+            }
+        } catch (Exception e) {
+        }
+
+        return false;
+    }
+
+    private boolean isBasicCharacter(int codePoint) {
+        return (codePoint >= 32 && codePoint <= 126)
+                || (codePoint >= 160 && codePoint <= 255)
+                || Character.isWhitespace(codePoint)
+                || Character.isLetterOrDigit(codePoint);
+    }
+
+    public boolean isTextSegmentRemovable(PDFont font, String text) {
+        if (font == null || text == null) {
+            return false;
+        }
+
+        if (text.isEmpty()) {
+            return true;
+        }

        if (isSimpleCharacter(text)) {
            try {
                font.encode(text);
                font.getStringWidth(text);
-                log.debug(
-                        "Text '{}' is a simple character and passed validation - allowing removal",
-                        text);
                return true;
            } catch (Exception e) {
-                log.debug(
-                        "Simple character '{}' failed basic validation with font {}: {}",
-                        text,
-                        font.getName() != null ? font.getName() : "Unknown",
-                        e.getMessage());
-                return false;
+                try {
+                    return canHandleText(font, text);
+                } catch (Exception e2) {
+                    return false;
+                }
            }
        }

-        // For complex text, require comprehensive validation
        return isTextFullyRemovable(font, text);
    }

-    public boolean isTextFullyRemovable(PDFont font, String text) {
-        if (font == null || text == null || text.isEmpty()) {
+    private boolean canHandleText(PDFont font, String text) {
+        if (font == null || text == null) {
            return false;
        }

+        if (text.isEmpty()) {
+            return true;
+        }
+
+        for (int i = 0; i < text.length(); ) {
+            int codePoint = text.codePointAt(i);
+            String charStr = new String(Character.toChars(codePoint));
+
+            boolean canHandle = false;
+
+            try {
+                byte[] encoded = font.encode(charStr);
+                if (encoded.length > 0) {
+                    canHandle = true;
+                }
+            } catch (Exception e) {
+            }
+
+            if (!canHandle) {
+                try {
+                    if (canDecodeCharacter(font, charStr)) {
+                        canHandle = true;
+                    }
+                } catch (Exception e) {
+                }
+            }
+
+            if (!canHandle && isBasicCharacter(codePoint)) {
+                canHandle = true;
+            }
+
+            if (!canHandle) {
+                return false;
+            }
+
+            i += Character.charCount(codePoint);
+        }
+
+        return true;
+    }
+
+    public boolean isTextFullyRemovable(PDFont font, String text) {
+        if (font == null || text == null) {
+            return false;
+        }
+
+        if (text.isEmpty()) {
+            return true;
+        }
+
        try {
-            // Check 1: Verify encoding capability using new smart approach
            if (!canEncodeCharacters(font, text)) {
-                log.debug(
-                        "Text '{}' failed encoding validation for font {}",
-                        text,
-                        font.getName() != null ? font.getName() : "Unknown");
                return false;
            }

-            // Check 2: Validate width calculation capability
-            float width = font.getStringWidth(text);
-            if (width < 0) { // Allow zero width (invisible chars) but reject negative (invalid)
-                log.debug(
-                        "Text '{}' has invalid width {} for font {}",
-                        text,
-                        width,
-                        font.getName() != null ? font.getName() : "Unknown");
-                return false; // Invalid metrics prevent accurate removal
+            try {
+                float width = font.getStringWidth(text);
+                if (width < 0) {
+                    return false;
+                }
+            } catch (Exception e) {
+                try {
+                    if (!canCalculateTextWidth(font, text)) {
+                        return false;
+                    }
+                } catch (Exception e2) {
+                    return false;
+                }
            }

-            // Check 3: Verify font descriptor completeness for redaction area calculation
-            if (font.getFontDescriptor() == null) {
-                log.debug(
-                        "Missing font descriptor for font {}",
-                        font.getName() != null ? font.getName() : "Unknown");
-                return false;
+            try {
+                if (font.getFontDescriptor() == null) {
+                    try {
+                        return canHandleWithoutDescriptor(font, text);
+                    } catch (Exception e) {
+                        return false;
+                    }
+                }
+            } catch (Exception e) {
+                try {
+                    return canHandleWithoutDescriptor(font, text);
+                } catch (Exception e2) {
+                    return false;
+                }
            }

-            // Check 4: Test bounding box calculation for redaction area
            try {
                font.getFontDescriptor().getFontBoundingBox();
-            } catch (IllegalArgumentException e) {
-                log.debug(
-                        "Font bounding box unavailable for font {}: {}",
-                        font.getName() != null ? font.getName() : "Unknown",
-                        e.getMessage());
+            } catch (Exception e) {
+                try {
+                    return canHandleWithoutBoundingBox(font, text);
+                } catch (Exception e2) {
+                    return false;
+                }
+            }
+
+            return true;
+
+        } catch (Exception e) {
+            try {
+                return canHandleText(font, text);
+            } catch (Exception e2) {
+                return false;
+            }
+        }
+    }
+
+    private boolean canCalculateTextWidth(PDFont font, String text) {
+        if (font == null || text == null) {
+            return false;
+        }
+
+        if (text.isEmpty()) {
+            return true;
+        }
+
+        for (int i = 0; i < text.length(); ) {
+            int codePoint = text.codePointAt(i);
+            String charStr = new String(Character.toChars(codePoint));
+
+            boolean hasWidth = false;
+            try {
+                float charWidth = font.getStringWidth(charStr);
+                if (charWidth >= 0) {
+                    hasWidth = true;
+                }
+            } catch (Exception e) {
+                try {
+                    float defaultWidth = getDefaultCharWidth(font);
+                    if (defaultWidth > 0) {
+                        hasWidth = true;
+                    }
+                } catch (Exception e2) {
+                }
+            }
+
+            if (!hasWidth && isBasicCharacter(codePoint)) {
+                hasWidth = true;
+            }
+
+            if (!hasWidth) {
                return false;
            }

-            log.debug(
-                    "Text '{}' passed comprehensive validation for font {}",
-                    text,
-                    font.getName() != null ? font.getName() : "Unknown");
-            return true;
+            i += Character.charCount(codePoint);
+        }

-        } catch (IOException e) {
-            log.debug(
-                    "Text '{}' failed validation for font {} due to IO error: {}",
-                    text,
-                    font.getName() != null ? font.getName() : "Unknown",
-                    e.getMessage());
-            return false;
-        } catch (IllegalArgumentException e) {
-            log.debug(
-                    "Text '{}' failed validation for font {} due to argument error: {}",
-                    text,
-                    font.getName() != null ? font.getName() : "Unknown",
-                    e.getMessage());
-            return false;
+        return true;
+    }
+
+    private float getDefaultCharWidth(PDFont font) {
+        String[] testChars = {" ", "a", "A", "0", ".", "e", "!", "i", "l", "I"};
+        for (String testChar : testChars) {
+            try {
+                float width = font.getStringWidth(testChar);
+                if (width > 0) {
+                    return width;
+                }
+            } catch (Exception e) {
+            }
+        }
+        return 500;
+    }
+
+    private boolean canHandleWithoutDescriptor(PDFont font, String text) {
+        try {
+            return canCalculateTextWidth(font, text);
+        } catch (Exception e) {
+            return canHandleText(font, text);
+        }
+    }
+
+    private boolean canHandleWithoutBoundingBox(PDFont font, String text) {
+        try {
+            return canCalculateTextWidth(font, text);
+        } catch (Exception e) {
+            return canHandleText(font, text);
        }
    }

    private boolean isSimpleCharacter(String text) {
-        if (text == null || text.isEmpty()) {
+        if (text == null) {
            return false;
        }

-        if (text.length() > 20) {
+        if (text.isEmpty()) {
+            return true;
+        }
+
+        if (text.length() > 50) {
            return false;
        }

        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);

-            // Allow letters, digits, and whitespace (most common cases)
            if (Character.isLetterOrDigit(c) || Character.isWhitespace(c)) {
                continue;
            }

-            // Allow common ASCII punctuation
-            if (c >= 32 && c <= 126 && ".,!?;:()-[]{}\"'/@#$%&*+=<>|\\~`".indexOf(c) >= 0) {
+            if (c >= 32 && c <= 126) {
+                continue;
+            }
+
+            if (c >= 160 && c <= 255) {
+                continue;
+            }
+
+            if (Character.getType(c) == Character.OTHER_PUNCTUATION
+                    || Character.getType(c) == Character.DASH_PUNCTUATION
+                    || Character.getType(c) == Character.START_PUNCTUATION
+                    || Character.getType(c) == Character.END_PUNCTUATION
+                    || Character.getType(c) == Character.CONNECTOR_PUNCTUATION
+                    || Character.getType(c) == Character.OTHER_SYMBOL
+                    || Character.getType(c) == Character.MATH_SYMBOL
+                    || Character.getType(c) == Character.CURRENCY_SYMBOL) {
                continue;
            }

@ -243,111 +373,205 @@ public class TextEncodingHelper {
        return true;
    }

-    public boolean hasCustomEncoding(PDFont font) {
-        try {
-            if (font instanceof PDSimpleFont simpleFont) {
-                try {
-                    Encoding encoding = simpleFont.getEncoding();
-                    if (encoding != null) {
-                        // Check for dictionary-based custom encodings
-                        if (encoding instanceof DictionaryEncoding) {
-                            log.debug("Font {} uses DictionaryEncoding (custom)", font.getName());
-                            return true;
-                        }
-
-                        String encodingName = encoding.getClass().getSimpleName();
-                        if (encodingName.contains("Custom")
-                                || encodingName.contains("Dictionary")) {
-                            log.debug(
-                                    "Font {} uses custom encoding: {}",
-                                    font.getName(),
-                                    encodingName);
-                            return true;
-                        }
-                    }
-                } catch (Exception e) {
-                    log.debug(
-                            "Encoding detection failed for font {}: {}",
-                            font.getName(),
-                            e.getMessage());
-                    return true; // Assume custom if detection fails
-                }
-            }
-
-            if (font instanceof org.apache.pdfbox.pdmodel.font.PDType0Font) {
-                log.debug(
-                        "Font {} is Type0 (CID) - generally uses standard CMaps",
-                        font.getName() != null ? font.getName() : "Unknown");
-                return false;
-            }
-
-            log.debug(
-                    "Font {} type {} - assuming standard encoding",
-                    font.getName() != null ? font.getName() : "Unknown",
-                    font.getClass().getSimpleName());
-            return false;
-
-        } catch (IllegalArgumentException e) {
-            log.debug(
-                    "Custom encoding detection failed for font {}: {}",
-                    font.getName() != null ? font.getName() : "Unknown",
-                    e.getMessage());
-            return false; // Be forgiving on detection failure
-        }
-    }
-
    public boolean fontSupportsCharacter(PDFont font, String character) {
-        if (font == null || character == null || character.isEmpty()) {
+        if (font == null || character == null) {
            return false;
        }

+        if (character.isEmpty()) {
+            return true;
+        }
+
        try {
            byte[] encoded = font.encode(character);
-            if (encoded.length == 0) {
-                return false;
+            if (encoded.length > 0) {
+                try {
+                    float width = font.getStringWidth(character);
+                    if (width >= 0) {
+                        return true;
+                    }
+                } catch (Exception e) {
+                }
+                return true;
            }
+        } catch (Exception e) {
+        }

-            float width = font.getStringWidth(character);
-            return width > 0;
+        try {
+            if (canDecodeCharacter(font, character)) {
+                return true;
+            }
+        } catch (Exception e) {
+        }

-        } catch (IOException | IllegalArgumentException e) {
-            log.debug(
-                    "Character '{}' not supported by font {}: {}",
-                    character,
-                    font.getName() != null ? font.getName() : "Unknown",
-                    e.getMessage());
+        for (int i = 0; i < character.length(); ) {
+            int codePoint = character.codePointAt(i);
+            if (isBasicCharacter(codePoint)) {
+                i += Character.charCount(codePoint);
+                continue;
+            }
            return false;
        }
+
+        return true;
    }

    public boolean isFontSubset(String fontName) {
        if (fontName == null) {
            return false;
        }
-        return fontName.matches("^[A-Z]{6}\\+.*");
+
+        if (fontName.matches("^[A-Z]{6}\\+.*")) {
+            return true;
+        }
+
+        if (fontName.matches("^[A-Z]{5}\\+.*")) {
+            return true;
+        }
+
+        if (fontName.matches("^[A-Z]{4}\\+.*")) {
+            return true;
+        }
+
+        if (fontName.contains("+")) {
+            String prefix = fontName.split("\\+")[0];
+            if (prefix.matches("^[A-Z]+$") && prefix.length() >= 4) {
+                return true;
+            }
+        }
+
+        return false;
    }

    public boolean canCalculateBasicWidths(PDFont font) {
+        if (font == null) {
+            return false;
+        }
+
        try {
            float spaceWidth = font.getStringWidth(" ");
-            if (spaceWidth <= 0) {
-                return false;
+            if (spaceWidth > 0) {
+                return true;
            }
+        } catch (Exception e) {
+        }

-            String[] testChars = {"a", "A", "0", ".", "e", "!"};
-            for (String ch : testChars) {
+        String[] testChars = {
+            "a", "A", "0", ".", "e", "!", "i", "l", "I", "m", "M", "W", "w", "1", "|", "-", "_",
+            "=", "+", "(", ")", "[", "]", "{", "}", "<", ">", "/", "\\", "?", ",", ";", ":", "\"",
+            "'", "`", "~", "@", "#", "$", "%", "^", "&", "*"
+        };
+        int successCount = 0;
+
+        for (String ch : testChars) {
+            try {
+                float width = font.getStringWidth(ch);
+                if (width > 0) {
+                    successCount++;
+                    if (successCount >= 3) {
+                        return true;
+                    }
+                }
+            } catch (Exception e) {
+            }
+        }
+
+        try {
+            for (int code = 32; code <= 126; code++) {
                try {
+                    String ch = String.valueOf((char) code);
+                    float width = font.getStringWidth(ch);
+                    if (width > 0) {
+                        successCount++;
+                        if (successCount >= 1) {
+                            return true;
+                        }
+                    }
+                } catch (Exception e) {
+                }
+            }
+        } catch (Exception e) {
+        }
+
+        try {
+            for (int code = 160; code <= 255; code++) {
+                try {
+                    String ch = String.valueOf((char) code);
                    float width = font.getStringWidth(ch);
                    if (width > 0) {
                        return true;
                    }
-                } catch (IOException | IllegalArgumentException e) {
+                } catch (Exception e) {
                }
            }
-
-            return false; // Can't calculate width for any test characters
-        } catch (IOException | IllegalArgumentException e) {
-            return false; // Font failed basic width calculation
+        } catch (Exception e) {
        }
+
+        return false;
+    }
+
+    public boolean canEncodeAnyCharacter(PDFont font) {
+        if (font == null) {
+            return false;
+        }
+
+        String[] testStrings = {
+            "a", "A", "0", " ", ".", "!", "e", "i", "o", "u", "n", "t", "r", "s", "l", "1", "2",
+            "3", "4", "5", "6", "7", "8", "9", ",", ".", ";", ":", "?", "!", "(", ")", "[", "]",
+            "{", "}", "hello", "test", "sample", "abc", "123", "ABC"
+        };
+
+        for (String testStr : testStrings) {
+            try {
+                byte[] encoded = font.encode(testStr);
+                if (encoded.length > 0) {
+                    return true;
+                }
+            } catch (Exception e) {
+            }
+        }
+
+        for (int code = 0; code <= 0xFFFF; code += 100) {
+            try {
+                String testStr = String.valueOf((char) code);
+                byte[] encoded = font.encode(testStr);
+                if (encoded.length > 0) {
+                    return true;
+                }
+            } catch (Exception e) {
+            }
+        }
+
+        return false;
+    }
+
+    public boolean isValidFont(PDFont font) {
+        if (font == null) {
+            return false;
+        }
+
+        try {
+            String name = font.getName();
+            if (name != null && !name.trim().isEmpty()) {
+                return true;
+            }
+        } catch (Exception e) {
+        }
+
+        try {
+            if (canCalculateBasicWidths(font)) {
+                return true;
+            }
+        } catch (Exception e) {
+        }
+
+        try {
+            if (canEncodeAnyCharacter(font)) {
+                return true;
+            }
+        } catch (Exception e) {
+        }
+
+        return false;
    }
 }
--- a/app/core/src/main/java/stirling/software/SPDF/utils/text/TextFinderUtils.java
+++ b/app/core/src/main/java/stirling/software/SPDF/utils/text/TextFinderUtils.java
@ -5,10 +5,6 @@ import java.util.List;
 import java.util.Set;
 import java.util.regex.Pattern;

-import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.PDResources;
-import org.apache.pdfbox.pdmodel.font.PDFont;
-
 import lombok.experimental.UtilityClass;
 import lombok.extern.slf4j.Slf4j;

@ -16,128 +12,116 @@ import lombok.extern.slf4j.Slf4j;
 @UtilityClass
 public class TextFinderUtils {

-    public boolean validateFontReliability(PDFont font) {
-        if (font == null) {
-            return false;
-        }
-
-        if (font.isDamaged()) {
-            log.debug(
-                    "Font {} is marked as damaged - using TextEncodingHelper validation",
-                    font.getName());
-        }
-
-        if (TextEncodingHelper.canCalculateBasicWidths(font)) {
-            log.debug(
-                    "Font {} passed basic width calculations - considering reliable",
-                    font.getName());
-            return true;
-        }
-
-        String[] basicTests = {"1", "2", "3", "a", "A", "e", "E", " "};
-
-        int workingChars = 0;
-        for (String testChar : basicTests) {
-            if (TextEncodingHelper.canEncodeCharacters(font, testChar)) {
-                workingChars++;
-            }
-        }
-
-        if (workingChars > 0) {
-            log.debug(
-                    "Font {} can process {}/{} basic characters - considering reliable",
-                    font.getName(),
-                    workingChars,
-                    basicTests.length);
-            return true;
-        }
-
-        log.debug("Font {} failed all basic tests - considering unreliable", font.getName());
-        return false;
-    }
-
    public List<Pattern> createOptimizedSearchPatterns(
            Set<String> searchTerms, boolean useRegex, boolean wholeWordSearch) {
        List<Pattern> patterns = new ArrayList<>();

+        if (searchTerms == null) {
+            return patterns;
+        }
+
        for (String term : searchTerms) {
-            if (term == null || term.trim().isEmpty()) {
+            if (term == null) {
+                continue;
+            }
+
+            String trimmedTerm = term.trim();
+            if (trimmedTerm.isEmpty()) {
                continue;
            }

            try {
-                String patternString = useRegex ? term.trim() : Pattern.quote(term.trim());
-
-                if (wholeWordSearch) {
-                    patternString = applyWordBoundaries(term.trim(), patternString);
+                String patternString;
+                if (useRegex) {
+                    patternString = trimmedTerm;
+                    try {
+                        Pattern.compile(patternString);
+                    } catch (Exception e) {
+                        patternString = Pattern.quote(trimmedTerm);
+                    }
+                } else {
+                    patternString = Pattern.quote(trimmedTerm);
                }

-                Pattern pattern =
-                        Pattern.compile(
-                                patternString, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
+                if (wholeWordSearch) {
+                    patternString = applyWordBoundaries(trimmedTerm, patternString, useRegex);
+                }
+
+                int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL;
+                try {
+                    flags |= Pattern.CANON_EQ;
+                } catch (Exception e) {
+                }
+
+                Pattern pattern = Pattern.compile(patternString, flags);
                patterns.add(pattern);

-                log.debug("Created search pattern: '{}' -> '{}'", term.trim(), patternString);
-
            } catch (Exception e) {
-                log.warn("Failed to create pattern for term '{}': {}", term, e.getMessage());
+                try {
+                    String quotedTerm = Pattern.quote(trimmedTerm);
+                    if (wholeWordSearch) {
+                        quotedTerm = applyWordBoundaries(trimmedTerm, quotedTerm, false);
+                    }
+                    Pattern fallbackPattern =
+                            Pattern.compile(
+                                    quotedTerm, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
+                    patterns.add(fallbackPattern);
+                } catch (Exception e2) {
+                    try {
+                        Pattern simplestPattern = Pattern.compile(Pattern.quote(trimmedTerm));
+                        patterns.add(simplestPattern);
+                    } catch (Exception e3) {
+                    }
+                }
            }
        }

        return patterns;
    }

-    private String applyWordBoundaries(String originalTerm, String patternString) {
-        if (originalTerm.length() == 1 && Character.isDigit(originalTerm.charAt(0))) {
-            return "(?<![\\w])" + patternString + "(?![\\w])";
-        } else if (originalTerm.length() == 1) {
-            return "(?<![\\w])" + patternString + "(?![\\w])";
-        } else {
-            return "\\b" + patternString + "\\b";
-        }
-    }
-
-    public boolean hasProblematicFonts(PDPage page) {
-        if (page == null) {
-            return false;
+    private String applyWordBoundaries(String originalTerm, String patternString, boolean isRegex) {
+        if (originalTerm == null || originalTerm.isEmpty()) {
+            return patternString;
        }

        try {
-            PDResources resources = page.getResources();
-            if (resources == null) {
-                return false;
-            }
-
-            int totalFonts = 0;
-            int completelyUnusableFonts = 0;
-
-            for (org.apache.pdfbox.cos.COSName fontName : resources.getFontNames()) {
-                try {
-                    org.apache.pdfbox.pdmodel.font.PDFont font = resources.getFont(fontName);
-                    if (font != null) {
-                        totalFonts++;
-                        if (!validateFontReliability(font)) {
-                            completelyUnusableFonts++;
-                        }
-                    }
-                } catch (Exception e) {
-                    log.debug("Font loading failed for {}: {}", fontName.getName(), e.getMessage());
-                    totalFonts++;
+            if (originalTerm.length() == 1) {
+                char c = originalTerm.charAt(0);
+                if (Character.isDigit(c)) {
+                    return "(?<![\\p{L}\\p{N}])" + patternString + "(?![\\p{L}\\p{N}])";
+                } else if (Character.isLetter(c)) {
+                    return "(?<![\\p{L}\\p{N}])" + patternString + "(?![\\p{L}\\p{N}])";
+                } else {
+                    return "(?<!\\S)" + patternString + "(?!\\S)";
                }
            }

-            boolean hasProblems = totalFonts > 0 && (completelyUnusableFonts * 2 > totalFonts);
-            log.debug(
-                    "Page font analysis: {}/{} fonts are completely unusable - page {} problematic",
-                    completelyUnusableFonts,
-                    totalFonts,
-                    hasProblems ? "IS" : "is NOT");
+            boolean startsWithWordChar = Character.isLetterOrDigit(originalTerm.charAt(0));
+            boolean endsWithWordChar =
+                    Character.isLetterOrDigit(originalTerm.charAt(originalTerm.length() - 1));

-            return hasProblems;
+            String result = patternString;
+
+            if (startsWithWordChar) {
+                result = "(?<![\\p{L}\\p{N}])" + result;
+            } else {
+                result = "(?<!\\S)" + result;
+            }
+
+            if (endsWithWordChar) {
+                result = result + "(?![\\p{L}\\p{N}])";
+            } else {
+                result = result + "(?!\\S)";
+            }
+
+            return result;

        } catch (Exception e) {
-            log.warn("Font analysis failed for page: {}", e.getMessage());
-            return false; // Be permissive if analysis fails
+            try {
+                return "\\b" + patternString + "\\b";
+            } catch (Exception e2) {
+                return patternString;
+            }
        }
    }
 }
--- a/app/core/src/main/java/stirling/software/SPDF/utils/text/WidthCalculator.java
+++ b/app/core/src/main/java/stirling/software/SPDF/utils/text/WidthCalculator.java
@ -1,88 +1,69 @@
 package stirling.software.SPDF.utils.text;

-import java.nio.charset.StandardCharsets;
 import java.text.Normalizer;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;

 import org.apache.pdfbox.pdmodel.common.PDRectangle;
 import org.apache.pdfbox.pdmodel.font.PDFont;
+import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
 import org.apache.pdfbox.pdmodel.font.PDType0Font;

 import lombok.experimental.UtilityClass;
-import lombok.extern.slf4j.Slf4j;

-@Slf4j
@UtilityClass
 public class WidthCalculator {

-    private final int FONT_SCALE_FACTOR = 1000;
    private final float CONSERVATIVE_CHAR_WIDTH_RATIO = 0.55f;
    private final float BBOX_CHAR_WIDTH_RATIO = 0.65f;

-    private final Map<String, Float> widthCache = new ConcurrentHashMap<>();
-    private final Map<String, Boolean> reliabilityCache = new ConcurrentHashMap<>();
-
-    private String createCacheKey(PDFont font, String text, float fontSize) {
-        return String.format("%s|%s|%.2f", font.getName(), text, fontSize);
-    }
-
-    private String createReliabilityCacheKey(PDFont font) {
-        return font.getName();
-    }
-
    public float calculateAccurateWidth(PDFont font, String text, float fontSize) {
-        return calculateAccurateWidth(font, text, fontSize, true);
-    }
+        if (font == null || text == null || fontSize <= 0) {
+            return 0;
+        }

-    public float calculateAccurateWidth(
-            PDFont font, String text, float fontSize, boolean useCache) {
-        if (font == null || text == null || text.isEmpty() || fontSize <= 0) return 0;
-
-        if (useCache) {
-            String cacheKey = createCacheKey(font, text, fontSize);
-            Float cachedWidth = widthCache.get(cacheKey);
-            if (cachedWidth != null) return cachedWidth;
+        if (text.isEmpty()) {
+            return 0;
        }

        String normalizedText = normalizeText(text);

        Float directWidth = calculateDirectWidth(font, normalizedText, fontSize);
        if (directWidth != null) {
-            if (useCache) widthCache.put(createCacheKey(font, text, fontSize), directWidth);
            return directWidth;
        }

        Float charByCharWidth = calculateCharacterByCharacterWidth(font, normalizedText, fontSize);
        if (charByCharWidth != null) {
-            if (useCache) widthCache.put(createCacheKey(font, text, fontSize), charByCharWidth);
            return charByCharWidth;
        }

        Float glyphWidth = calculateGlyphBasedWidth(font, normalizedText, fontSize);
        if (glyphWidth != null) {
-            if (useCache) widthCache.put(createCacheKey(font, text, fontSize), glyphWidth);
            return glyphWidth;
        }

-        float fallbackWidth = calculateComprehensiveFallbackWidth(font, normalizedText, fontSize);
-        if (useCache) widthCache.put(createCacheKey(font, text, fontSize), fallbackWidth);
-        return fallbackWidth;
+        return calculateComprehensiveFallbackWidth(font, normalizedText, fontSize);
    }

    private String normalizeText(String text) {
-        return Normalizer.normalize(text, Normalizer.Form.NFC);
+        // Null input is mapped to the empty string so later stages never receive null.
+        if (text == null) {
+            return "";
+        }
+
+        String composed;
+        try {
+            // Normalize to NFC so width lookups see the composed form of the text.
+            composed = Normalizer.normalize(text, Normalizer.Form.NFC);
+        } catch (Exception normalizationFailure) {
+            // Best effort: fall back to the input exactly as it was given.
+            composed = text;
+        }
+        return composed;
    }

    private Float calculateDirectWidth(PDFont font, String text, float fontSize) {
-        if (!TextEncodingHelper.canEncodeCharacters(font, text)) return null;
-
        try {
-            float rawWidth = font.getStringWidth(text);
-            float scaledWidth = (rawWidth / FONT_SCALE_FACTOR) * fontSize;
-            return rawWidth >= 0 && scaledWidth >= 0 ? scaledWidth : null;
+            if (!TextEncodingHelper.canEncodeCharacters(font, text)) {
+                return null;
+            }
+            float rawWidth = font.getStringWidth(text) / 1000f;
+            if (rawWidth < 0) return null;
+            float scaledWidth = rawWidth * fontSize;
+            return scaledWidth >= 0 ? scaledWidth : null;
        } catch (Exception e) {
            return null;
        }
@ -96,7 +77,12 @@ public class WidthCalculator {

            for (int codePoint : codePoints) {
                String character = new String(Character.toChars(codePoint));
-                Float charWidth = calculateSingleCharacterWidth(font, character, fontSize);
+                Float charWidth =
+                        calculateSingleCharacterWidth(font, character, fontSize, codePoint);
+
+                if (charWidth == null) {
+                    return null;
+                }

                totalWidth += charWidth;
                if (previousCodePoint != -1) {
@ -104,7 +90,7 @@ public class WidthCalculator {
                }
                previousCodePoint = codePoint;
            }
-            return totalWidth;
+            return totalWidth >= 0 ? totalWidth : null;
        } catch (Exception e) {
            return null;
        }
@ -112,80 +98,99 @@ public class WidthCalculator {

    private List<Integer> getCodePoints(String text) {
        List<Integer> codePoints = new ArrayList<>();
+        // A null string contributes no code points; the empty list is returned as-is.
+        if (text == null) {
+            return codePoints;
+        }
+
        for (int i = 0; i < text.length(); ) {
-            int codePoint = text.codePointAt(i);
-            codePoints.add(codePoint);
-            i += Character.charCount(codePoint);
+            // Advance by one UTF-16 unit unless a code point was decoded, in which case
+            // advance by the number of units that code point occupies.
+            int step = 1;
+            try {
+                int current = text.codePointAt(i);
+                codePoints.add(current);
+                step = Character.charCount(current);
+            } catch (Exception decodeFailure) {
+                // Keep the one-unit step so the scan always moves forward.
+            }
+            i += step;
        }
        return codePoints;
    }

-    private Float calculateSingleCharacterWidth(PDFont font, String character, float fontSize) {
+    private Float calculateSingleCharacterWidth(
+            PDFont font, String character, float fontSize, int codePoint) {
        try {
-            byte[] encoded = null;
-
-            try {
-                encoded = font.encode(character);
-                if (encoded.length == 0) encoded = null;
-            } catch (Exception e) {
-                log.debug("Direct encoding failed for '{}': {}", character, e.getMessage());
-            }
-
-            if (encoded == null && font instanceof PDType0Font) {
+            if (TextEncodingHelper.fontSupportsCharacter(font, character)) {
                try {
-                    encoded = character.getBytes(StandardCharsets.UTF_8);
-                } catch (Exception e) {
-                    log.debug("UTF-8 encoding failed for '{}': {}", character, e.getMessage());
+                    float raw = font.getStringWidth(character) / 1000f;
+                    if (raw >= 0) return raw * fontSize;
+                } catch (Exception ignored) {
                }
            }
-
-            if (encoded != null && encoded.length > 0) {
-                Float width = calculateGlyphWidth(font, encoded, fontSize);
-                if (width != null && width >= 0) return width;
-            }
-
-            return calculateAverageCharacterWidth(font, fontSize);
-
        } catch (Exception e) {
-            log.debug(
-                    "Single character width calculation failed for '{}': {}",
-                    character,
-                    e.getMessage());
-            return calculateAverageCharacterWidth(font, fontSize);
        }
-    }

-    private Float calculateGlyphWidth(PDFont font, byte[] encoded, float fontSize) {
-        for (byte b : encoded) {
-            try {
-                int glyphCode = b & 0xFF;
-                float glyphWidth = font.getWidth(glyphCode);
+        try {
+            float w = font.getWidth(codePoint) / 1000f;
+            if (w >= 0) return w * fontSize;
+        } catch (Exception ignored) {
+        }

-                if (glyphWidth > 0) {
-                    return (glyphWidth / FONT_SCALE_FACTOR) * fontSize;
-                }
-
-                // Try alternative width methods
-                try {
-                    glyphWidth = font.getWidthFromFont(glyphCode);
-                    if (glyphWidth > 0) {
-                        return (glyphWidth / FONT_SCALE_FACTOR) * fontSize;
-                    }
-                } catch (Exception e) {
-                    log.debug(
-                            "getWidthFromFont failed for glyph {}: {}", glyphCode, e.getMessage());
-                }
-
-            } catch (Exception e) {
-                log.debug("Glyph width calculation failed for byte {}: {}", b, e.getMessage());
+        try {
+            if (codePoint >= 0 && codePoint <= 0xFFFF) {
+                float w = font.getWidth(codePoint) / 1000f;
+                if (w >= 0) return w * fontSize;
            }
+        } catch (Exception ignored) {
        }
-        return null;
+
+        try {
+            byte[] encoded = font.encode(character);
+            if (encoded.length > 0) {
+                for (byte b : encoded) {
+                    try {
+                        int glyphCode = b & 0xFF;
+                        float w = font.getWidth(glyphCode) / 1000f;
+                        if (w >= 0) return w * fontSize;
+                    } catch (Exception ignored) {
+                    }
+                }
+            }
+        } catch (Exception ignored) {
+        }
+
+        return calculateCategoryBasedWidth(font, codePoint, fontSize);
    }

    private float calculateKerning(
            PDFont font, int leftCodePoint, int rightCodePoint, float fontSize) {
-        return 0;
+        // Best-effort kerning adjustment for the pair (leftCodePoint, rightCodePoint), scaled
+        // by fontSize. Each probe below is optional: a failure or a non-finite result falls
+        // through to the next probe, and the method finally reports 0 (no adjustment).
+
+        // Probe 1: look up a public getKerning(int, int) method reflectively on simple fonts.
+        // When the concrete font class has no such method the lookup throws and the probe is
+        // skipped.
+        if (font instanceof PDSimpleFont) {
+            try {
+                java.lang.reflect.Method getKerningMethod =
+                        font.getClass().getMethod("getKerning", int.class, int.class);
+                Object kerningValue =
+                        getKerningMethod.invoke(font, leftCodePoint, rightCodePoint);
+                // Accept any numeric return type instead of assuming a boxed Float, which
+                // would otherwise fail with a ClassCastException (or NPE for null).
+                if (kerningValue instanceof Number) {
+                    float scaled = (((Number) kerningValue).floatValue() / 1000f) * fontSize;
+                    if (Float.isFinite(scaled)) {
+                        return scaled;
+                    }
+                }
+            } catch (Exception ignored) {
+                // No usable getKerning on this font; try the width-difference probe instead.
+            }
+        }
+
+        // Probe 2: width of the two-character string minus the widths of its two members.
+        try {
+            String leftChar = new String(Character.toChars(leftCodePoint));
+            String rightChar = new String(Character.toChars(rightCodePoint));
+
+            float combinedWidth = font.getStringWidth(leftChar + rightChar) / 1000f;
+            float leftWidth = font.getStringWidth(leftChar) / 1000f;
+            float rightWidth = font.getStringWidth(rightChar) / 1000f;
+
+            float kerning = (combinedWidth - leftWidth - rightWidth) * fontSize;
+            // Never hand a NaN or infinite adjustment back to the caller.
+            if (Float.isFinite(kerning)) {
+                return kerning;
+            }
+        } catch (Exception ignored) {
+            // The pair could not be measured with this font; report no kerning.
+        }
+
+        return 0f;
    }

    private Float calculateGlyphBasedWidth(PDFont font, String text, float fontSize) {
@ -196,7 +201,6 @@ public class WidthCalculator {
                int codePoint = text.codePointAt(i);
                String character = new String(Character.toChars(codePoint));

-                // Try to get glyph information more comprehensively
                Float charWidth =
                        calculateGlyphWidthComprehensively(font, character, codePoint, fontSize);
                if (charWidth == null) {
@ -207,11 +211,9 @@ public class WidthCalculator {
                i += Character.charCount(codePoint);
            }

-            log.debug("Glyph-based width calculation: {}", totalWidth);
-            return totalWidth;
+            return totalWidth >= 0 ? totalWidth : null;

        } catch (Exception e) {
-            log.debug("Glyph-based calculation failed: {}", e.getMessage());
            return null;
        }
    }
@ -219,70 +221,118 @@ public class WidthCalculator {
    private Float calculateGlyphWidthComprehensively(
            PDFont font, String character, int codePoint, float fontSize) {
+        // Resolves the width of a single character through a chain of probes, each one tried
+        // only when the previous one threw or produced no usable (non-negative) width:
+        //   1. encode the character and measure the encoded bytes,
+        //   2. ask the font for the width at the code point value,
+        //   3. scan codes 0..0xFF for one that decodes to this character,
+        //   4. estimate from the character's Unicode category.
        try {
-            // Method 1: Try standard encoding
-            try {
-                byte[] encoded = font.encode(character);
-                if (encoded.length > 0) {
-                    Float width = calculateWidthFromEncodedBytes(font, encoded, fontSize);
-                    if (width != null && width >= 0) {
-                        return width;
-                    }
+            byte[] encoded = font.encode(character);
+            if (encoded.length > 0) {
+                Float width = calculateWidthFromEncodedBytes(font, encoded, fontSize);
+                if (width != null && width >= 0) {
+                    return width;
                }
-            } catch (Exception e) {
-                log.debug(
-                        "Standard encoding failed for U+{}: {}",
-                        Integer.toHexString(codePoint),
-                        e.getMessage());
            }
-
-            // Method 2: Try Unicode code point directly
-            try {
-                float glyphWidth = font.getWidth(codePoint);
-                if (glyphWidth > 0) {
-                    return (glyphWidth / FONT_SCALE_FACTOR) * fontSize;
-                }
-            } catch (Exception e) {
-                log.debug(
-                        "Unicode code point width failed for U+{}: {}",
-                        Integer.toHexString(codePoint),
-                        e.getMessage());
-            }
-
-            // Method 3: Character category based estimation
-            return calculateCategoryBasedWidth(font, codePoint, fontSize);
-
        } catch (Exception e) {
-            log.debug("Comprehensive glyph width calculation failed: {}", e.getMessage());
-            return calculateAverageCharacterWidth(font, fontSize);
+            // The character could not be encoded with this font; continue with probe 2.
        }
+
+        // Probe 2. This lookup is attempted exactly once: repeating the identical call (as an
+        // earlier revision did for code points <= 0xFFFF) cannot yield a different answer.
+        // NOTE(review): getWidth is given the Unicode code point here; presumably it expects a
+        // font character code, so this only matches fonts whose codes coincide with Unicode
+        // for this character - confirm against the PDFBox PDFont documentation.
+        try {
+            float glyphWidth = font.getWidth(codePoint) / 1000f;
+            if (glyphWidth >= 0) {
+                return glyphWidth * fontSize;
+            }
+        } catch (Exception ignored) {
+            // No width available for this value; continue with probe 3.
+        }
+
+        // Probe 3: reverse lookup over the single-byte code range.
+        for (int code = 0; code <= 0xFF; code++) {
+            try {
+                String decoded = font.toUnicode(code);
+                if (decoded != null && decoded.equals(character)) {
+                    float glyphWidth = font.getWidth(code) / 1000f;
+                    if (glyphWidth >= 0) {
+                        return glyphWidth * fontSize;
+                    }
+                }
+            } catch (Exception ignored) {
+                // This code cannot be decoded or measured; try the next one.
+            }
+        }
+
+        // Probe 4: nothing in the font matched, fall back to a category-based estimate.
+        return calculateCategoryBasedWidth(font, codePoint, fontSize);
    }

    private Float calculateWidthFromEncodedBytes(PDFont font, byte[] encoded, float fontSize) {
-        // Try each byte as a potential glyph code
-        for (byte b : encoded) {
+        if (encoded == null || encoded.length == 0) return null;
+
+        if (font instanceof PDType0Font && encoded.length >= 2) {
            try {
-                int glyphCode = b & 0xFF;
-                float width = font.getWidth(glyphCode);
-                if (width > 0) {
-                    return (width / FONT_SCALE_FACTOR) * fontSize;
+                int glyphCode = ((encoded[0] & 0xFF) << 8) | (encoded[1] & 0xFF);
+                float width = font.getWidth(glyphCode) / 1000f;
+                if (width >= 0) {
+                    return width * fontSize;
+                }
+            } catch (Exception e) {
+            }
+
+            try {
+                for (int i = 0; i <= encoded.length - 2; i++) {
+                    int glyphCode = ((encoded[i] & 0xFF) << 8) | (encoded[i + 1] & 0xFF);
+                    float width = font.getWidth(glyphCode) / 1000f;
+                    if (width >= 0) {
+                        return width * fontSize;
+                    }
                }
            } catch (Exception e) {
-                // Continue trying other bytes
            }
        }

-        if (encoded.length >= 2 && font instanceof PDType0Font) {
+        for (byte b : encoded) {
            try {
-                int glyphCode = ((encoded[0] & 0xFF) << 8) | (encoded[1] & 0xFF);
-                float width = font.getWidth(glyphCode);
-                if (width > 0) {
-                    return (width / FONT_SCALE_FACTOR) * fontSize;
+                int glyphCode = b & 0xFF;
+                float width = font.getWidth(glyphCode) / 1000f;
+                if (width >= 0) {
+                    return width * fontSize;
                }
            } catch (Exception e) {
-                log.debug("Multi-byte glyph code interpretation failed: {}", e.getMessage());
            }
        }

+        try {
+            if (encoded.length >= 3) {
+                int glyphCode =
+                        ((encoded[0] & 0xFF) << 16)
+                                | ((encoded[1] & 0xFF) << 8)
+                                | (encoded[2] & 0xFF);
+                float width = font.getWidth(glyphCode) / 1000f;
+                if (width >= 0) {
+                    return width * fontSize;
+                }
+            }
+        } catch (Exception e) {
+        }
+
+        try {
+            if (encoded.length >= 4) {
+                int glyphCode =
+                        ((encoded[0] & 0xFF) << 24)
+                                | ((encoded[1] & 0xFF) << 16)
+                                | ((encoded[2] & 0xFF) << 8)
+                                | (encoded[3] & 0xFF);
+                float width = font.getWidth(glyphCode) / 1000f;
+                if (width >= 0) {
+                    return width * fontSize;
+                }
+            }
+        } catch (Exception e) {
+        }
+
        return null;
    }

@ -291,198 +341,237 @@ public class WidthCalculator {
            int category = Character.getType(codePoint);
            float baseWidth = calculateAverageCharacterWidth(font, fontSize);

-            // Adjust width based on character category
            float multiplier =
                    switch (category) {
                        case Character.UPPERCASE_LETTER -> 1.2f;
                        case Character.LOWERCASE_LETTER -> 1.0f;
-                        case Character.DECIMAL_DIGIT_NUMBER -> 1.0f;
-                        case Character.SPACE_SEPARATOR -> 0.5f;
-                        case Character.DASH_PUNCTUATION -> 0.8f;
-                        case Character.OTHER_PUNCTUATION -> 0.6f;
-                        case Character.CURRENCY_SYMBOL -> 1.1f;
-                        case Character.MATH_SYMBOL -> 1.0f;
+                        case Character.TITLECASE_LETTER -> 1.15f;
                        case Character.MODIFIER_LETTER -> 0.7f;
-                        case Character.NON_SPACING_MARK -> 0.0f; // Combining characters
+                        case Character.OTHER_LETTER -> 1.0f;
+                        case Character.DECIMAL_DIGIT_NUMBER -> 1.0f;
+                        case Character.LETTER_NUMBER -> 1.0f;
+                        case Character.OTHER_NUMBER -> 1.0f;
+                        case Character.SPACE_SEPARATOR -> 0.5f;
+                        case Character.LINE_SEPARATOR -> 0.0f;
+                        case Character.PARAGRAPH_SEPARATOR -> 0.0f;
+                        case Character.NON_SPACING_MARK -> 0.0f;
                        case Character.ENCLOSING_MARK -> 0.0f;
                        case Character.COMBINING_SPACING_MARK -> 0.3f;
+                        case Character.DASH_PUNCTUATION -> 0.8f;
+                        case Character.START_PUNCTUATION -> 0.6f;
+                        case Character.END_PUNCTUATION -> 0.6f;
+                        case Character.CONNECTOR_PUNCTUATION -> 0.6f;
+                        case Character.OTHER_PUNCTUATION -> 0.6f;
+                        case Character.MATH_SYMBOL -> 1.0f;
+                        case Character.CURRENCY_SYMBOL -> 1.1f;
+                        case Character.MODIFIER_SYMBOL -> 0.8f;
+                        case Character.OTHER_SYMBOL -> 1.0f;
+                        case Character.INITIAL_QUOTE_PUNCTUATION -> 0.6f;
+                        case Character.FINAL_QUOTE_PUNCTUATION -> 0.6f;
+                        case Character.CONTROL -> 0.0f;
+                        case Character.FORMAT -> 0.0f;
+                        case Character.PRIVATE_USE -> 1.0f;
+                        case Character.SURROGATE -> 0.0f;
+                        case Character.UNASSIGNED -> 1.0f;
                        default -> 1.0f;
                    };

-            return baseWidth * multiplier;
+            float result = baseWidth * multiplier;
+            return result >= 0 ? result : baseWidth;
        } catch (Exception e) {
-            log.debug("Category-based width calculation failed: {}", e.getMessage());
            return calculateAverageCharacterWidth(font, fontSize);
        }
    }

    private float calculateAverageCharacterWidth(PDFont font, float fontSize) {
+        // Returns an average character width scaled by fontSize, trying progressively
+        // cruder sources until one yields a strictly positive value.
        try {
-            float avgWidth = font.getAverageFontWidth();
-            return (avgWidth / FONT_SCALE_FACTOR) * fontSize;
+            // Source 1: the average width reported by the font itself.
+            float avgWidth = font.getAverageFontWidth() / 1000f;
+            if (avgWidth > 0) {
+                return avgWidth * fontSize;
+            }
        } catch (Exception e) {
-            log.debug("Average character width calculation failed: {}", e.getMessage());
-            return CONSERVATIVE_CHAR_WIDTH_RATIO * fontSize;
+            // The font could not report an average; continue with the sampled sources.
        }
+
+        // Source 2: mean measured width of a sample of common Latin letters and digits.
+        String[] testChars = {
+            "a", "A", "e", "E", "i", "I", "o", "O", "n", "N", "t", "T", "r", "R", "s", "S", "0",
+            "1", "2", "3", "4", "5"
+        };
+        float sampleTotal = 0;
+        int sampleCount = 0;
+        for (String testChar : testChars) {
+            try {
+                float width = font.getStringWidth(testChar) / 1000f;
+                if (width > 0) {
+                    sampleTotal += width;
+                    sampleCount++;
+                }
+            } catch (Exception ignored) {
+                // This sample character cannot be measured with the font; skip it.
+            }
+        }
+        if (sampleCount > 0) {
+            return (sampleTotal / sampleCount) * fontSize;
+        }
+
+        // Source 3: mean of every positive width among codes 32..126. Averaging (rather than
+        // returning the first positive width, which is the one for code 32 whenever that code
+        // has a width) keeps a single narrow entry from standing in for the whole font.
+        float codeTotal = 0;
+        int codeCount = 0;
+        for (int code = 32; code <= 126; code++) {
+            try {
+                float width = font.getWidth(code) / 1000f;
+                if (width > 0) {
+                    codeTotal += width;
+                    codeCount++;
+                }
+            } catch (Exception ignored) {
+                // No width for this code; skip it.
+            }
+        }
+        if (codeCount > 0) {
+            return (codeTotal / codeCount) * fontSize;
+        }
+
+        // Source 4: half of the font bounding box width, used only when it is positive so a
+        // degenerate box cannot produce a zero or negative average.
+        try {
+            if (font.getFontDescriptor() != null) {
+                PDRectangle bbox = font.getFontDescriptor().getFontBoundingBox();
+                if (bbox != null) {
+                    float avgCharWidth = bbox.getWidth() / 2000f;
+                    if (avgCharWidth > 0) {
+                        return avgCharWidth * fontSize;
+                    }
+                }
+            }
+        } catch (Exception ignored) {
+            // Descriptor or bounding box unavailable; use the fixed ratio below.
+        }
+
+        // Last resort: a fixed fraction of the font size.
+        return CONSERVATIVE_CHAR_WIDTH_RATIO * fontSize;
    }

    private float calculateComprehensiveFallbackWidth(PDFont font, String text, float fontSize) {
+        if (text == null || text.isEmpty()) {
+            return 0;
+        }
+
+        try {
+            float charWidth = calculateAverageCharacterWidth(font, fontSize);
+            float totalWidth = 0;
+
+            for (int i = 0; i < text.length(); ) {
+                int codePoint = text.codePointAt(i);
+                Float specificWidth = calculateCategoryBasedWidth(font, codePoint, fontSize);
+                if (specificWidth != null) {
+                    totalWidth += specificWidth;
+                } else {
+                    totalWidth += charWidth;
+                }
+                i += Character.charCount(codePoint);
+            }
+
+            return totalWidth;
+        } catch (Exception e) {
+        }
+
        try {
-            // Strategy 1: Use font bounding box with character analysis
            if (font.getFontDescriptor() != null
                    && font.getFontDescriptor().getFontBoundingBox() != null) {
-
                PDRectangle bbox = font.getFontDescriptor().getFontBoundingBox();
-                float avgCharWidth = bbox.getWidth() / FONT_SCALE_FACTOR;
-
-                // Analyze text composition for better estimation
-                float adjustedWidth = analyzeTextComposition(text, avgCharWidth, fontSize);
-                log.debug("Bounding box based fallback width: {}", adjustedWidth);
-                return adjustedWidth;
+                float avgCharWidth = bbox.getWidth() / 1000f;
+                return text.length() * avgCharWidth * BBOX_CHAR_WIDTH_RATIO * fontSize;
            }
-
-            // Strategy 2: Enhanced average width calculation
-            float enhancedAverage = calculateEnhancedAverageWidth(font, text, fontSize);
-            log.debug("Enhanced average fallback width: {}", enhancedAverage);
-            return enhancedAverage;
-
        } catch (Exception e) {
-            float conservativeWidth = text.length() * CONSERVATIVE_CHAR_WIDTH_RATIO * fontSize;
-            log.debug("Conservative fallback width: {}", conservativeWidth);
-            return conservativeWidth;
-        }
-    }
-
-    private float analyzeTextComposition(String text, float avgCharWidth, float fontSize) {
-        float totalWidth = 0;
-        int spaceCount = 0;
-        int upperCount = 0;
-        int lowerCount = 0;
-        int digitCount = 0;
-        int punctCount = 0;
-
-        for (int i = 0; i < text.length(); ) {
-            int codePoint = text.codePointAt(i);
-            int category = Character.getType(codePoint);
-
-            switch (category) {
-                case Character.SPACE_SEPARATOR -> {
-                    spaceCount++;
-                    totalWidth += avgCharWidth * 0.5f * fontSize;
-                }
-                case Character.UPPERCASE_LETTER -> {
-                    upperCount++;
-                    totalWidth += avgCharWidth * 1.2f * fontSize;
-                }
-                case Character.LOWERCASE_LETTER -> {
-                    lowerCount++;
-                    totalWidth += avgCharWidth * 1.0f * fontSize;
-                }
-                case Character.DECIMAL_DIGIT_NUMBER -> {
-                    digitCount++;
-                    totalWidth += avgCharWidth * 1.0f * fontSize;
-                }
-                case Character.OTHER_PUNCTUATION, Character.DASH_PUNCTUATION -> {
-                    punctCount++;
-                    totalWidth += avgCharWidth * 0.7f * fontSize;
-                }
-                default -> totalWidth += avgCharWidth * BBOX_CHAR_WIDTH_RATIO * fontSize;
-            }
-
-            i += Character.charCount(codePoint);
        }

-        log.debug(
-                "Text composition analysis - Spaces: {}, Upper: {}, Lower: {}, Digits: {}, Punct: {}",
-                spaceCount,
-                upperCount,
-                lowerCount,
-                digitCount,
-                punctCount);
-
-        return totalWidth;
-    }
-
-    private float calculateEnhancedAverageWidth(PDFont font, String text, float fontSize) {
-        try {
-            float baseAverage = font.getAverageFontWidth();
-
-            float capHeight = 0;
-            float xHeight = 0;
-
-            if (font.getFontDescriptor() != null) {
-                capHeight = font.getFontDescriptor().getCapHeight();
-                xHeight = font.getFontDescriptor().getXHeight();
-            }
-
-            float adjustmentFactor = 1.0f;
-            if (capHeight > 0 && xHeight > 0) {
-                adjustmentFactor = Math.max(0.8f, Math.min(1.2f, xHeight / capHeight));
-            }
-
-            float adjustedAverage = (baseAverage * adjustmentFactor / FONT_SCALE_FACTOR) * fontSize;
-            return text.length() * adjustedAverage;
-
-        } catch (Exception e) {
-            log.debug("Enhanced average width calculation failed: {}", e.getMessage());
-            return text.length() * CONSERVATIVE_CHAR_WIDTH_RATIO * fontSize;
-        }
+        return text.length() * calculateAverageCharacterWidth(font, fontSize);
    }

    public boolean isWidthCalculationReliable(PDFont font) {
+        // Decides whether width measurements taken from this font can be trusted.
+        // Returns false for null, for fonts flagged as damaged, and for fonts rejected by
+        // TextEncodingHelper.canCalculateBasicWidths; otherwise the first probe below that
+        // succeeds makes the font reliable, and false is returned when none does.
-        if (font == null) {
-            return false;
+        if (font == null) return false;
+
+        try {
+            if (font.isDamaged()) return false;
+        } catch (Exception ignored) {
+            // The damage flag could not be read; let the remaining probes decide.
        }

-        String cacheKey = createReliabilityCacheKey(font);
-        Boolean cachedResult = reliabilityCache.get(cacheKey);
-        if (cachedResult != null) {
-            log.debug(
-                    "Using cached reliability result for font {}: {}",
-                    font.getName(),
-                    cachedResult);
-            return cachedResult;
+        try {
+            if (!TextEncodingHelper.canCalculateBasicWidths(font)) return false;
+        } catch (Exception ignored) {
+            // A failing helper is not taken as proof of unreliability.
        }

-        boolean result = performReliabilityCheck(font);
+        // Probe 1: measuring a plain "A" without an exception counts as reliable.
+        try {
+            font.getStringWidth("A");
+            return true;
+        } catch (Exception ignored) {
+            // "A" cannot be measured with this font; try the weaker probes.
+        }

-        reliabilityCache.put(cacheKey, result);
-        return result;
+        // Probe 2: the average width must be strictly positive. Merely returning without an
+        // exception is not enough - a zero average carries no width information.
+        try {
+            if (font.getAverageFontWidth() > 0) return true;
+        } catch (Exception ignored) {
+            // Average width unavailable; try the single-code probe.
+        }
+
+        // Probe 3: code 65 must have a strictly positive width, for the same reason.
+        try {
+            return font.getWidth(65) > 0;
+        } catch (Exception ignored) {
+            // No probe succeeded.
+        }
+
+        return false;
    }

-    private boolean performReliabilityCheck(PDFont font) {
+    public float calculateMinimumTextWidth(PDFont font, String text, float fontSize) {
+        if (font == null || text == null || text.isEmpty() || fontSize <= 0) {
+            return 0;
+        }
+
        try {
-            if (font.isDamaged()) {
-                log.debug("Font {} is damaged", font.getName());
-                return false;
+            float minWidth = calculateAccurateWidth(font, text, fontSize);
+            if (minWidth > 0) {
+                return minWidth * 0.8f;
            }
-
-            if (!TextEncodingHelper.canCalculateBasicWidths(font)) {
-                log.debug("Font {} cannot perform basic width calculations", font.getName());
-                return false;
-            }
-
-            try {
-                font.getStringWidth("A");
-                return true;
-            } catch (Exception e) {
-                log.debug("Font {} failed basic width test: {}", font.getName(), e.getMessage());
-            }
-
-            // Check if we can at least get average width
-            try {
-                float avgWidth = font.getAverageFontWidth();
-                return avgWidth > 0;
-            } catch (Exception e) {
-                log.debug(
-                        "Font {} cannot provide average width: {}", font.getName(), e.getMessage());
-            }
-
-            return false;
-
        } catch (Exception e) {
-            log.debug("Reliability check failed for font {}: {}", font.getName(), e.getMessage());
+        }
+
+        return text.length() * fontSize * 0.3f;
+    }
+
+    /**
+     * Estimates an upper bound for the rendered width of {@code text}.
+     *
+     * <p>The bound is 1.2 times the value of {@code calculateAccurateWidth} when that value is
+     * positive; otherwise it is one full {@code fontSize} per {@code char} of {@code text}.
+     *
+     * @param font font used for measuring; {@code null} yields 0
+     * @param text text to measure; {@code null} or empty yields 0
+     * @param fontSize font size; values {@code <= 0} yield 0
+     * @return the estimated maximum width, or 0 for unusable input
+     */
+    public float calculateMaximumTextWidth(PDFont font, String text, float fontSize) {
+        boolean unusableInput = font == null || text == null || text.isEmpty() || fontSize <= 0;
+        if (unusableInput) {
+            return 0;
+        }
+
+        float measured = 0;
+        try {
+            measured = calculateAccurateWidth(font, text, fontSize);
+        } catch (Exception ignored) {
+            // A failed measurement is handled like a non-positive one below.
+        }
+
+        // Positive measurement: add a 20% margin. Anything else: one font size per char.
+        return measured > 0 ? measured * 1.2f : text.length() * fontSize * 1.0f;
+    }
+
+    /**
+     * Reports whether a width can be produced for {@code text} with {@code font}.
+     *
+     * <p>The answer depends only on the arguments being non-null: for non-null input the
+     * measuring entry point {@code calculateAccurateWidth} always ends in a fallback estimate,
+     * so no probing is needed. Earlier revisions ran a direct and a character-by-character
+     * measurement here, but returned {@code true} regardless of their outcome; those probes
+     * were pure overhead and have been dropped without changing any result.
+     *
+     * @param font font that would be used for measuring
+     * @param text text that would be measured; the empty string is accepted
+     * @return {@code false} when {@code font} or {@code text} is {@code null}, otherwise
+     *     {@code true}
+     */
+    public boolean canCalculateWidthForText(PDFont font, String text) {
+        if (font == null || text == null) {
            return false;
        }
+
+        // Empty and non-empty text alike can always be given a width (possibly an estimate).
+        return true;
    }
 }