feat: add TextFinderUtils and WidthCalculator for text processing and font validation, TextEncodingHelper for encoding support

2025-08-29 13:48:46 +02:00 · 2025-07-18 18:50:17 +02:00 · 2025-07-18 18:50:17 +02:00 · 6315721e8f
commit 6315721e8f
parent 7a9f962172
6 changed files with 1106 additions and 230 deletions
--- a/app/core/src/main/java/stirling/software/SPDF/controller/api/security/RedactController.java
+++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/security/RedactController.java
@ -32,9 +32,6 @@ import org.apache.pdfbox.pdmodel.PDResources;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
 import org.apache.pdfbox.pdmodel.common.PDStream;
 import org.apache.pdfbox.pdmodel.font.PDFont;
-import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
-import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding;
-import org.apache.pdfbox.pdmodel.font.encoding.Encoding;
 import org.apache.pdfbox.pdmodel.graphics.PDXObject;
 import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
 import org.springframework.http.ResponseEntity;
@ -59,6 +56,9 @@ import stirling.software.SPDF.model.PDFText;
 import stirling.software.SPDF.model.api.security.ManualRedactPdfRequest;
 import stirling.software.SPDF.model.api.security.RedactPdfRequest;
 import stirling.software.SPDF.pdf.TextFinder;
+import stirling.software.SPDF.utils.text.TextEncodingHelper;
+import stirling.software.SPDF.utils.text.TextFinderUtils;
+import stirling.software.SPDF.utils.text.WidthCalculator;
 import stirling.software.common.model.api.security.RedactionArea;
 import stirling.software.common.service.CustomPDFDocumentFactory;
 import stirling.software.common.util.GeneralUtils;
@ -77,6 +77,9 @@ public class RedactController {
    private static final float PRECISION_THRESHOLD = 1e-3f;
    private static final int FONT_SCALE_FACTOR = 1000;

+    // Redaction box width reduction factor (10% reduction)
+    private static final float REDACTION_WIDTH_REDUCTION_FACTOR = 0.9f;
+
    // Text showing operators
    private static final Set<String> TEXT_SHOWING_OPERATORS = Set.of("Tj", "TJ", "'", "\"");

@ -229,7 +232,11 @@ public class RedactController {
    }

    private void redactFoundText(
-            PDDocument document, List<PDFText> blocks, float customPadding, Color redactColor)
+            PDDocument document,
+            List<PDFText> blocks,
+            float customPadding,
+            Color redactColor,
+            boolean isTextRemovalMode)
            throws IOException {

        var allPages = document.getDocumentCatalog().getPages();
@ -263,10 +270,28 @@ public class RedactController {
                                (block.getY2() - block.getY1()) * DEFAULT_TEXT_PADDING_MULTIPLIER
                                        + customPadding;

+                        float originalWidth = block.getX2() - block.getX1();
+                        float boxWidth;
+                        float boxX;
+
+                        // Only apply width reduction when text is actually being removed
+                        if (isTextRemovalMode) {
+                            // Calculate reduced width and center the box
+                            boxWidth =
+                                    originalWidth
+                                            * REDACTION_WIDTH_REDUCTION_FACTOR; // 10% reduction
+                            float widthReduction = originalWidth - boxWidth;
+                            boxX = block.getX1() + (widthReduction / 2); // Center the reduced box
+                        } else {
+                            // Use original width for box-only redaction
+                            boxWidth = originalWidth;
+                            boxX = block.getX1();
+                        }
+
                        contentStream.addRect(
-                                block.getX1(),
+                                boxX,
                                pageBox.getHeight() - block.getY2() - padding,
-                                block.getX2() - block.getX1(),
+                                boxWidth,
                                block.getY2() - block.getY1() + 2 * padding);
                    }

@ -284,7 +309,7 @@ public class RedactController {
            return originalWord;
        }

-        if (font != null && isFontSubset(font.getName())) {
+        if (font != null && TextEncodingHelper.isFontSubset(font.getName())) {
            try {
                float originalWidth = safeGetStringWidth(font, originalWord) / FONT_SCALE_FACTOR;
                return createAlternativePlaceholder(originalWord, originalWidth, font, 1.0f);
@ -300,6 +325,10 @@ public class RedactController {
        return " ".repeat(originalWord.length());
    }

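+    /**
+     * Builds a placeholder string for {@code originalWord} that aims to occupy roughly
+     * {@code targetWidth} at {@code fontSize}. Fonts that WidthCalculator reports as unreliable
+     * get one space per original character; subset fonts are delegated to
+     * createSubsetFontPlaceholder; otherwise the placeholder is a run of spaces sized from the
+     * measured space width, with createAlternativePlaceholder as the fallback when the space
+     * width is not positive or the calculation throws.
+     */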
    String createPlaceholderWithWidth(
            String originalWord, float targetWidth, PDFont font, float fontSize) {
        if (originalWord == null || originalWord.isEmpty()) {
@ -311,11 +340,21 @@ public class RedactController {
        }

        try {
-            if (isFontSubset(font.getName())) {
+            // Check font reliability before proceeding
+            if (!WidthCalculator.isWidthCalculationReliable(font)) {
+                log.debug(
+                        "Font {} unreliable for width calculation, using simple placeholder",
+                        font.getName());
+                return " ".repeat(originalWord.length());
+            }
+
+            // Use enhanced subset font detection
+            if (TextEncodingHelper.isFontSubset(font.getName())) {
                return createSubsetFontPlaceholder(originalWord, targetWidth, font, fontSize);
            }

-            float spaceWidth = safeGetStringWidth(font, " ") / FONT_SCALE_FACTOR * fontSize;
+            // Enhanced space width calculation
+            float spaceWidth = WidthCalculator.calculateAccurateWidth(font, " ", fontSize);

            if (spaceWidth <= 0) {
                return createAlternativePlaceholder(originalWord, targetWidth, font, fontSize);
@ -323,13 +362,16 @@ public class RedactController {

            int spaceCount = Math.max(1, Math.round(targetWidth / spaceWidth));

-            int maxSpaces = originalWord.length() * 2;
+            // Cap spaces at the larger of 2x word length and 1.5x the width-derived count
+            int maxSpaces =
+                    Math.max(
+                            originalWord.length() * 2, Math.round(targetWidth / spaceWidth * 1.5f));
            spaceCount = Math.min(spaceCount, maxSpaces);

            return " ".repeat(spaceCount);

        } catch (Exception e) {
-            log.debug("Width-based placeholder creation failed: {}", e.getMessage());
+            log.debug("Enhanced placeholder creation failed: {}", e.getMessage());
            return createAlternativePlaceholder(originalWord, targetWidth, font, fontSize);
        }
    }
@ -359,7 +401,7 @@ public class RedactController {
        try {
            String[] alternatives = {" ", ".", "-", "_", "~", "°", "·"};

-            if (fontSupportsCharacter(font, " ")) {
+            if (TextEncodingHelper.fontSupportsCharacter(font, " ")) {
                float spaceWidth = safeGetStringWidth(font, " ") / FONT_SCALE_FACTOR * fontSize;
                if (spaceWidth > 0) {
                    int spaceCount = Math.max(1, Math.round(targetWidth / spaceWidth));
@ -371,10 +413,10 @@ public class RedactController {
            }

            for (String altChar : alternatives) {
-                if (altChar.equals(" ")) continue; // Already tried spaces
+                if (" ".equals(altChar)) continue; // Already tried spaces

                try {
-                    if (!fontSupportsCharacter(font, altChar)) {
+                    if (!TextEncodingHelper.fontSupportsCharacter(font, altChar)) {
                        continue;
                    }

@ -546,7 +588,8 @@ public class RedactController {
                                allFoundTextsByPage,
                                request.getRedactColor(),
                                request.getCustomPadding(),
-                                request.getConvertPDFToImage());
+                                request.getConvertPDFToImage(),
+                                false); // Box-only mode, use original box sizes

                return WebResponseUtils.bytesToWebResponse(
                        pdfContent,
@ -564,7 +607,8 @@ public class RedactController {
                            allFoundTextsByPage,
                            request.getRedactColor(),
                            request.getCustomPadding(),
-                            request.getConvertPDFToImage());
+                            request.getConvertPDFToImage(),
+                            true); // Text removal mode, use reduced box sizes

            return WebResponseUtils.bytesToWebResponse(
                    pdfContent,
@ -608,14 +652,31 @@ public class RedactController {
            text = text.trim();
            if (text.isEmpty()) continue;

+            log.debug(
+                    "Searching for text: '{}' (regex: {}, wholeWord: {})",
+                    text,
+                    useRegex,
+                    wholeWordSearch);
+
            try {
                TextFinder textFinder = new TextFinder(text, useRegex, wholeWordSearch);
                textFinder.getText(document);

-                for (PDFText found : textFinder.getFoundTexts()) {
+                List<PDFText> foundTexts = textFinder.getFoundTexts();
+                log.debug("TextFinder found {} instances of '{}'", foundTexts.size(), text);
+
+                for (PDFText found : foundTexts) {
                    allFoundTextsByPage
                            .computeIfAbsent(found.getPageIndex(), k -> new ArrayList<>())
                            .add(found);
+                    log.debug(
+                            "Added match on page {} at ({},{},{},{}): '{}'",
+                            found.getPageIndex(),
+                            found.getX1(),
+                            found.getY1(),
+                            found.getX2(),
+                            found.getY2(),
+                            found.getText());
                }
            } catch (Exception e) {
                log.error("Error processing search term '{}': {}", text, e.getMessage());
@ -673,7 +734,8 @@ public class RedactController {
            Map<Integer, List<PDFText>> allFoundTextsByPage,
            String colorString,
            float customPadding,
-            Boolean convertToImage)
+            Boolean convertToImage,
+            boolean isTextRemovalMode)
            throws IOException {

        List<PDFText> allFoundTexts = new ArrayList<>();
@ -684,7 +746,7 @@ public class RedactController {
        if (!allFoundTexts.isEmpty()) {
            Color redactColor = decodeOrDefault(colorString);

-            redactFoundText(document, allFoundTexts, customPadding, redactColor);
+            redactFoundText(document, allFoundTexts, customPadding, redactColor, isTextRemovalMode);

            cleanDocumentMetadata(document);
        }
@ -870,16 +932,24 @@ public class RedactController {
            boolean useRegex,
            boolean wholeWordSearch) {

-        return targetWords.stream()
-                .map(
-                        target -> {
-                            String patternString = useRegex ? target : Pattern.quote(target);
-                            if (wholeWordSearch) {
-                                patternString = "\\b" + patternString + "\\b";
+        // Use the new utility for creating optimized patterns
+        List<Pattern> patterns =
+                TextFinderUtils.createOptimizedSearchPatterns(
+                        targetWords, useRegex, wholeWordSearch);
+
+        return patterns.stream()
+                .flatMap(
+                        pattern -> {
+                            try {
+                                return pattern.matcher(completeText).results();
+                            } catch (Exception e) {
+                                log.debug(
+                                        "Pattern matching failed for pattern {}: {}",
+                                        pattern.pattern(),
+                                        e.getMessage());
+                                return java.util.stream.Stream.empty();
                            }
-                            return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
                        })
-                .flatMap(pattern -> pattern.matcher(completeText).results())
                .map(matchResult -> new MatchRange(matchResult.start(), matchResult.end()))
                .sorted(Comparator.comparingInt(MatchRange::getStartPos))
                .collect(Collectors.toList());
@ -957,6 +1027,16 @@ public class RedactController {

    private String applyRedactionsToSegmentText(TextSegment segment, List<MatchRange> matches) {
        String text = segment.getText();
+
+        if (segment.getFont() != null
+                && !TextEncodingHelper.isTextSegmentRemovable(segment.getFont(), text)) {
+            log.debug(
+                    "Skipping text segment '{}' - font {} cannot process this text reliably",
+                    text,
+                    segment.getFont().getName());
+            return text; // Return original text unchanged
+        }
+
        StringBuilder result = new StringBuilder(text);

        for (MatchRange match : matches) {
@ -966,6 +1046,15 @@ public class RedactController {
            if (segmentStart < text.length() && segmentEnd > segmentStart) {
                String originalPart = text.substring(segmentStart, segmentEnd);

+                if (segment.getFont() != null
+                        && !TextEncodingHelper.isTextSegmentRemovable(
+                                segment.getFont(), originalPart)) {
+                    log.debug(
+                            "Skipping text part '{}' within segment - cannot be processed reliably",
+                            originalPart);
+                    continue; // Skip this match, process others
+                }
+
                float originalWidth = 0;
                if (segment.getFont() != null && segment.getFontSize() > 0) {
                    try {
@ -1001,68 +1090,125 @@ public class RedactController {
            return 0;
        }

+        if (!WidthCalculator.isWidthCalculationReliable(font)) {
+            log.debug(
+                    "Font {} flagged as unreliable for width calculation, using fallback",
+                    font.getName());
+            return calculateConservativeWidth(font, text);
+        }
+
+        if (!TextEncodingHelper.canEncodeCharacters(font, text)) {
+            log.debug(
+                    "Text cannot be encoded by font {}, using character-based fallback",
+                    font.getName());
+            return calculateCharacterBasedWidth(font, text);
+        }
+
        try {
-            return font.getStringWidth(text);
+            float width = font.getStringWidth(text);
+            log.debug("Direct width calculation successful for '{}': {}", text, width);
+            return width;
+
        } catch (Exception e) {
-            try {
-                float totalWidth = 0;
-                for (int i = 0; i < text.length(); i++) {
-                    String character = text.substring(i, i + 1);
-                    try {
-                        byte[] encoded = font.encode(character);
-                        if (encoded.length > 0) {
-                            int glyphCode = encoded[0] & 0xFF;
+            log.debug(
+                    "Direct width calculation failed for font {}: {}",
+                    font.getName(),
+                    e.getMessage());
+            return calculateFallbackWidth(font, text);
+        }
+    }

-                            float glyphWidth = font.getWidth(glyphCode);
+    /**
+     * Estimates the width of {@code text} by summing a width for each {@code char} individually.
+     *
+     * <p>For every one-{@code char} substring: if TextEncodingHelper.fontSupportsCharacter rejects
+     * it, the font's average width is added. Otherwise the character is encoded and the first
+     * encoded byte is used as the code passed to {@code font.getWidth}; a zero result is retried
+     * via {@code font.getWidthFromFont}, falling back to the average width if that throws. An
+     * empty encoding, or any exception while handling a character, also contributes the average
+     * width. Iteration is per UTF-16 {@code char}, so a supplementary code point is handled as
+     * two separate substrings.
+     *
+     * @param font font used for encoding and width lookups; not null-checked here
+     * @param text text to measure; not null-checked here
+     * @return the accumulated width, or the result of calculateConservativeWidth if the
+     *     calculation as a whole throws
+     */
+    private float calculateCharacterBasedWidth(PDFont font, String text) {
+        try {
+            float totalWidth = 0;
+            for (int i = 0; i < text.length(); i++) {
+                String character = text.substring(i, i + 1);
+                try {
+                    // Validate character encoding first
+                    if (!TextEncodingHelper.fontSupportsCharacter(font, character)) {
+                        totalWidth += font.getAverageFontWidth();
+                        continue;
+                    }

-                            if (glyphWidth == 0) {
-                                try {
-                                    glyphWidth = font.getWidthFromFont(glyphCode);
-                                } catch (Exception e2) {
-                                    glyphWidth = font.getAverageFontWidth();
-                                }
+                    byte[] encoded = font.encode(character);
+                    if (encoded.length > 0) {
+                        // NOTE(review): only the first encoded byte is used as the code, so a
+                        // multi-byte encoding would be looked up by its leading byte - confirm
+                        // this is acceptable for the fonts that reach this path.
+                        int glyphCode = encoded[0] & 0xFF;
+                        float glyphWidth = font.getWidth(glyphCode);
+
+                        // Try alternative width methods if primary fails
+                        if (glyphWidth == 0) {
+                            try {
+                                glyphWidth = font.getWidthFromFont(glyphCode);
+                            } catch (Exception e2) {
+                                glyphWidth = font.getAverageFontWidth();
                            }
-
-                            totalWidth += glyphWidth;
-                        } else {
-                            totalWidth += font.getAverageFontWidth();
                        }
-                    } catch (Exception e2) {
+
+                        totalWidth += glyphWidth;
+                    } else {
                        totalWidth += font.getAverageFontWidth();
                    }
+                } catch (Exception e2) {
+                    // Character processing failed, use average width
+                    totalWidth += font.getAverageFontWidth();
                }
-                return totalWidth;
-            } catch (Exception e2) {
-                log.debug("PDFBox API width calculation failed: {}", e2.getMessage());
            }

-            try {
-                if (font.getFontDescriptor() != null
-                        && font.getFontDescriptor().getFontBoundingBox() != null) {
-                    PDRectangle bbox = font.getFontDescriptor().getFontBoundingBox();
-                    float avgCharWidth = bbox.getHeight() / 1000f * 0.865f;
-                    return text.length() * avgCharWidth * FONT_SCALE_FACTOR;
-                }
-            } catch (Exception e2) {
-                log.debug("Font bounding box width calculation failed: {}", e2.getMessage());
+            log.debug("Character-based width calculation: {}", totalWidth);
+            return totalWidth;
+
+        } catch (Exception e) {
+            log.debug("Character-based width calculation failed: {}", e.getMessage());
+            return calculateConservativeWidth(font, text);
+        }
+    }
+
+    /**
+     * Estimates the width of {@code text} from coarse font-level data, trying in order:
+     *
+     * <ol>
+     *   <li>the font descriptor's bounding box, when both are non-null: {@code text.length()}
+     *       times 0.6 of the box width;
+     *   <li>{@code font.getAverageFontWidth()}, when it is positive: {@code text.length()} times
+     *       that value;
+     *   <li>calculateConservativeWidth, which is also used if any step above throws.
+     * </ol>
+     *
+     * <p>NOTE(review): the bounding-box branch returns without checking that the computed width
+     * is positive, unlike the average-width branch - confirm a zero-width box cannot occur here.
+     *
+     * @param font font whose descriptor and average width are consulted
+     * @param text text to estimate; only its length is used
+     * @return the first estimate produced by the steps above
+     */
+    private float calculateFallbackWidth(PDFont font, String text) {
+        try {
+            // Method 1: Font bounding box approach
+            if (font.getFontDescriptor() != null
+                    && font.getFontDescriptor().getFontBoundingBox() != null) {
+
+                PDRectangle bbox = font.getFontDescriptor().getFontBoundingBox();
+                float avgCharWidth = bbox.getWidth() * 0.6f; // Conservative estimate
+                float fallbackWidth = text.length() * avgCharWidth;
+
+                log.debug("Bounding box fallback width: {}", fallbackWidth);
+                return fallbackWidth;
            }

+            // Method 2: Average font width
            try {
                float avgWidth = font.getAverageFontWidth();
-                return text.length() * avgWidth;
+                if (avgWidth > 0) {
+                    float fallbackWidth = text.length() * avgWidth;
+                    log.debug("Average width fallback: {}", fallbackWidth);
+                    return fallbackWidth;
+                }
            } catch (Exception e2) {
                log.debug("Average font width calculation failed: {}", e2.getMessage());
            }

-            float conservativeWidth = text.length() * 500f; // 500 units per character
-            log.debug(
-                    "All width calculation methods failed for font {}, using conservative estimate: {}",
-                    font.getName(),
-                    conservativeWidth);
-            return conservativeWidth;
+            // Method 3: Fixed per-character estimate (does not consult font metrics)
+            return calculateConservativeWidth(font, text);
+
+        } catch (Exception e) {
+            log.debug("Fallback width calculation failed: {}", e.getMessage());
+            return calculateConservativeWidth(font, text);
        }
    }

+    /**
+     * Last-resort width estimate that ignores the font's metrics: a flat 500 units per
+     * {@code char} of {@code text}. The estimate is logged at debug level together with the font
+     * name and the text itself.
+     *
+     * <p>NOTE(review): the measured text is written to the debug log; since this controller
+     * performs redaction, confirm that logging the text is acceptable.
+     *
+     * @param font font the estimate is reported for; only {@code getName()} is used
+     * @param text text to estimate; only its length affects the result
+     * @return {@code text.length() * 500f}
+     */
+    private float calculateConservativeWidth(PDFont font, String text) {
+        float conservativeWidth = text.length() * 500f;
+
+        log.debug(
+                "Conservative width estimate for font {} text '{}': {}",
+                font.getName(),
+                text,
+                conservativeWidth);
+        return conservativeWidth;
+    }
+
    private float calculateWidthAdjustment(TextSegment segment, List<MatchRange> matches) {
        try {
            if (segment.getFont() == null || segment.getFontSize() <= 0) {
@ -1070,7 +1216,8 @@ public class RedactController {
            }

            String fontName = segment.getFont().getName();
-            if (fontName != null && (fontName.contains("HOEPAP") || isFontSubset(fontName))) {
+            if (fontName != null
+                    && (fontName.contains("HOEPAP") || TextEncodingHelper.isFontSubset(fontName))) {
                log.debug("Skipping width adjustment for problematic/subset font: {}", fontName);
                return 0;
            }
@ -1196,6 +1343,19 @@ public class RedactController {
            for (COSBase element : originalArray) {
                if (element instanceof COSString cosString) {
                    String originalText = cosString.getString();
+
+                    if (segment.getFont() != null
+                            && !TextEncodingHelper.isTextSegmentRemovable(
+                                    segment.getFont(), originalText)) {
+                        log.debug(
+                                "Skipping TJ text part '{}' - cannot be processed reliably with font {}",
+                                originalText,
+                                segment.getFont().getName());
+                        newArray.add(element); // Keep original unchanged
+                        textOffsetInSegment += originalText.length();
+                        continue;
+                    }
+
                    StringBuilder newText = new StringBuilder(originalText);
                    boolean modified = false;

@ -1207,7 +1367,6 @@ public class RedactController {
                        int overlapEnd = Math.min(match.getEndPos(), stringEndInPage);

                        if (overlapStart < overlapEnd) {
-                            modified = true;
                            int redactionStartInString = overlapStart - stringStartInPage;
                            int redactionEndInString = overlapEnd - stringStartInPage;
                            if (redactionStartInString >= 0
@ -1216,6 +1375,16 @@ public class RedactController {
                                        originalText.substring(
                                                redactionStartInString, redactionEndInString);

+                                if (segment.getFont() != null
+                                        && !TextEncodingHelper.isTextSegmentRemovable(
+                                                segment.getFont(), originalPart)) {
+                                    log.debug(
+                                            "Skipping TJ text part '{}' - cannot be redacted reliably",
+                                            originalPart);
+                                    continue; // Skip this redaction, keep original text
+                                }
+
+                                modified = true;
                                float originalWidth = 0;
                                if (segment.getFont() != null && segment.getFontSize() > 0) {
                                    try {
@ -1320,8 +1489,13 @@ public class RedactController {
            int totalFonts = 0;
            int customEncodedFonts = 0;
            int subsetFonts = 0;
+            int unreliableFonts = 0;

            for (PDPage page : document.getPages()) {
+                if (TextFinderUtils.hasProblematicFonts(page)) {
+                    log.debug("Page contains fonts flagged as problematic by TextFinderUtils");
+                }
+
                PDResources resources = page.getResources();
                if (resources == null) {
                    continue;
@ -1333,190 +1507,64 @@ public class RedactController {
                        if (font != null) {
                            totalFonts++;

-                            boolean isSubset = isFontSubset(font.getName());
-                            boolean isProblematic = hasProblematicFontCharacteristics(font);
+                            // Enhanced analysis using helper classes
+                            boolean isSubset = TextEncodingHelper.isFontSubset(font.getName());
+                            boolean hasCustomEncoding = TextEncodingHelper.hasCustomEncoding(font);
+                            boolean isReliable = WidthCalculator.isWidthCalculationReliable(font);
+                            boolean canCalculateWidths =
+                                    TextEncodingHelper.canCalculateBasicWidths(font);

                            if (isSubset) {
                                subsetFonts++;
                            }

-                            if (isProblematic) {
+                            if (hasCustomEncoding) {
                                customEncodedFonts++;
+                                log.debug("Font {} has custom encoding", font.getName());
+                            }
+
+                            if (!isReliable || !canCalculateWidths) {
+                                unreliableFonts++;
                                log.debug(
-                                        "Detected problematic font: {} (type: {})",
+                                        "Font {} flagged as unreliable: reliable={}, canCalculateWidths={}",
                                        font.getName(),
-                                        font.getClass().getSimpleName());
+                                        isReliable,
+                                        canCalculateWidths);
+                            }
+
+                            if (!TextFinderUtils.validateFontReliability(font)) {
+                                log.debug(
+                                        "Font {} failed comprehensive reliability check",
+                                        font.getName());
                            }
                        }
-                    } catch (IOException e) {
+                    } catch (Exception e) {
                        log.debug(
-                                "Font loading failed for {}: {}",
+                                "Font loading/analysis failed for {}: {}",
                                fontName.getName(),
                                e.getMessage());
                        customEncodedFonts++;
+                        unreliableFonts++;
+                        totalFonts++;
                    }
                }
            }

            log.info(
-                    "Font analysis: {}/{} fonts use custom encoding, {}/{} are subset fonts (subset fonts with standard encodings are fine)",
+                    "Enhanced font analysis: {}/{} custom encoding, {}/{} subset, {}/{} unreliable fonts",
                    customEncodedFonts,
                    totalFonts,
                    subsetFonts,
+                    totalFonts,
+                    unreliableFonts,
                    totalFonts);

-            return customEncodedFonts > 0;
-        } catch (Exception e) {
-            log.warn("Font detection analysis failed: {}", e.getMessage());
-            return false;
-        }
-    }
-
-    private boolean hasProblematicFontCharacteristics(PDFont font) {
-        try {
-            if (font.isDamaged()) {
-                log.debug("Font {} is marked as damaged by PDFBox", font.getName());
-                return true;
-            }
-
-            if (hasCustomEncoding(font)) {
-                log.debug(
-                        "Font {} uses custom encoding - text replacement will be unreliable",
-                        font.getName());
-                return true;
-            }
-
-            String fontType = font.getClass().getSimpleName();
-            if ("PDType3Font".equals(fontType)) {
-                log.debug("Font {} is Type3 - may have text replacement issues", font.getName());
-                return cannotCalculateBasicWidths(font);
-            }
-
-            log.debug("Font {} appears suitable for text replacement", font.getName());
-            return false;
+            // Consider document problematic if we have custom encodings or unreliable fonts
+            return customEncodedFonts > 0 || unreliableFonts > 0;

        } catch (Exception e) {
-            log.debug("Font analysis failed for {}: {}", font.getName(), e.getMessage());
-            return false;
-        }
-    }
-
-    private boolean hasCustomEncoding(PDFont font) {
-        try {
-            if (font instanceof PDSimpleFont simpleFont) {
-                try {
-                    Encoding encoding = simpleFont.getEncoding();
-                    if (encoding != null) {
-                        String encodingName = encoding.getEncodingName();
-
-                        // Check if it's one of the standard encodings
-                        if ("WinAnsiEncoding".equals(encodingName)
-                                || "MacRomanEncoding".equals(encodingName)
-                                || "StandardEncoding".equals(encodingName)
-                                || "MacExpertEncoding".equals(encodingName)
-                                || "SymbolEncoding".equals(encodingName)
-                                || "ZapfDingbatsEncoding".equals(encodingName)) {
-
-                            log.debug(
-                                    "Font {} uses standard encoding: {}",
-                                    font.getName(),
-                                    encodingName);
-                            return false;
-                        }
-
-                        if (encoding instanceof DictionaryEncoding) {
-                            log.debug(
-                                    "Font {} uses DictionaryEncoding - likely custom",
-                                    font.getName());
-                            return true;
-                        }
-
-                        log.debug(
-                                "Font {} uses non-standard encoding: {}",
-                                font.getName(),
-                                encodingName);
-                        return true;
-                    }
-                } catch (Exception e) {
-                    log.debug(
-                            "Could not determine encoding for font {}: {}",
-                            font.getName(),
-                            e.getMessage());
-                }
-            }
-
-            if (font instanceof org.apache.pdfbox.pdmodel.font.PDType0Font) {
-                log.debug("Font {} is Type0 (CID) - generally uses standard CMaps", font.getName());
-                return false; // Be forgiving with CID fonts
-            }
-
-            log.debug(
-                    "Font {} type {} - assuming standard encoding",
-                    font.getName(),
-                    font.getClass().getSimpleName());
-            return false;
-
-        } catch (Exception e) {
-            log.debug(
-                    "Custom encoding detection failed for font {}: {}",
-                    font.getName(),
-                    e.getMessage());
-            return false; // Be forgiving on detection failure
-        }
-    }
-
-    private boolean cannotCalculateBasicWidths(PDFont font) {
-        try {
-            float spaceWidth = font.getStringWidth(" ");
-            if (spaceWidth <= 0) {
-                return true;
-            }
-
-            String[] testChars = {"a", "A", "0", ".", "e", "!"};
-            for (String ch : testChars) {
-                try {
-                    float width = font.getStringWidth(ch);
-                    if (width > 0) {
-                        return false; // Found at least one character we can measure
-                    }
-                } catch (Exception e) {
-                }
-            }
-
-            return true; // Can't calculate width for any test characters
-        } catch (Exception e) {
-            return true; // Font failed basic width calculation
-        }
-    }
-
-    private boolean isFontSubset(String fontName) {
-        if (fontName == null) {
-            return false;
-        }
-        return fontName.matches("^[A-Z]{6}\\+.*");
-    }
-
-    private boolean fontSupportsCharacter(PDFont font, String character) {
-        if (font == null || character == null || character.isEmpty()) {
-            return false;
-        }
-
-        try {
-            byte[] encoded = font.encode(character);
-            if (encoded.length == 0) {
-                return false;
-            }
-
-            float width = font.getStringWidth(character);
-            return width > 0;
-
-        } catch (Exception e) {
-            log.debug(
-                    "Character '{}' not supported by font {}: {}",
-                    character,
-                    font.getName(),
-                    e.getMessage());
-            return false;
+            log.warn("Enhanced font detection analysis failed: {}", e.getMessage());
+            return true; // Assume problematic if analysis fails
        }
    }

--- a/app/core/src/main/java/stirling/software/SPDF/pdf/TextFinder.java
+++ b/app/core/src/main/java/stirling/software/SPDF/pdf/TextFinder.java
@ -10,8 +10,11 @@ import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.text.PDFTextStripper;
 import org.apache.pdfbox.text.TextPosition;

+import lombok.extern.slf4j.Slf4j;
+
 import stirling.software.SPDF.model.PDFText;

+@Slf4j
 public class TextFinder extends PDFTextStripper {

    private final String searchTerm;
@ -67,16 +70,40 @@ public class TextFinder extends PDFTextStripper {
        String processedSearchTerm = this.searchTerm.trim();
        String regex = this.useRegex ? processedSearchTerm : "\\Q" + processedSearchTerm + "\\E";
        if (this.wholeWordSearch) {
-            regex = "\\b" + regex + "\\b";
+            if (processedSearchTerm.length() == 1
+                    && Character.isDigit(processedSearchTerm.charAt(0))) {
+                regex = "(?<![\\w])" + regex + "(?![\\w])";
+            } else if (processedSearchTerm.length() == 1) {
+                regex = "(?<![\\w])" + regex + "(?![\\w])";
+            } else {
+                regex = "\\b" + regex + "\\b";
+            }
        }

        Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
        Matcher matcher = pattern.matcher(text);

+        log.debug(
+                "Searching for '{}' in page {} with regex '{}' (wholeWord: {}, useRegex: {})",
+                processedSearchTerm,
+                getCurrentPageNo(),
+                regex,
+                wholeWordSearch,
+                useRegex);
+
+        int matchCount = 0;
        while (matcher.find()) {
+            matchCount++;
            int matchStart = matcher.start();
            int matchEnd = matcher.end();

+            log.debug(
+                    "Found match #{} at positions {}-{}: '{}'",
+                    matchCount,
+                    matchStart,
+                    matchEnd,
+                    matcher.group());
+
            float minX = Float.MAX_VALUE;
            float minY = Float.MAX_VALUE;
            float maxX = Float.MIN_VALUE;
@ -85,6 +112,10 @@ public class TextFinder extends PDFTextStripper {

            for (int i = matchStart; i < matchEnd; i++) {
                if (i >= pageTextPositions.size()) {
+                    log.debug(
+                            "Position index {} exceeds available positions ({})",
+                            i,
+                            pageTextPositions.size());
                    continue;
                }
                TextPosition pos = pageTextPositions.get(i);
@ -97,6 +128,27 @@ public class TextFinder extends PDFTextStripper {
                }
            }

+            if (!foundPosition && matchStart < pageTextPositions.size()) {
+                log.debug(
+                        "Attempting to find nearby positions for match at {}-{}",
+                        matchStart,
+                        matchEnd);
+
+                for (int i = Math.max(0, matchStart - 5);
+                        i < Math.min(pageTextPositions.size(), matchEnd + 5);
+                        i++) {
+                    TextPosition pos = pageTextPositions.get(i);
+                    if (pos != null) {
+                        foundPosition = true;
+                        minX = Math.min(minX, pos.getX());
+                        maxX = Math.max(maxX, pos.getX() + pos.getWidth());
+                        minY = Math.min(minY, pos.getY() - pos.getHeight());
+                        maxY = Math.max(maxY, pos.getY());
+                        break;
+                    }
+                }
+            }
+
            if (foundPosition) {
                foundTexts.add(
                        new PDFText(
@ -106,13 +158,59 @@ public class TextFinder extends PDFTextStripper {
                                maxX,
                                maxY,
                                matcher.group()));
+                log.debug(
+                        "Added PDFText for match: page={}, bounds=({},{},{},{}), text='{}'",
+                        getCurrentPageNo() - 1,
+                        minX,
+                        minY,
+                        maxX,
+                        maxY,
+                        matcher.group());
+            } else {
+                log.warn(
+                        "Found text match '{}' but no valid position data at {}-{}",
+                        matcher.group(),
+                        matchStart,
+                        matchEnd);
            }
        }

+        log.debug(
+                "Page {} search complete: found {} matches for '{}'",
+                getCurrentPageNo(),
+                matchCount,
+                processedSearchTerm);
+
        super.endPage(page);
    }

    public List<PDFText> getFoundTexts() {
        return foundTexts;
    }
+
+    /**
+     * Builds a human-readable dump of the text and positions currently held by this finder.
+     *
+     * <p>The report lists the extracted text length, the number of stored positions, the text
+     * with line breaks escaped, and one line for each of the first 50 characters showing the
+     * character, its hex value and the coordinates of the position stored at the same index (or
+     * {@code null} when there is none).
+     *
+     * @return the multi-line diagnostic report
+     */
+    public String getDebugInfo() {
+        String extracted = pageTextBuilder.toString();
+        String escaped = extracted.replace("\n", "\\n").replace("\r", "\\r");
+        int positionCount = pageTextPositions.size();
+
+        StringBuilder report = new StringBuilder();
+        report.append("Extracted text length: ").append(pageTextBuilder.length()).append("\n");
+        report.append("Position count: ").append(positionCount).append("\n");
+        report.append("Text content: '").append(escaped).append("'\n");
+
+        // Only the first 50 characters are listed individually.
+        int limit = Math.min(extracted.length(), 50);
+        for (int idx = 0; idx < limit; idx++) {
+            char ch = extracted.charAt(idx);
+
+            String location = "null";
+            if (idx < positionCount) {
+                TextPosition position = pageTextPositions.get(idx);
+                if (position != null) {
+                    location = String.format("(%.1f,%.1f)", position.getX(), position.getY());
+                }
+            }
+
+            report.append(String.format("  [%d] '%c' (0x%02X) -> %s\n", idx, ch, (int) ch, location));
+        }
+
+        return report.toString();
+    }
 }
--- a/app/core/src/main/java/stirling/software/SPDF/utils/text/TextEncodingHelper.java
+++ b/app/core/src/main/java/stirling/software/SPDF/utils/text/TextEncodingHelper.java
@ -0,0 +1,351 @@
+package stirling.software.SPDF.utils.text;
+
+import java.io.IOException;
+
+import org.apache.pdfbox.pdmodel.font.PDFont;
+import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
+import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding;
+import org.apache.pdfbox.pdmodel.font.encoding.Encoding;
+
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+public class TextEncodingHelper {
+
+    /**
+     * Decides whether {@code text} is considered encodable with {@code font}.
+     *
+     * <p>A non-empty result from {@code font.encode(text)} is accepted immediately. An empty
+     * result falls back to per-code-point validation. When encoding throws, the per-code-point
+     * fallback is only attempted for subset fonts or fonts with a custom encoding; any other font
+     * is rejected.
+     *
+     * @param font font to test; {@code null} yields {@code false}
+     * @param text text to test; {@code null} or empty yields {@code false}
+     * @return {@code true} if the text is considered encodable
+     */
+    public static boolean canEncodeCharacters(PDFont font, String text) {
+        if (font == null || text == null || text.isEmpty()) {
+            return false;
+        }
+
+        final String fontLabel = font.getName() != null ? font.getName() : "Unknown";
+
+        try {
+            // A non-empty encoding of the whole string is accepted without further checks.
+            if (font.encode(text).length > 0) {
+                log.debug(
+                        "Text '{}' has good full-string encoding for font {} - permissively allowing",
+                        text,
+                        fontLabel);
+                return true;
+            }
+
+            // Empty result: validate one code point at a time instead.
+            log.debug(
+                    "Full encoding failed for '{}' - using array-based fallback for font {}",
+                    text,
+                    fontLabel);
+            return validateAsCodePointArray(font, text);
+
+        } catch (IOException | IllegalArgumentException e) {
+            log.debug(
+                    "Encoding exception for text '{}' with font {} - trying array fallback: {}",
+                    text,
+                    fontLabel,
+                    e.getMessage());
+
+            // Only subset or custom-encoded fonts get the per-code-point second chance.
+            boolean eligibleForFallback = isFontSubset(font.getName()) || hasCustomEncoding(font);
+            return eligibleForFallback && validateAsCodePointArray(font, text);
+        }
+    }
+
+    /**
+     * Validates {@code text} one Unicode code point at a time against {@code font}.
+     *
+     * <p>A code point counts as successful when {@code font.encode} returns a non-empty result
+     * and {@code font.getStringWidth} returns a non-negative width. The text is accepted when at
+     * least 95% of its code points are successful.
+     *
+     * @param font font used for encoding and width lookups
+     * @param text text to validate
+     * @return {@code true} if the success rate is at least 0.95
+     */
+    private static boolean validateAsCodePointArray(PDFont font, String text) {
+        int totalCodePoints = 0;
+        int successfulCodePoints = 0;
+
+        // Step by code point so that surrogate pairs are tested as a single character.
+        for (int i = 0; i < text.length(); ) {
+            int codePoint = text.codePointAt(i);
+            String charStr = new String(Character.toChars(codePoint));
+            String hex = Integer.toHexString(codePoint).toUpperCase(java.util.Locale.ROOT);
+            totalCodePoints++;
+
+            try {
+                byte[] charEncoded = font.encode(charStr);
+                if (charEncoded.length > 0) {
+                    float charWidth = font.getStringWidth(charStr);
+
+                    if (charWidth >= 0) {
+                        successfulCodePoints++;
+                        log.debug("Code point '{}' (U+{}) encoded successfully", charStr, hex);
+                    } else {
+                        log.debug(
+                                "Code point '{}' (U+{}) has invalid width: {}",
+                                charStr,
+                                hex,
+                                charWidth);
+                    }
+                } else {
+                    log.debug(
+                            "Code point '{}' (U+{}) encoding failed - empty result",
+                            charStr,
+                            hex);
+                }
+            } catch (IOException | IllegalArgumentException e) {
+                log.debug(
+                        "Code point '{}' (U+{}) validation failed: {}",
+                        charStr,
+                        hex,
+                        e.getMessage());
+            }
+
+            i += Character.charCount(codePoint);
+        }
+
+        double successRate =
+                totalCodePoints > 0 ? (double) successfulCodePoints / totalCodePoints : 0;
+        boolean isAcceptable = successRate >= 0.95;
+
+        // SLF4J only substitutes "{}"; the former "{:.1f}" was printed literally and shifted the
+        // remaining arguments, dropping the verdict. Format the percentage explicitly instead.
+        if (log.isDebugEnabled()) {
+            log.debug(
+                    "Array validation for '{}': {}/{} code points successful ({}%) - {}",
+                    text,
+                    successfulCodePoints,
+                    totalCodePoints,
+                    String.format(java.util.Locale.ROOT, "%.1f", successRate * 100),
+                    isAcceptable ? "ALLOWING" : "rejecting");
+        }
+
+        return isAcceptable;
+    }
+
+    /**
+     * Decides whether a text segment drawn with {@code font} may be removed.
+     *
+     * <p>Text classified as simple by {@code isSimpleCharacter} only has to survive a call to
+     * {@code font.encode} and {@code font.getStringWidth} without throwing. All other text is
+     * delegated to {@link #isTextFullyRemovable(PDFont, String)}.
+     *
+     * @param font font of the segment; {@code null} yields {@code false}
+     * @param text segment text; {@code null} or empty yields {@code false}
+     * @return {@code true} if the segment is considered removable
+     */
+    public static boolean isTextSegmentRemovable(PDFont font, String text) {
+        if (font == null || text == null || text.isEmpty()) {
+            return false;
+        }
+
+        log.debug(
+                "Evaluating text segment for removal: '{}' with font {}",
+                text,
+                font.getName() != null ? font.getName() : "Unknown Font");
+
+        // Anything that is not short, plain text goes through the comprehensive checks.
+        if (!isSimpleCharacter(text)) {
+            return isTextFullyRemovable(font, text);
+        }
+
+        boolean removable;
+        try {
+            // Results are discarded on purpose: only "does not throw" matters here.
+            font.encode(text);
+            font.getStringWidth(text);
+            log.debug(
+                    "Text '{}' is a simple character and passed validation - allowing removal",
+                    text);
+            removable = true;
+        } catch (Exception e) {
+            log.debug(
+                    "Simple character '{}' failed basic validation with font {}: {}",
+                    text,
+                    font.getName() != null ? font.getName() : "Unknown",
+                    e.getMessage());
+            removable = false;
+        }
+        return removable;
+    }
+
+    /**
+     * Runs the comprehensive removability checks for {@code text} drawn with {@code font}.
+     *
+     * <p>The text passes when it is encodable according to
+     * {@link #canEncodeCharacters(PDFont, String)}, its string width is not negative, the font
+     * has a font descriptor, and reading the descriptor's bounding box does not throw an
+     * {@link IllegalArgumentException}.
+     *
+     * @param font font of the text; {@code null} yields {@code false}
+     * @param text text to check; {@code null} or empty yields {@code false}
+     * @return {@code true} if every check passes; {@code false} otherwise, including when an
+     *     {@link IOException} or {@link IllegalArgumentException} is raised along the way
+     */
+    public static boolean isTextFullyRemovable(PDFont font, String text) {
+        if (font == null || text == null || text.isEmpty()) {
+            return false;
+        }
+
+        final String fontLabel = font.getName() != null ? font.getName() : "Unknown";
+
+        try {
+            // Check 1: encoding capability
+            boolean encodable = canEncodeCharacters(font, text);
+            if (!encodable) {
+                log.debug("Text '{}' failed encoding validation for font {}", text, fontLabel);
+                return false;
+            }
+
+            // Check 2: width must be computable; zero is allowed, negative is not
+            float measuredWidth = font.getStringWidth(text);
+            if (measuredWidth < 0) {
+                log.debug(
+                        "Text '{}' has invalid width {} for font {}",
+                        text,
+                        measuredWidth,
+                        fontLabel);
+                return false;
+            }
+
+            // Check 3: a font descriptor must be present
+            var descriptor = font.getFontDescriptor();
+            if (descriptor == null) {
+                log.debug("Missing font descriptor for font {}", fontLabel);
+                return false;
+            }
+
+            // Check 4: the bounding box must be readable
+            try {
+                descriptor.getFontBoundingBox();
+            } catch (IllegalArgumentException e) {
+                log.debug(
+                        "Font bounding box unavailable for font {}: {}",
+                        fontLabel,
+                        e.getMessage());
+                return false;
+            }
+
+            log.debug("Text '{}' passed comprehensive validation for font {}", text, fontLabel);
+            return true;
+
+        } catch (IOException e) {
+            log.debug(
+                    "Text '{}' failed validation for font {} due to IO error: {}",
+                    text,
+                    fontLabel,
+                    e.getMessage());
+            return false;
+        } catch (IllegalArgumentException e) {
+            log.debug(
+                    "Text '{}' failed validation for font {} due to argument error: {}",
+                    text,
+                    fontLabel,
+                    e.getMessage());
+            return false;
+        }
+    }
+
+    /**
+     * Tells whether {@code text} is short, plain text.
+     *
+     * <p>Plain means at most 20 chars, each of which is a letter, a digit, whitespace, or one of
+     * the ASCII punctuation marks in the allow-list below.
+     *
+     * @param text text to classify; {@code null} or empty yields {@code false}
+     * @return {@code true} if every char of the text is allowed and the length limit is met
+     */
+    private static boolean isSimpleCharacter(String text) {
+        if (text == null || text.isEmpty() || text.length() > 20) {
+            return false;
+        }
+
+        final String allowedPunctuation = ".,!?;:()-[]{}\"'/@#$%&*+=<>|\\~`";
+
+        for (char ch : text.toCharArray()) {
+            boolean alphanumericOrSpace =
+                    Character.isLetterOrDigit(ch) || Character.isWhitespace(ch);
+            boolean commonPunctuation =
+                    ch >= 32 && ch <= 126 && allowedPunctuation.indexOf(ch) >= 0;
+
+            if (!alphanumericOrSpace && !commonPunctuation) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * Reports whether {@code font} is treated as using a custom encoding.
+     *
+     * <p>For a {@link PDSimpleFont} the answer is {@code true} when its encoding is a
+     * {@link DictionaryEncoding}, when the encoding's simple class name contains "Custom" or
+     * "Dictionary", or when inspecting the encoding throws. Every other case, including Type0
+     * fonts, yields {@code false}.
+     *
+     * @param font font to inspect
+     * @return {@code true} if the font is treated as custom-encoded
+     */
+    public static boolean hasCustomEncoding(PDFont font) {
+        try {
+            if (font instanceof PDSimpleFont simple) {
+                try {
+                    Encoding fontEncoding = simple.getEncoding();
+
+                    // instanceof is false for null, so no separate null check is needed here.
+                    if (fontEncoding instanceof DictionaryEncoding) {
+                        log.debug("Font {} uses DictionaryEncoding (custom)", font.getName());
+                        return true;
+                    }
+
+                    if (fontEncoding != null) {
+                        String encodingClass = fontEncoding.getClass().getSimpleName();
+                        boolean looksCustom =
+                                encodingClass.contains("Custom")
+                                        || encodingClass.contains("Dictionary");
+                        if (looksCustom) {
+                            log.debug(
+                                    "Font {} uses custom encoding: {}",
+                                    font.getName(),
+                                    encodingClass);
+                            return true;
+                        }
+                    }
+                } catch (Exception e) {
+                    log.debug(
+                            "Encoding detection failed for font {}: {}",
+                            font.getName(),
+                            e.getMessage());
+                    return true; // Assume custom if detection fails
+                }
+            }
+
+            if (font instanceof org.apache.pdfbox.pdmodel.font.PDType0Font) {
+                log.debug(
+                        "Font {} is Type0 (CID) - generally uses standard CMaps",
+                        font.getName() != null ? font.getName() : "Unknown");
+            } else {
+                log.debug(
+                        "Font {} type {} - assuming standard encoding",
+                        font.getName() != null ? font.getName() : "Unknown",
+                        font.getClass().getSimpleName());
+            }
+            return false;
+
+        } catch (IllegalArgumentException e) {
+            log.debug(
+                    "Custom encoding detection failed for font {}: {}",
+                    font.getName() != null ? font.getName() : "Unknown",
+                    e.getMessage());
+            return false; // Be forgiving on detection failure
+        }
+    }
+
+    /**
+     * Reports whether {@code font} supports {@code character}.
+     *
+     * <p>Support means {@code font.encode} returns a non-empty result and
+     * {@code font.getStringWidth} returns a width greater than zero.
+     *
+     * @param font font to test; {@code null} yields {@code false}
+     * @param character text to test; {@code null} or empty yields {@code false}
+     * @return {@code true} if the character is supported; {@code false} otherwise, including
+     *     when encoding or measuring throws an {@link IOException} or
+     *     {@link IllegalArgumentException}
+     */
+    public static boolean fontSupportsCharacter(PDFont font, String character) {
+        boolean missingInput = font == null || character == null || character.isEmpty();
+        if (missingInput) {
+            return false;
+        }
+
+        try {
+            // The width is only queried when the encoding produced at least one byte.
+            return font.encode(character).length != 0 && font.getStringWidth(character) > 0;
+        } catch (IOException | IllegalArgumentException e) {
+            String fontLabel = font.getName() != null ? font.getName() : "Unknown";
+            log.debug(
+                    "Character '{}' not supported by font {}: {}",
+                    character,
+                    fontLabel,
+                    e.getMessage());
+            return false;
+        }
+    }
+
+    /** Six uppercase ASCII letters followed by '+', compiled once instead of on every call. */
+    private static final java.util.regex.Pattern SUBSET_FONT_NAME =
+            java.util.regex.Pattern.compile("^[A-Z]{6}\\+.*");
+
+    /**
+     * Tells whether {@code fontName} starts with six uppercase ASCII letters followed by
+     * {@code '+'}.
+     *
+     * @param fontName font name to inspect; {@code null} yields {@code false}
+     * @return {@code true} if the whole name matches the prefix pattern
+     */
+    public static boolean isFontSubset(String fontName) {
+        return fontName != null && SUBSET_FONT_NAME.matcher(fontName).matches();
+    }
+
+    /**
+     * Probes whether {@code font} can measure a few basic characters.
+     *
+     * <p>The font must report a positive width for a single space and for at least one of the
+     * probe characters {@code a A 0 . e !}.
+     *
+     * @param font font to probe; {@code null} yields {@code false}
+     * @return {@code true} if the space and at least one probe character have a positive width
+     */
+    public static boolean canCalculateBasicWidths(PDFont font) {
+        if (font == null) {
+            return false; // guard: this used to surface as an uncaught NullPointerException
+        }
+
+        try {
+            // A font that cannot measure a plain space is rejected outright.
+            if (font.getStringWidth(" ") <= 0) {
+                return false;
+            }
+        } catch (IOException | IllegalArgumentException e) {
+            log.debug(
+                    "Font {} failed basic width calculation for space: {}",
+                    font.getName(),
+                    e.getMessage());
+            return false;
+        }
+
+        String[] testChars = {"a", "A", "0", ".", "e", "!"};
+        for (String ch : testChars) {
+            try {
+                if (font.getStringWidth(ch) > 0) {
+                    return true; // one measurable character is enough
+                }
+            } catch (IOException | IllegalArgumentException ignored) {
+                // Best effort: one unmeasurable probe is not conclusive, so try the next one.
+                log.trace(
+                        "Font {} cannot measure probe character '{}': {}",
+                        font.getName(),
+                        ch,
+                        ignored.getMessage());
+            }
+        }
+
+        return false; // no probe character could be measured
+    }
+}
--- a/app/core/src/main/java/stirling/software/SPDF/utils/text/TextFinderUtils.java
+++ b/app/core/src/main/java/stirling/software/SPDF/utils/text/TextFinderUtils.java
@ -0,0 +1,140 @@
+package stirling.software.SPDF.utils.text;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDResources;
+
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+public class TextFinderUtils {
+
+    /**
+     * Decides whether {@code font} is usable enough to be considered reliable.
+     *
+     * <p>A font is reliable when {@code TextEncodingHelper.canCalculateBasicWidths} succeeds or,
+     * failing that, when at least one of eight basic probe strings is encodable according to
+     * {@code TextEncodingHelper.canEncodeCharacters}. A damaged font is only logged, not
+     * rejected.
+     *
+     * @param font font to validate; {@code null} yields {@code false}
+     * @return {@code true} if the font is considered reliable
+     */
+    public static boolean validateFontReliability(org.apache.pdfbox.pdmodel.font.PDFont font) {
+        if (font == null) {
+            return false;
+        }
+
+        if (font.isDamaged()) {
+            log.debug(
+                    "Font {} is marked as damaged - using TextEncodingHelper validation",
+                    font.getName());
+        }
+
+        if (TextEncodingHelper.canCalculateBasicWidths(font)) {
+            log.debug(
+                    "Font {} passed basic width calculations - considering reliable",
+                    font.getName());
+            return true;
+        }
+
+        // Second chance: count how many basic probes the font can encode at all.
+        List<String> probes = List.of("1", "2", "3", "a", "A", "e", "E", " ");
+        long encodableProbes =
+                probes.stream()
+                        .filter(probe -> TextEncodingHelper.canEncodeCharacters(font, probe))
+                        .count();
+
+        if (encodableProbes == 0) {
+            log.debug("Font {} failed all basic tests - considering unreliable", font.getName());
+            return false;
+        }
+
+        log.debug(
+                "Font {} can process {}/{} basic characters - considering reliable",
+                font.getName(),
+                encodableProbes,
+                probes.size());
+        return true;
+    }
+
+    /**
+     * Compiles one case-insensitive pattern per usable search term.
+     *
+     * <p>Terms that are {@code null} or blank after trimming are skipped. Non-regex terms are
+     * quoted literally. Terms whose pattern cannot be created are logged and skipped.
+     *
+     * @param searchTerms terms to compile
+     * @param useRegex whether each term is already a regular expression
+     * @param wholeWordSearch whether word boundaries are applied around each term
+     * @return the compiled patterns, in the iteration order of {@code searchTerms}
+     */
+    public static List<Pattern> createOptimizedSearchPatterns(
+            Set<String> searchTerms, boolean useRegex, boolean wholeWordSearch) {
+        final int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
+        List<Pattern> compiled = new ArrayList<>();
+
+        for (String term : searchTerms) {
+            if (term == null) {
+                continue;
+            }
+            String trimmed = term.trim();
+            if (trimmed.isEmpty()) {
+                continue;
+            }
+
+            try {
+                String expression = useRegex ? trimmed : Pattern.quote(trimmed);
+                if (wholeWordSearch) {
+                    expression = applyWordBoundaries(trimmed, expression);
+                }
+
+                compiled.add(Pattern.compile(expression, flags));
+
+                log.debug("Created search pattern: '{}' -> '{}'", trimmed, expression);
+
+            } catch (Exception e) {
+                log.warn("Failed to create pattern for term '{}': {}", term, e.getMessage());
+            }
+        }
+
+        return compiled;
+    }
+
+    /**
+     * Wraps {@code patternString} so that it only matches whole words.
+     *
+     * <p>Single-character terms are guarded with look-arounds that forbid an adjacent word
+     * character; longer terms use {@code \b}. The pattern is first enclosed in a non-capturing
+     * group so that the boundaries apply to the whole expression: without the group, a regex such
+     * as {@code foo|bar} became {@code \bfoo|bar\b}, where each boundary only binds to one
+     * alternative.
+     *
+     * @param originalTerm the trimmed search term, used only to choose the boundary style
+     * @param patternString the regular expression (quoted or user-supplied) to wrap
+     * @return the wrapped regular expression
+     */
+    private static String applyWordBoundaries(String originalTerm, String patternString) {
+        // Non-capturing, so existing capture-group numbering in user regexes is unaffected.
+        String grouped = "(?:" + patternString + ")";
+
+        if (originalTerm.length() == 1) {
+            return "(?<![\\w])" + grouped + "(?![\\w])";
+        }
+        return "\\b" + grouped + "\\b";
+    }
+
+    /**
+     * Reports whether more than half of the fonts in the page's resources are unusable.
+     *
+     * <p>A font is unusable when {@code validateFontReliability} rejects it. A font that fails
+     * to load is counted towards the total but not as unusable.
+     *
+     * @param page page to analyse; {@code null} yields {@code false}
+     * @return {@code true} if the unusable fonts form a strict majority; {@code false} when the
+     *     page has no resources or the analysis itself fails
+     */
+    public static boolean hasProblematicFonts(PDPage page) {
+        if (page == null) {
+            return false;
+        }
+
+        try {
+            PDResources pageResources = page.getResources();
+            if (pageResources == null) {
+                return false;
+            }
+
+            int seen = 0;
+            int unusable = 0;
+
+            for (org.apache.pdfbox.cos.COSName fontKey : pageResources.getFontNames()) {
+                try {
+                    org.apache.pdfbox.pdmodel.font.PDFont candidate =
+                            pageResources.getFont(fontKey);
+                    if (candidate == null) {
+                        continue;
+                    }
+                    seen++;
+                    if (!validateFontReliability(candidate)) {
+                        unusable++;
+                    }
+                } catch (Exception e) {
+                    log.debug("Font loading failed for {}: {}", fontKey.getName(), e.getMessage());
+                    seen++;
+                }
+            }
+
+            // Strict majority; with no fonts at all the page is never problematic.
+            boolean majorityUnusable = seen > 0 && unusable * 2 > seen;
+            log.debug(
+                    "Page font analysis: {}/{} fonts are completely unusable - page {} problematic",
+                    unusable,
+                    seen,
+                    majorityUnusable ? "IS" : "is NOT");
+
+            return majorityUnusable;
+
+        } catch (Exception e) {
+            log.warn("Font analysis failed for page: {}", e.getMessage());
+            return false; // Be permissive if analysis fails
+        }
+    }
+}
--- a/app/core/src/main/java/stirling/software/SPDF/utils/text/WidthCalculator.java
+++ b/app/core/src/main/java/stirling/software/SPDF/utils/text/WidthCalculator.java
@ -0,0 +1,136 @@
+package stirling.software.SPDF.utils.text;
+
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.apache.pdfbox.pdmodel.font.PDFont;
+
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+public class WidthCalculator {
+
+    private static final int FONT_SCALE_FACTOR = 1000;
+
+    /**
+     * Computes the width of {@code text} set in {@code font} at {@code fontSize}.
+     *
+     * <p>Text the font cannot encode is estimated with the fallback calculation. Otherwise the
+     * font's own string width is scaled by {@code fontSize / FONT_SCALE_FACTOR}; if that lookup
+     * throws, the width is summed character by character instead.
+     *
+     * @param font font the text is set in
+     * @param text text to measure
+     * @param fontSize font size to scale by
+     * @return the computed width, or {@code 0} when the font or text is missing or empty or the
+     *     font size is not positive
+     */
+    public static float calculateAccurateWidth(PDFont font, String text, float fontSize) {
+        boolean nothingToMeasure =
+                font == null || text == null || text.isEmpty() || fontSize <= 0;
+        if (nothingToMeasure) {
+            return 0;
+        }
+
+        boolean encodable = TextEncodingHelper.canEncodeCharacters(font, text);
+        if (!encodable) {
+            log.debug(
+                    "Text cannot be encoded by font {}, using fallback width calculation",
+                    font.getName());
+            return calculateFallbackWidth(font, text, fontSize);
+        }
+
+        try {
+            float unscaled = font.getStringWidth(text);
+            float scaled = (unscaled / FONT_SCALE_FACTOR) * fontSize;
+
+            log.debug(
+                    "Direct width calculation successful for font {}: {} -> {}",
+                    font.getName(),
+                    unscaled,
+                    scaled);
+            return scaled;
+
+        } catch (Exception e) {
+            log.debug(
+                    "Direct width calculation failed for font {}: {}",
+                    font.getName(),
+                    e.getMessage());
+            return calculateWidthWithCharacterIteration(font, text, fontSize);
+        }
+    }
+
+    /**
+     * Sums the width of {@code text} one Unicode code point at a time.
+     *
+     * <p>Each code point is encoded with the font and the resulting bytes are decoded back into
+     * a character code with {@code font.readCode}, so multi-byte codes are looked up with their
+     * full value rather than their first byte. Iterating by code point keeps surrogate pairs
+     * together. A code point that cannot be encoded or measured contributes the font's average
+     * width.
+     *
+     * @param font font the text is set in
+     * @param text text to measure
+     * @param fontSize font size to scale by
+     * @return the summed width, or the fallback estimate if the iteration itself fails
+     */
+    private static float calculateWidthWithCharacterIteration(
+            PDFont font, String text, float fontSize) {
+        try {
+            float totalWidth = 0;
+
+            for (int i = 0; i < text.length(); ) {
+                int codePoint = text.codePointAt(i);
+                i += Character.charCount(codePoint);
+                String character = new String(Character.toChars(codePoint));
+
+                float glyphWidth;
+                try {
+                    byte[] encoded = font.encode(character);
+                    if (encoded.length > 0) {
+                        // Let the font decode its own bytes: codes may span more than one byte.
+                        int glyphCode = font.readCode(new java.io.ByteArrayInputStream(encoded));
+                        glyphWidth = font.getWidth(glyphCode);
+
+                        if (glyphWidth == 0) {
+                            try {
+                                glyphWidth = font.getWidthFromFont(glyphCode);
+                            } catch (Exception e2) {
+                                glyphWidth = font.getAverageFontWidth();
+                            }
+                        }
+                    } else {
+                        glyphWidth = font.getAverageFontWidth();
+                    }
+                } catch (Exception e2) {
+                    glyphWidth = font.getAverageFontWidth();
+                }
+
+                totalWidth += (glyphWidth / FONT_SCALE_FACTOR) * fontSize;
+            }
+
+            log.debug("Character iteration width calculation: {}", totalWidth);
+            return totalWidth;
+
+        } catch (Exception e) {
+            log.debug("Character iteration failed: {}", e.getMessage());
+            return calculateFallbackWidth(font, text, fontSize);
+        }
+    }
+
+    /**
+     * Estimates the width of {@code text} when it cannot be measured directly.
+     *
+     * <p>Uses 60% of the font bounding-box width per char when a bounding box is available,
+     * otherwise the font's average width per char. If either lookup throws, half the font size
+     * per char is used.
+     *
+     * @param font font the text is set in
+     * @param text text to estimate; its {@code length()} is used as the character count
+     * @param fontSize font size to scale by
+     * @return the estimated width
+     */
+    private static float calculateFallbackWidth(PDFont font, String text, float fontSize) {
+        final int charCount = text.length();
+
+        try {
+            PDRectangle boundingBox =
+                    font.getFontDescriptor() != null
+                            ? font.getFontDescriptor().getFontBoundingBox()
+                            : null;
+
+            if (boundingBox != null) {
+                float avgCharWidth =
+                        boundingBox.getWidth() / FONT_SCALE_FACTOR * 0.6f; // Conservative estimate
+                float estimate = charCount * avgCharWidth * fontSize;
+
+                log.debug("Bounding box fallback width: {}", estimate);
+                return estimate;
+            }
+
+            float averageGlyphWidth = font.getAverageFontWidth();
+            float estimate = (charCount * averageGlyphWidth / FONT_SCALE_FACTOR) * fontSize;
+
+            log.debug("Average width fallback: {}", estimate);
+            return estimate;
+
+        } catch (Exception e) {
+            float lastResort = charCount * 0.5f * fontSize;
+            log.debug("Conservative fallback width for font {}: {}", font.getName(), lastResort);
+            return lastResort;
+        }
+    }
+
+    /**
+     * Reports whether width calculations for {@code font} can be trusted.
+     *
+     * <p>The font must be non-null, not damaged, able to measure basic characters according to
+     * {@code TextEncodingHelper.canCalculateBasicWidths}, and must not use a custom encoding
+     * according to {@code TextEncodingHelper.hasCustomEncoding}. The checks run in that order and
+     * stop at the first failure.
+     *
+     * @param font font to assess
+     * @return {@code true} if none of the checks fails
+     */
+    public static boolean isWidthCalculationReliable(PDFont font) {
+        if (font == null) {
+            return false;
+        }
+
+        // Pick the message of the first failing check; null means every check passed.
+        String failureMessage = null;
+        if (font.isDamaged()) {
+            failureMessage = "Font {} is damaged";
+        } else if (!TextEncodingHelper.canCalculateBasicWidths(font)) {
+            failureMessage = "Font {} cannot perform basic width calculations";
+        } else if (TextEncodingHelper.hasCustomEncoding(font)) {
+            failureMessage = "Font {} has custom encoding";
+        }
+
+        if (failureMessage != null) {
+            log.debug(failureMessage, font.getName());
+            return false;
+        }
+        return true;
+    }
+}
--- a/stirling-pdf/src/test/java/stirling/software/SPDF/pdf/TextFinderTest.java
+++ b/stirling-pdf/src/test/java/stirling/software/SPDF/pdf/TextFinderTest.java
@ -1,7 +1,5 @@
 package stirling.software.SPDF.pdf;

-import static org.junit.jupiter.api.Assertions.*;
-
 import java.io.IOException;
 import java.util.List;

@ -12,6 +10,11 @@ import org.apache.pdfbox.pdmodel.common.PDRectangle;
 import org.apache.pdfbox.pdmodel.font.PDType1Font;
 import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
 import org.junit.jupiter.api.AfterEach;
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.DisplayName;
 import org.junit.jupiter.api.Nested;
@ -468,6 +471,106 @@ class TextFinderTest {
        }
    }

+    /**
+     * Searches for one-character terms (digits and letters) through {@link TextFinder}, with and
+     * without whole-word matching.
+     */
+    @Nested
+    @DisplayName("Single Character and Digit Tests")
+    class SingleCharacterAndDigitTests {
+        /**
+         * Runs a {@link TextFinder} for {@code term} against the enclosing test's document and
+         * returns its matches. The second constructor argument is {@code false} in every test here.
+         */
+        private List<PDFText> runFinder(String term, boolean wholeWord) throws IOException {
+            TextFinder finder = new TextFinder(term, false, wholeWord);
+            finder.getText(document);
+            return finder.getFoundTexts();
+        }
+
+        @Test
+        @DisplayName("Should find single digits in various contexts with whole word search")
+        void findSingleDigitsWholeWord() throws IOException {
+            String pageText = "Item 1 of 5 costs $2.50. Order number: 1234. Reference: A1B.";
+            addTextToPage(pageText);
+
+            // "1234" and "A1B" contain the digit but must not count as whole-word hits.
+            List<PDFText> hits = runFinder("1", true);
+
+            assertEquals(
+                    1,
+                    hits.size(),
+                    "Should find exactly one standalone '1', not the ones embedded in other numbers/codes");
+            assertEquals("1", hits.get(0).getText());
+        }
+
+        @Test
+        @DisplayName("Should find single digits without whole word search")
+        void findSingleDigitsNoWholeWord() throws IOException {
+            String pageText = "Item 1 of 5 costs $2.50. Order number: 1234. Reference: A1B.";
+            addTextToPage(pageText);
+
+            int hitCount = runFinder("1", false).size();
+
+            assertTrue(
+                    hitCount >= 3,
+                    "Should find multiple instances of '1' including standalone, in '1234', and in 'A1B'");
+        }
+
+        @Test
+        @DisplayName("Should find single characters in various contexts")
+        void findSingleCharacters() throws IOException {
+            String pageText = "Grade: A. Section B has item A-1. The letter A appears multiple times.";
+            addTextToPage(pageText);
+
+            List<PDFText> hits = runFinder("A", true);
+
+            assertTrue(hits.size() >= 2, "Should find multiple standalone 'A' characters");
+            // Every reported match must be exactly the search term.
+            hits.forEach(hit -> assertEquals("A", hit.getText()));
+        }
+
+        @Test
+        @DisplayName("Should handle digits at word boundaries correctly")
+        void findDigitsAtWordBoundaries() throws IOException {
+            String pageText = "Numbers: 1, 2, 3. Code: 123. Version: 1.0. Item1 and Item2.";
+            addTextToPage(pageText);
+
+            // Same page text, two independent searches: first for '1', then for '2'.
+            List<PDFText> onesFound = runFinder("1", true);
+            assertEquals(
+                    1, onesFound.size(), "Should find only the standalone '1' at the beginning");
+
+            List<PDFText> twosFound = runFinder("2", true);
+            assertEquals(
+                    1, twosFound.size(), "Should find only the standalone '2' in the number list");
+        }
+
+        @Test
+        @DisplayName("Should handle special characters and punctuation boundaries")
+        void findDigitsWithPunctuationBoundaries() throws IOException {
+            String pageText = "Items: (1), [2], {3}, item#4, price$5, and 6%.";
+            addTextToPage(pageText);
+
+            List<PDFText> hits = runFinder("1", true);
+
+            assertEquals(1, hits.size(), "Should find '1' surrounded by parentheses");
+            assertEquals("1", hits.get(0).getText());
+        }
+
+        @Test
+        @DisplayName("Should handle edge case with spacing and formatting")
+        void findDigitsWithSpacingIssues() throws IOException {
+            String pageText = "List: 1 , 2  ,  3   and item   1   here.";
+            addTextToPage(pageText);
+
+            List<PDFText> hits = runFinder("1", true);
+
+            assertEquals(
+                    2, hits.size(), "Should find both '1' instances despite spacing variations");
+        }
+    }
+
    // Helper methods
    private void addTextToPage(String text) throws IOException {
        addTextToPage(page, text);