Merge 02324533e6 into 054bf15b24

2025-07-28 13:47:43 +02:00 · 2025-07-26 17:27:43 -04:00 · 2025-07-26 17:27:43 -04:00 · f63aff1153
commit f63aff1153
parent 054bf15b24 02324533e6
7 changed files with 4250 additions and 179 deletions
--- a/app/core/src/main/java/stirling/software/SPDF/controller/api/security/RedactController.java
+++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/security/RedactController.java
--- a/app/core/src/main/java/stirling/software/SPDF/pdf/TextFinder.java
+++ b/app/core/src/main/java/stirling/software/SPDF/pdf/TextFinder.java
@ -6,7 +6,7 @@ import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

-import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.text.PDFTextStripper;
 import org.apache.pdfbox.text.TextPosition;

@ -17,91 +17,200 @@ import stirling.software.SPDF.model.PDFText;
@Slf4j
 public class TextFinder extends PDFTextStripper {

-    private final String searchText;
+    private final String searchTerm;
    private final boolean useRegex;
    private final boolean wholeWordSearch;
-    private final List<PDFText> textOccurrences = new ArrayList<>();
+    private final List<PDFText> foundTexts = new ArrayList<>();

-    public TextFinder(String searchText, boolean useRegex, boolean wholeWordSearch)
+    private final List<TextPosition> pageTextPositions = new ArrayList<>();
+    private final StringBuilder pageTextBuilder = new StringBuilder();
+
+    /**
+     * Creates a finder for a single search term.
+     *
+     * <p>The term is stored exactly as given, and the word separator is set to one space.
+     *
+     * @param searchTerm text or regular expression to look for
+     * @param useRegex whether {@code searchTerm} is treated as a regular expression
+     * @param wholeWordSearch whether matches are restricted to whole words
+     * @throws IOException declared because the superclass constructor may throw it
+     */
+    public TextFinder(String searchTerm, boolean useRegex, boolean wholeWordSearch)
            throws IOException {
-        this.searchText = searchText.toLowerCase();
+        super();
+        this.searchTerm = searchTerm;
        this.useRegex = useRegex;
        this.wholeWordSearch = wholeWordSearch;
-        setSortByPosition(true);
+        this.setWordSeparator(" ");
    }

-    private List<MatchInfo> findOccurrencesInText(String searchText, String content) {
-        List<MatchInfo> matches = new ArrayList<>();
-
-        Pattern pattern;
-
-        if (useRegex) {
-            // Use regex-based search
-            pattern =
-                    wholeWordSearch
-                            ? Pattern.compile("\\b" + searchText + "\\b")
-                            : Pattern.compile(searchText);
-        } else {
-            // Use normal text search
-            pattern =
-                    wholeWordSearch
-                            ? Pattern.compile("\\b" + Pattern.quote(searchText) + "\\b")
-                            : Pattern.compile(Pattern.quote(searchText));
-        }
-
-        Matcher matcher = pattern.matcher(content);
-        while (matcher.find()) {
-            matches.add(new MatchInfo(matcher.start(), matcher.end() - matcher.start()));
-        }
-        return matches;
+    /**
+     * Empties the per-page text and position buffers before a new page is processed.
+     *
+     * @param page the page about to be processed
+     * @throws IOException if the superclass hook fails
+     */
+    @Override
+    protected void startPage(PDPage page) throws IOException {
+        super.startPage(page);
+        pageTextBuilder.setLength(0);
+        pageTextPositions.clear();
    }

    @Override
    protected void writeString(String text, List<TextPosition> textPositions) {
-        for (MatchInfo match : findOccurrencesInText(searchText, text.toLowerCase())) {
-            int index = match.startIndex;
-            if (index + match.matchLength <= textPositions.size()) {
-                // Initial values based on the first character
-                TextPosition first = textPositions.get(index);
-                float minX = first.getX();
-                float minY = first.getY();
-                float maxX = first.getX() + first.getWidth();
-                float maxY = first.getY() + first.getHeight();
+        // Buffers the chunk for the end-of-page search: pageTextBuilder receives the characters
+        // and pageTextPositions receives, at the same index, the glyph behind each character.
+        pageTextBuilder.append(text);
+        int charCount = text.length();
+        if (textPositions.size() == charCount) {
+            pageTextPositions.addAll(textPositions);
+            return;
+        }
+
+        // One glyph can stand for several characters (ligatures such as "fi"), so the two
+        // counts may differ. Repeat each glyph once per character it contributes; otherwise
+        // every later index lookup on this page would be shifted.
+        int mapped = 0;
+        for (TextPosition position : textPositions) {
+            String unicode = position.getUnicode();
+            int span = unicode == null ? 1 : unicode.length();
+            for (int k = 0; k < span && mapped < charCount; k++) {
+                pageTextPositions.add(position);
+                mapped++;
+            }
+        }
+
+        // Still short (for example after text normalisation): pad with the chunk's last glyph.
+        TextPosition filler =
+                textPositions.isEmpty() ? null : textPositions.get(textPositions.size() - 1);
+        while (mapped < charCount) {
+            pageTextPositions.add(filler);
+            mapped++;
+        }
+    }

-                // Loop over the rest of the characters and adjust bounding box values
-                for (int i = index; i < index + match.matchLength; i++) {
-                    TextPosition position = textPositions.get(i);
-                    minX = Math.min(minX, position.getX());
-                    minY = Math.min(minY, position.getY());
-                    maxX = Math.max(maxX, position.getX() + position.getWidth());
-                    maxY = Math.max(maxY, position.getY() + position.getHeight());
-                }
+    /**
+     * Appends the word separator to the page text buffer.
+     *
+     * <p>One {@code null} placeholder is recorded per separator character, so character indices
+     * in the page text stay aligned with {@code pageTextPositions} whatever the separator length.
+     */
+    @Override
+    protected void writeWordSeparator() {
+        String separator = getWordSeparator();
+        pageTextBuilder.append(separator);
+        for (int i = 0; i < separator.length(); i++) {
+            pageTextPositions.add(null); // Placeholder for separator
+        }
+    }

-                textOccurrences.add(
-                        new PDFText(getCurrentPageNo() - 1, minX, minY, maxX, maxY, text));
+    /**
+     * Appends the line separator to the page text buffer.
+     *
+     * <p>The separator may be longer than one character (for example {@code "\r\n"}), so one
+     * {@code null} placeholder is recorded per character to keep the page text and
+     * {@code pageTextPositions} index-aligned.
+     */
+    @Override
+    protected void writeLineSeparator() {
+        String separator = getLineSeparator();
+        pageTextBuilder.append(separator);
+        for (int i = 0; i < separator.length(); i++) {
+            pageTextPositions.add(null); // Placeholder for separator
+        }
+    }
+
+    /**
+     * Searches the text buffered for the current page and records one {@link PDFText} per match.
+     *
+     * <p>The search term is trimmed, compiled into a case-insensitive pattern (quoted unless
+     * {@code useRegex} is set, wrapped in word delimiters when {@code wholeWordSearch} is set)
+     * and run over the page text. For every match, the glyph positions stored at the matched
+     * character indices are merged into a bounding box. If none of those indices has a position,
+     * the first position found within five indices of the match is used instead.
+     *
+     * @param page the page that has just been processed
+     * @throws IOException if the superclass hook fails
+     */
+    @Override
+    protected void endPage(PDPage page) throws IOException {
+        String text = pageTextBuilder.toString();
+        String processedSearchTerm = this.searchTerm == null ? "" : this.searchTerm.trim();
+        // Check after trimming: a whitespace-only term would otherwise turn into an empty
+        // pattern that matches at every index of the page.
+        if (text.isEmpty() || processedSearchTerm.isEmpty()) {
+            super.endPage(page);
+            return;
+        }
+
+        // Pattern.quote stays correct when the term itself contains "\E".
+        String regex = this.useRegex ? processedSearchTerm : Pattern.quote(processedSearchTerm);
+        if (this.wholeWordSearch) {
+            // Group first so the delimiters apply to the whole expression rather than only to
+            // the outer branches of an alternation such as "foo|bar".
+            regex = "(?:" + regex + ")";
+            if (processedSearchTerm.length() == 1) {
+                // Single characters use explicit look-arounds instead of \b.
+                regex = "(?<![\\w])" + regex + "(?![\\w])";
+            } else {
+                regex = "\\b" + regex + "\\b";
            }
        }
-    }

-    public List<PDFText> getTextLocations(PDDocument document) throws Exception {
-        this.getText(document);
+        Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
+        Matcher matcher = pattern.matcher(text);
+
        log.debug(
-                "Found "
-                        + textOccurrences.size()
-                        + " occurrences of '"
-                        + searchText
-                        + "' in the document.");
+                "Searching for '{}' in page {} with regex '{}' (wholeWord: {}, useRegex: {})",
+                processedSearchTerm,
+                getCurrentPageNo(),
+                regex,
+                wholeWordSearch,
+                useRegex);

-        return textOccurrences;
+        int matchCount = 0;
+        while (matcher.find()) {
+            int matchStart = matcher.start();
+            int matchEnd = matcher.end();
+            if (matchStart == matchEnd) {
+                // A zero-length match (possible with patterns like "a*") covers no glyph.
+                continue;
+            }
+            matchCount++;
+
+            log.debug(
+                    "Found match #{} at positions {}-{}: '{}'",
+                    matchCount,
+                    matchStart,
+                    matchEnd,
+                    matcher.group());
+
+            // Start from +/- infinity so the first real coordinate always wins, negative ones
+            // included (Float.MIN_VALUE is the smallest positive float, not the lowest value).
+            float minX = Float.POSITIVE_INFINITY;
+            float minY = Float.POSITIVE_INFINITY;
+            float maxX = Float.NEGATIVE_INFINITY;
+            float maxY = Float.NEGATIVE_INFINITY;
+            boolean foundPosition = false;
+
+            for (int i = matchStart; i < matchEnd; i++) {
+                if (i >= pageTextPositions.size()) {
+                    log.debug(
+                            "Position index {} exceeds available positions ({})",
+                            i,
+                            pageTextPositions.size());
+                    break; // every later index is out of range as well
+                }
+                TextPosition pos = pageTextPositions.get(i);
+                if (pos != null) {
+                    foundPosition = true;
+                    minX = Math.min(minX, pos.getX());
+                    maxX = Math.max(maxX, pos.getX() + pos.getWidth());
+                    minY = Math.min(minY, pos.getY() - pos.getHeight());
+                    maxY = Math.max(maxY, pos.getY());
+                }
+            }
+
+            // The match covered only separator placeholders: borrow the first real glyph nearby.
+            if (!foundPosition && matchStart < pageTextPositions.size()) {
+                log.debug(
+                        "Attempting to find nearby positions for match at {}-{}",
+                        matchStart,
+                        matchEnd);
+
+                for (int i = Math.max(0, matchStart - 5);
+                        i < Math.min(pageTextPositions.size(), matchEnd + 5);
+                        i++) {
+                    TextPosition pos = pageTextPositions.get(i);
+                    if (pos != null) {
+                        foundPosition = true;
+                        minX = Math.min(minX, pos.getX());
+                        maxX = Math.max(maxX, pos.getX() + pos.getWidth());
+                        minY = Math.min(minY, pos.getY() - pos.getHeight());
+                        maxY = Math.max(maxY, pos.getY());
+                        break;
+                    }
+                }
+            }
+
+            if (foundPosition) {
+                foundTexts.add(
+                        new PDFText(
+                                this.getCurrentPageNo() - 1,
+                                minX,
+                                minY,
+                                maxX,
+                                maxY,
+                                matcher.group()));
+                log.debug(
+                        "Added PDFText for match: page={}, bounds=({},{},{},{}), text='{}'",
+                        getCurrentPageNo() - 1,
+                        minX,
+                        minY,
+                        maxX,
+                        maxY,
+                        matcher.group());
+            } else {
+                log.warn(
+                        "Found text match '{}' but no valid position data at {}-{}",
+                        matcher.group(),
+                        matchStart,
+                        matchEnd);
+            }
+        }
+
+        log.debug(
+                "Page {} search complete: found {} matches for '{}'",
+                getCurrentPageNo(),
+                matchCount,
+                processedSearchTerm);
+
+        super.endPage(page);
    }

-    private class MatchInfo {
-        int startIndex;
-        int matchLength;
+    /**
+     * Returns the matches collected so far.
+     *
+     * @return the internal list of found texts (the list itself, not a copy)
+     */
+    public List<PDFText> getFoundTexts() {
+        return foundTexts;
+    }

-        MatchInfo(int startIndex, int matchLength) {
-            this.startIndex = startIndex;
-            this.matchLength = matchLength;
+    /**
+     * Builds a human-readable dump of the current page buffers.
+     *
+     * <p>The report lists the buffered text length, the number of stored positions, the text
+     * with line breaks escaped, and one line for each of the first 50 characters showing its
+     * code and the coordinates of its position (or {@code null} when there is none).
+     *
+     * @return the multi-line debug report
+     */
+    public String getDebugInfo() {
+        String text = pageTextBuilder.toString();
+        int positionCount = pageTextPositions.size();
+        String escaped = text.replace("\n", "\\n").replace("\r", "\\r");
+
+        StringBuilder report = new StringBuilder();
+        report.append("Extracted text length: ").append(text.length()).append("\n");
+        report.append("Position count: ").append(positionCount).append("\n");
+        report.append("Text content: '").append(escaped).append("'\n");
+
+        int shown = Math.min(text.length(), 50);
+        for (int idx = 0; idx < shown; idx++) {
+            char ch = text.charAt(idx);
+            TextPosition glyph = idx < positionCount ? pageTextPositions.get(idx) : null;
+            String location = "null";
+            if (glyph != null) {
+                location = String.format("(%.1f,%.1f)", glyph.getX(), glyph.getY());
+            }
+            report.append(
+                    String.format("  [%d] '%c' (0x%02X) -> %s\n", idx, ch, (int) ch, location));
        }
+
+        return report.toString();
    }
 }
--- a/app/core/src/main/java/stirling/software/SPDF/utils/text/TextEncodingHelper.java
+++ b/app/core/src/main/java/stirling/software/SPDF/utils/text/TextEncodingHelper.java
@ -0,0 +1,351 @@
+package stirling.software.SPDF.utils.text;
+
+import java.io.IOException;
+
+import org.apache.pdfbox.pdmodel.font.PDFont;
+import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
+import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding;
+import org.apache.pdfbox.pdmodel.font.encoding.Encoding;
+
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+public class TextEncodingHelper {
+
+    /**
+     * Checks whether the font can encode the given text.
+     *
+     * <p>The whole string is tried first. If that yields no bytes, the text is validated code
+     * point by code point. If encoding throws, the per-code-point validation is attempted only
+     * for subset fonts or fonts with a custom encoding.
+     *
+     * @param font font to test
+     * @param text text to encode
+     * @return {@code true} if the text is considered encodable; {@code false} for null or empty
+     *     input
+     */
+    public static boolean canEncodeCharacters(PDFont font, String text) {
+        if (font == null || text == null || text.isEmpty()) {
+            return false;
+        }
+
+        String fontLabel = font.getName() != null ? font.getName() : "Unknown";
+        try {
+            boolean encodedWhole = font.encode(text).length > 0;
+            if (!encodedWhole) {
+                // Empty result: fall back to validating each code point on its own.
+                log.debug(
+                        "Full encoding failed for '{}' - using array-based fallback for font {}",
+                        text,
+                        fontLabel);
+                return validateAsCodePointArray(font, text);
+            }
+
+            log.debug(
+                    "Text '{}' has good full-string encoding for font {} - permissively allowing",
+                    text,
+                    fontLabel);
+            return true;
+
+        } catch (IOException | IllegalArgumentException e) {
+            log.debug(
+                    "Encoding exception for text '{}' with font {} - trying array fallback: {}",
+                    text,
+                    fontLabel,
+                    e.getMessage());
+
+            // Other fonts that throw here are treated as not encodable.
+            boolean retryPerCodePoint = isFontSubset(font.getName()) || hasCustomEncoding(font);
+            return retryPerCodePoint && validateAsCodePointArray(font, text);
+        }
+    }
+
+    /**
+     * Validates the text one Unicode code point at a time.
+     *
+     * <p>A code point counts as successful when the font encodes it to at least one byte and
+     * reports a non-negative width for it. The text is accepted when at least 95% of its code
+     * points succeed.
+     *
+     * @param font font to test
+     * @param text text to validate
+     * @return {@code true} if the success rate is at least 0.95
+     */
+    private static boolean validateAsCodePointArray(PDFont font, String text) {
+        int totalCodePoints = 0;
+        int successfulCodePoints = 0;
+
+        // Step by code point so that surrogate pairs are tested as one character.
+        for (int i = 0; i < text.length(); ) {
+            int codePoint = text.codePointAt(i);
+            String charStr = new String(Character.toChars(codePoint));
+            String hex = Integer.toHexString(codePoint).toUpperCase();
+            totalCodePoints++;
+
+            try {
+                byte[] charEncoded = font.encode(charStr);
+                if (charEncoded.length > 0) {
+                    float charWidth = font.getStringWidth(charStr);
+
+                    if (charWidth >= 0) {
+                        successfulCodePoints++;
+                        log.debug("Code point '{}' (U+{}) encoded successfully", charStr, hex);
+                    } else {
+                        log.debug(
+                                "Code point '{}' (U+{}) has invalid width: {}",
+                                charStr,
+                                hex,
+                                charWidth);
+                    }
+                } else {
+                    log.debug(
+                            "Code point '{}' (U+{}) encoding failed - empty result",
+                            charStr,
+                            hex);
+                }
+            } catch (IOException | IllegalArgumentException e) {
+                log.debug(
+                        "Code point '{}' (U+{}) validation failed: {}",
+                        charStr,
+                        hex,
+                        e.getMessage());
+            }
+
+            i += Character.charCount(codePoint);
+        }
+
+        double successRate =
+                totalCodePoints > 0 ? (double) successfulCodePoints / totalCodePoints : 0;
+        boolean isAcceptable = successRate >= 0.95;
+
+        // SLF4J only understands "{}" placeholders, so the percentage is formatted up front.
+        log.debug(
+                "Array validation for '{}': {}/{} code points successful ({}%) - {}",
+                text,
+                successfulCodePoints,
+                totalCodePoints,
+                String.format(java.util.Locale.ROOT, "%.1f", successRate * 100),
+                isAcceptable ? "ALLOWING" : "rejecting");
+
+        return isAcceptable;
+    }
+
+    /**
+     * Decides whether a text segment can be removed, using a light check for short, simple text
+     * and the comprehensive validation for everything else.
+     *
+     * @param font font the segment is drawn with
+     * @param text segment content
+     * @return {@code true} if the applicable validation passes; {@code false} for null or empty
+     *     input
+     */
+    public static boolean isTextSegmentRemovable(PDFont font, String text) {
+        if (font == null || text == null || text.isEmpty()) {
+            return false;
+        }
+
+        log.debug(
+                "Evaluating text segment for removal: '{}' with font {}",
+                text,
+                font.getName() != null ? font.getName() : "Unknown Font");
+
+        // Anything that is not short, simple text goes through the comprehensive validation.
+        if (!isSimpleCharacter(text)) {
+            return isTextFullyRemovable(font, text);
+        }
+
+        boolean validated;
+        try {
+            // Both calls are made only to see whether they throw; their results are not used.
+            font.encode(text);
+            font.getStringWidth(text);
+            validated = true;
+        } catch (Exception e) {
+            log.debug(
+                    "Simple character '{}' failed basic validation with font {}: {}",
+                    text,
+                    font.getName() != null ? font.getName() : "Unknown",
+                    e.getMessage());
+            validated = false;
+        }
+
+        if (validated) {
+            log.debug(
+                    "Text '{}' is a simple character and passed validation - allowing removal",
+                    text);
+        }
+        return validated;
+    }
+
+    /**
+     * Runs the comprehensive removability validation for a text segment.
+     *
+     * <p>The text must be encodable by the font, have a non-negative string width, and the font
+     * must expose a font descriptor whose bounding box can be read.
+     *
+     * @param font font the text is drawn with
+     * @param text text to validate
+     * @return {@code true} if every check passes; {@code false} otherwise, including for null or
+     *     empty input
+     */
+    public static boolean isTextFullyRemovable(PDFont font, String text) {
+        if (font == null || text == null || text.isEmpty()) {
+            return false;
+        }
+
+        String fontLabel = font.getName() != null ? font.getName() : "Unknown";
+        try {
+            // Encoding capability.
+            if (!canEncodeCharacters(font, text)) {
+                log.debug("Text '{}' failed encoding validation for font {}", text, fontLabel);
+                return false;
+            }
+
+            // Width: zero is tolerated, a negative value is not.
+            float measured = font.getStringWidth(text);
+            if (measured < 0) {
+                log.debug(
+                        "Text '{}' has invalid width {} for font {}", text, measured, fontLabel);
+                return false;
+            }
+
+            // Descriptor presence.
+            var descriptor = font.getFontDescriptor();
+            if (descriptor == null) {
+                log.debug("Missing font descriptor for font {}", fontLabel);
+                return false;
+            }
+
+            // Bounding box must be readable; the value itself is not used here.
+            try {
+                descriptor.getFontBoundingBox();
+            } catch (IllegalArgumentException e) {
+                log.debug(
+                        "Font bounding box unavailable for font {}: {}",
+                        fontLabel,
+                        e.getMessage());
+                return false;
+            }
+
+            log.debug("Text '{}' passed comprehensive validation for font {}", text, fontLabel);
+            return true;
+
+        } catch (IOException e) {
+            log.debug(
+                    "Text '{}' failed validation for font {} due to IO error: {}",
+                    text,
+                    fontLabel,
+                    e.getMessage());
+            return false;
+        } catch (IllegalArgumentException e) {
+            log.debug(
+                    "Text '{}' failed validation for font {} due to argument error: {}",
+                    text,
+                    fontLabel,
+                    e.getMessage());
+            return false;
+        }
+    }
+
+    /**
+     * Tells whether the text is short and made only of letters, digits, whitespace and a fixed
+     * set of ASCII punctuation marks.
+     *
+     * @param text text to inspect
+     * @return {@code true} if the text is non-empty, at most 20 characters long, and every
+     *     character belongs to the allowed set
+     */
+    private static boolean isSimpleCharacter(String text) {
+        if (text == null || text.isEmpty() || text.length() > 20) {
+            return false;
+        }
+
+        String allowedPunctuation = ".,!?;:()-[]{}\"'/@#$%&*+=<>|\\~`";
+        for (char ch : text.toCharArray()) {
+            boolean alphanumericOrSpace =
+                    Character.isLetterOrDigit(ch) || Character.isWhitespace(ch);
+            boolean printableAscii = ch >= 32 && ch <= 126;
+            boolean knownPunctuation = printableAscii && allowedPunctuation.indexOf(ch) >= 0;
+
+            if (!alphanumericOrSpace && !knownPunctuation) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * Reports whether the font appears to use a custom (non-standard) encoding.
+     *
+     * <p>Simple fonts are inspected through their {@link Encoding}: a
+     * {@link DictionaryEncoding}, or an encoding class whose simple name contains "Custom" or
+     * "Dictionary", counts as custom. If inspecting a simple font's encoding throws, the font is
+     * assumed to be custom. Type0 fonts and all remaining font types are reported as standard.
+     *
+     * @param font font to inspect; {@code null} is reported as not custom
+     * @return {@code true} if a custom encoding is detected or assumed
+     */
+    public static boolean hasCustomEncoding(PDFont font) {
+        // Same null contract as the other public helpers in this class: no font, no claim.
+        if (font == null) {
+            return false;
+        }
+
+        try {
+            if (font instanceof PDSimpleFont simpleFont) {
+                try {
+                    Encoding encoding = simpleFont.getEncoding();
+                    if (encoding != null) {
+                        // Check for dictionary-based custom encodings
+                        if (encoding instanceof DictionaryEncoding) {
+                            log.debug("Font {} uses DictionaryEncoding (custom)", font.getName());
+                            return true;
+                        }
+
+                        String encodingName = encoding.getClass().getSimpleName();
+                        if (encodingName.contains("Custom")
+                                || encodingName.contains("Dictionary")) {
+                            log.debug(
+                                    "Font {} uses custom encoding: {}",
+                                    font.getName(),
+                                    encodingName);
+                            return true;
+                        }
+                    }
+                } catch (Exception e) {
+                    log.debug(
+                            "Encoding detection failed for font {}: {}",
+                            font.getName(),
+                            e.getMessage());
+                    return true; // Assume custom if detection fails
+                }
+            }
+
+            if (font instanceof org.apache.pdfbox.pdmodel.font.PDType0Font) {
+                log.debug(
+                        "Font {} is Type0 (CID) - generally uses standard CMaps",
+                        font.getName() != null ? font.getName() : "Unknown");
+                return false;
+            }
+
+            log.debug(
+                    "Font {} type {} - assuming standard encoding",
+                    font.getName() != null ? font.getName() : "Unknown",
+                    font.getClass().getSimpleName());
+            return false;
+
+        } catch (IllegalArgumentException e) {
+            log.debug(
+                    "Custom encoding detection failed for font {}: {}",
+                    font.getName() != null ? font.getName() : "Unknown",
+                    e.getMessage());
+            return false; // Be forgiving on detection failure
+        }
+    }
+
+    /**
+     * Checks whether the font can both encode the character and report a positive width for it.
+     *
+     * @param font font to test
+     * @param character character (as a string) to test
+     * @return {@code true} if encoding yields at least one byte and the width is greater than
+     *     zero; {@code false} otherwise, including for null or empty input
+     */
+    public static boolean fontSupportsCharacter(PDFont font, String character) {
+        boolean missingInput = font == null || character == null || character.isEmpty();
+        if (missingInput) {
+            return false;
+        }
+
+        try {
+            boolean encodable = font.encode(character).length != 0;
+            // The width is only queried when encoding produced something.
+            return encodable && font.getStringWidth(character) > 0;
+        } catch (IOException | IllegalArgumentException e) {
+            log.debug(
+                    "Character '{}' not supported by font {}: {}",
+                    character,
+                    font.getName() != null ? font.getName() : "Unknown",
+                    e.getMessage());
+            return false;
+        }
+    }
+
+    /** Subset font names start with six upper-case ASCII letters followed by a plus sign. */
+    private static final java.util.regex.Pattern SUBSET_FONT_NAME =
+            java.util.regex.Pattern.compile("^[A-Z]{6}\\+.*");
+
+    /**
+     * Tells whether a font name carries a subset prefix such as {@code ABCDEF+}.
+     *
+     * <p>The pattern is compiled once instead of on every call.
+     *
+     * @param fontName font name to inspect; may be {@code null}
+     * @return {@code true} if the name starts with six upper-case letters and a plus sign
+     */
+    public static boolean isFontSubset(String fontName) {
+        if (fontName == null) {
+            return false;
+        }
+        return SUBSET_FONT_NAME.matcher(fontName).matches();
+    }
+
+    /**
+     * Probes whether the font can report usable widths for a few basic characters.
+     *
+     * <p>The space character must have a positive width, and at least one of the probe
+     * characters must have a positive width as well.
+     *
+     * @param font font to probe; {@code null} is reported as unusable
+     * @return {@code true} if the width probes succeed
+     */
+    public static boolean canCalculateBasicWidths(PDFont font) {
+        if (font == null) {
+            return false;
+        }
+
+        try {
+            float spaceWidth = font.getStringWidth(" ");
+            if (spaceWidth <= 0) {
+                return false;
+            }
+
+            String[] testChars = {"a", "A", "0", ".", "e", "!"};
+            for (String ch : testChars) {
+                try {
+                    float width = font.getStringWidth(ch);
+                    if (width > 0) {
+                        return true;
+                    }
+                } catch (IOException | IllegalArgumentException ignored) {
+                    // One unsupported probe character is not conclusive; try the next one.
+                }
+            }
+
+            return false; // Can't calculate width for any test characters
+        } catch (IOException | IllegalArgumentException e) {
+            return false; // Font failed basic width calculation
+        }
+    }
+}
--- a/app/core/src/main/java/stirling/software/SPDF/utils/text/TextFinderUtils.java
+++ b/app/core/src/main/java/stirling/software/SPDF/utils/text/TextFinderUtils.java
@ -0,0 +1,140 @@
+package stirling.software.SPDF.utils.text;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDResources;
+
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+public class TextFinderUtils {
+
+    /**
+     * Judges whether a font is usable for text processing.
+     *
+     * <p>A font is considered reliable when the basic width probe succeeds or, failing that,
+     * when at least one of a small set of basic characters can be encoded. A damaged flag is
+     * only logged; it does not change the outcome.
+     *
+     * @param font font to validate
+     * @return {@code true} if the font is considered reliable; {@code false} for {@code null}
+     */
+    public static boolean validateFontReliability(org.apache.pdfbox.pdmodel.font.PDFont font) {
+        if (font == null) {
+            return false;
+        }
+
+        String fontName = font.getName();
+        if (font.isDamaged()) {
+            log.debug(
+                    "Font {} is marked as damaged - using TextEncodingHelper validation",
+                    fontName);
+        }
+
+        if (TextEncodingHelper.canCalculateBasicWidths(font)) {
+            log.debug(
+                    "Font {} passed basic width calculations - considering reliable", fontName);
+            return true;
+        }
+
+        String[] probes = {"1", "2", "3", "a", "A", "e", "E", " "};
+        int encodable = 0;
+        for (String probe : probes) {
+            encodable += TextEncodingHelper.canEncodeCharacters(font, probe) ? 1 : 0;
+        }
+
+        if (encodable == 0) {
+            log.debug("Font {} failed all basic tests - considering unreliable", fontName);
+            return false;
+        }
+
+        log.debug(
+                "Font {} can process {}/{} basic characters - considering reliable",
+                fontName,
+                encodable,
+                probes.length);
+        return true;
+    }
+
+    /**
+     * Compiles one case-insensitive pattern per usable search term.
+     *
+     * <p>Null and blank terms are skipped. Terms are trimmed, quoted unless {@code useRegex} is
+     * set, and wrapped in word delimiters when {@code wholeWordSearch} is set. A term whose
+     * pattern cannot be created is logged and skipped.
+     *
+     * @param searchTerms terms to compile
+     * @param useRegex whether the terms are regular expressions
+     * @param wholeWordSearch whether matches must be whole words
+     * @return the compiled patterns, in iteration order of the terms
+     */
+    public static List<Pattern> createOptimizedSearchPatterns(
+            Set<String> searchTerms, boolean useRegex, boolean wholeWordSearch) {
+        List<Pattern> compiled = new ArrayList<>();
+        int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
+
+        for (String term : searchTerms) {
+            String trimmed = term == null ? "" : term.trim();
+            if (trimmed.isEmpty()) {
+                continue;
+            }
+
+            try {
+                String expression = useRegex ? trimmed : Pattern.quote(trimmed);
+                if (wholeWordSearch) {
+                    expression = applyWordBoundaries(trimmed, expression);
+                }
+
+                compiled.add(Pattern.compile(expression, flags));
+                log.debug("Created search pattern: '{}' -> '{}'", trimmed, expression);
+
+            } catch (Exception e) {
+                log.warn("Failed to create pattern for term '{}': {}", term, e.getMessage());
+            }
+        }
+
+        return compiled;
+    }
+
+    /**
+     * Wraps a pattern so that it only matches whole words.
+     *
+     * <p>The pattern is first put in a non-capturing group so the delimiters apply to the whole
+     * expression; without it a regex such as {@code foo|bar} would become {@code \bfoo|bar\b}
+     * and match "foo" inside "foobar". Single-character terms use explicit look-arounds, longer
+     * terms use {@code \b}.
+     *
+     * @param originalTerm the trimmed search term, used only for its length
+     * @param patternString the (possibly quoted) pattern built from the term
+     * @return the pattern surrounded by word delimiters
+     */
+    private static String applyWordBoundaries(String originalTerm, String patternString) {
+        String grouped = "(?:" + patternString + ")";
+        if (originalTerm.length() == 1) {
+            return "(?<![\\w])" + grouped + "(?![\\w])";
+        }
+        return "\\b" + grouped + "\\b";
+    }
+
+    /**
+     * Reports whether most fonts on a page are unusable.
+     *
+     * <p>Every font resource of the page is validated. Fonts that fail to load are counted
+     * towards the total but not as unusable. The page is problematic when more than half of the
+     * counted fonts are unusable. Any failure of the analysis itself yields {@code false}.
+     *
+     * @param page page to analyse; may be {@code null}
+     * @return {@code true} if more than half of the page's fonts fail validation
+     */
+    public static boolean hasProblematicFonts(PDPage page) {
+        if (page == null) {
+            return false;
+        }
+
+        try {
+            PDResources pageResources = page.getResources();
+            if (pageResources == null) {
+                return false;
+            }
+
+            int counted = 0;
+            int unusable = 0;
+
+            for (org.apache.pdfbox.cos.COSName key : pageResources.getFontNames()) {
+                try {
+                    org.apache.pdfbox.pdmodel.font.PDFont candidate = pageResources.getFont(key);
+                    if (candidate == null) {
+                        continue;
+                    }
+                    counted++;
+                    if (!validateFontReliability(candidate)) {
+                        unusable++;
+                    }
+                } catch (Exception e) {
+                    log.debug("Font loading failed for {}: {}", key.getName(), e.getMessage());
+                    counted++;
+                }
+            }
+
+            // Strict majority: exactly half unusable is still acceptable.
+            boolean majorityUnusable = counted > 0 && (unusable * 2 > counted);
+            log.debug(
+                    "Page font analysis: {}/{} fonts are completely unusable - page {} problematic",
+                    unusable,
+                    counted,
+                    majorityUnusable ? "IS" : "is NOT");
+
+            return majorityUnusable;
+
+        } catch (Exception e) {
+            log.warn("Font analysis failed for page: {}", e.getMessage());
+            return false; // Be permissive if analysis fails
+        }
+    }
+}
--- a/app/core/src/main/java/stirling/software/SPDF/utils/text/WidthCalculator.java
+++ b/app/core/src/main/java/stirling/software/SPDF/utils/text/WidthCalculator.java
@ -0,0 +1,136 @@
+package stirling.software.SPDF.utils.text;
+
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.apache.pdfbox.pdmodel.font.PDFont;
+
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Static helpers that estimate the rendered width of a string in a given PDF font.
+ *
+ * <p>Font-reported widths are divided by {@link #FONT_SCALE_FACTOR} and multiplied by the font
+ * size. Each calculation degrades through progressively cruder estimates instead of throwing.
+ */
+@Slf4j
+public class WidthCalculator {
+
+    /** Divisor applied to font-reported widths before scaling by the font size. */
+    private static final int FONT_SCALE_FACTOR = 1000;
+
+    /**
+     * Computes the width of {@code text} at {@code fontSize}, preferring the font's own string
+     * width and falling back to per-character and then heuristic estimates.
+     *
+     * @param font the font used to render the text; may be {@code null}
+     * @param text the text to measure; may be {@code null} or empty
+     * @param fontSize the font size; values {@code <= 0} yield {@code 0}
+     * @return the estimated width, or {@code 0} when any argument is missing or non-positive
+     */
+    public static float calculateAccurateWidth(PDFont font, String text, float fontSize) {
+        if (font == null || text == null || text.isEmpty() || fontSize <= 0) {
+            return 0;
+        }
+
+        if (!TextEncodingHelper.canEncodeCharacters(font, text)) {
+            log.debug(
+                    "Text cannot be encoded by font {}, using fallback width calculation",
+                    font.getName());
+            return calculateFallbackWidth(font, text, fontSize);
+        }
+
+        try {
+            float rawWidth = font.getStringWidth(text);
+            float scaledWidth = (rawWidth / FONT_SCALE_FACTOR) * fontSize;
+
+            log.debug(
+                    "Direct width calculation successful for font {}: {} -> {}",
+                    font.getName(),
+                    rawWidth,
+                    scaledWidth);
+            return scaledWidth;
+
+        } catch (Exception e) {
+            log.debug(
+                    "Direct width calculation failed for font {}: {}",
+                    font.getName(),
+                    e.getMessage());
+            return calculateWidthWithCharacterIteration(font, text, fontSize);
+        }
+    }
+
+    /**
+     * Sums per-character widths, substituting the font's average width for any character that
+     * cannot be encoded or measured. Falls back to {@link #calculateFallbackWidth} if even the
+     * average width is unavailable.
+     */
+    private static float calculateWidthWithCharacterIteration(
+            PDFont font, String text, float fontSize) {
+        try {
+            float totalWidth = 0;
+
+            // Step by Unicode code point so a surrogate pair is measured as one character
+            // instead of two unencodable halves.
+            for (int i = 0; i < text.length(); ) {
+                int codePoint = text.codePointAt(i);
+                i += Character.charCount(codePoint);
+                String character = new String(Character.toChars(codePoint));
+
+                totalWidth += (glyphWidthOrAverage(font, character) / FONT_SCALE_FACTOR) * fontSize;
+            }
+
+            log.debug("Character iteration width calculation: {}", totalWidth);
+            return totalWidth;
+
+        } catch (Exception e) {
+            log.debug("Character iteration failed: {}", e.getMessage());
+            return calculateFallbackWidth(font, text, fontSize);
+        }
+    }
+
+    /**
+     * Returns the unscaled width of a single character, or the font's average width when the
+     * character cannot be encoded or its glyph width cannot be read.
+     */
+    private static float glyphWidthOrAverage(PDFont font, String character) {
+        try {
+            byte[] encoded = font.encode(character);
+            if (encoded.length == 0) {
+                return font.getAverageFontWidth();
+            }
+
+            // Let the font decode its own bytes: a character code may span several bytes, so
+            // taking only the first byte would look up the wrong glyph.
+            int glyphCode = font.readCode(new java.io.ByteArrayInputStream(encoded));
+            float glyphWidth = font.getWidth(glyphCode);
+
+            if (glyphWidth == 0) {
+                try {
+                    glyphWidth = font.getWidthFromFont(glyphCode);
+                } catch (Exception e) {
+                    glyphWidth = font.getAverageFontWidth();
+                }
+            }
+            return glyphWidth;
+        } catch (Exception e) {
+            return font.getAverageFontWidth();
+        }
+    }
+
+    /**
+     * Heuristic width estimate used when the font cannot measure the text. Tries, in order, the
+     * font bounding box, the font's average width, and finally half the font size per character.
+     * Estimates that would be zero are skipped so non-empty text never gets a zero width.
+     */
+    private static float calculateFallbackWidth(PDFont font, String text, float fontSize) {
+        try {
+            PDRectangle bbox =
+                    font.getFontDescriptor() != null
+                            ? font.getFontDescriptor().getFontBoundingBox()
+                            : null;
+
+            if (bbox != null && bbox.getWidth() > 0) {
+                float avgCharWidth =
+                        bbox.getWidth() / FONT_SCALE_FACTOR * 0.6f; // Conservative estimate
+                float fallbackWidth = text.length() * avgCharWidth * fontSize;
+
+                log.debug("Bounding box fallback width: {}", fallbackWidth);
+                return fallbackWidth;
+            }
+
+            float avgWidth = font.getAverageFontWidth();
+            if (avgWidth > 0) {
+                float fallbackWidth = (text.length() * avgWidth / FONT_SCALE_FACTOR) * fontSize;
+
+                log.debug("Average width fallback: {}", fallbackWidth);
+                return fallbackWidth;
+            }
+        } catch (Exception e) {
+            log.debug("Font metrics unavailable for fallback width: {}", e.getMessage());
+        }
+
+        // Last resort: assume every character is half the font size wide.
+        float conservativeWidth = text.length() * 0.5f * fontSize;
+        log.debug(
+                "Conservative fallback width for font {}: {}", font.getName(), conservativeWidth);
+        return conservativeWidth;
+    }
+
+    /**
+     * Indicates whether widths computed for {@code font} can be trusted.
+     *
+     * @param font the font to check; may be {@code null}
+     * @return {@code false} when the font is {@code null}, damaged, fails the basic width check
+     *     of {@code TextEncodingHelper}, or uses a custom encoding; {@code true} otherwise
+     */
+    public static boolean isWidthCalculationReliable(PDFont font) {
+        if (font == null) {
+            return false;
+        }
+
+        if (font.isDamaged()) {
+            log.debug("Font {} is damaged", font.getName());
+            return false;
+        }
+
+        if (!TextEncodingHelper.canCalculateBasicWidths(font)) {
+            log.debug("Font {} cannot perform basic width calculations", font.getName());
+            return false;
+        }
+
+        if (TextEncodingHelper.hasCustomEncoding(font)) {
+            log.debug("Font {} has custom encoding", font.getName());
+            return false;
+        }
+
+        return true;
+    }
+}
--- a/stirling-pdf/src/test/java/stirling/software/SPDF/controller/api/security/RedactControllerTest.java
+++ b/stirling-pdf/src/test/java/stirling/software/SPDF/controller/api/security/RedactControllerTest.java
--- a/stirling-pdf/src/test/java/stirling/software/SPDF/pdf/TextFinderTest.java
+++ b/stirling-pdf/src/test/java/stirling/software/SPDF/pdf/TextFinderTest.java
@ -0,0 +1,588 @@
+package stirling.software.SPDF.pdf;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
+import org.junit.jupiter.api.AfterEach;
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Nested;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+import org.mockito.junit.jupiter.MockitoExtension;
+
+import stirling.software.SPDF.model.PDFText;
+
+@DisplayName("PDF Text Finder tests")
+@ExtendWith(MockitoExtension.class)
+class TextFinderTest {
+
+    private PDDocument document;
+    private PDPage page;
+
+    // Helpers
+    private void testTextFinding(String pageContent, String searchTerm, boolean useRegex, boolean wholeWord,
+                               String[] expectedTexts, int expectedCount) throws IOException {
+        addTextToPage(pageContent);
+        TextFinder textFinder = new TextFinder(searchTerm, useRegex, wholeWord);
+
+        textFinder.getText(document);
+        List<PDFText> foundTexts = textFinder.getFoundTexts();
+
+        assertEquals(expectedCount, foundTexts.size(),
+            String.format("Expected %d matches for search term '%s'", expectedCount, searchTerm));
+
+        if (expectedTexts != null) {
+            for (String expectedText : expectedTexts) {
+                assertTrue(foundTexts.stream().anyMatch(text -> text.getText().equals(expectedText)),
+                    String.format("Expected to find text: '%s'", expectedText));
+            }
+        }
+
+        // Verify basic properties of found texts
+        foundTexts.forEach(text -> {
+            assertNotNull(text.getText());
+            assertTrue(text.getX1() >= 0);
+            assertTrue(text.getY1() >= 0);
+            assertTrue(text.getX2() >= text.getX1());
+            assertTrue(text.getY2() >= text.getY1());
+            assertEquals(0, text.getPageIndex()); // Single page test
+        });
+    }
+
+    @BeforeEach
+    void setUp() {
+        document = new PDDocument();
+        page = new PDPage(PDRectangle.A4);
+        document.addPage(page);
+    }
+
+    @AfterEach
+    void tearDown() throws IOException {
+        if (document != null) {
+            document.close();
+        }
+    }
+
+    @Nested
+    @DisplayName("Basic Text Search")
+    class BasicSearchTests {
+
+        @Test
+        @DisplayName("Should find simple text correctly")
+        void findSimpleText() throws IOException {
+            testTextFinding("This is a confidential document with secret information.",
+                           "confidential", false, false,
+                           new String[]{"confidential"}, 1);
+        }
+
+        @Test
+        @DisplayName("Should perform case-insensitive search")
+        void performCaseInsensitiveSearch() throws IOException {
+            testTextFinding("This document contains CONFIDENTIAL information.",
+                           "confidential", false, false,
+                           new String[]{"CONFIDENTIAL"}, 1);
+        }
+
+        @Test
+        @DisplayName("Should find multiple occurrences of same term")
+        void findMultipleOccurrences() throws IOException {
+            testTextFinding("The secret code is secret123. Keep this secret safe!",
+                           "secret", false, false,
+                           new String[]{"secret", "secret", "secret"}, 3);
+        }
+
+        @Test
+        @DisplayName("Should handle empty search term gracefully")
+        void handleEmptySearchTerm() throws IOException {
+            testTextFinding("This is a test document.", "", false, false, null, 0);
+        }
+
+        @Test
+        @DisplayName("Should handle null search term gracefully")
+        void handleNullSearchTerm() throws IOException {
+            testTextFinding("This is a test document.", null, false, false, null, 0);
+        }
+
+        @Test
+        @DisplayName("Should return no results when no match found")
+        void returnNoResultsWhenNoMatch() throws IOException {
+            testTextFinding("This is a test document.", "nonexistent", false, false, null, 0);
+        }
+    }
+
+    @Nested
+    @DisplayName("Whole Word Search")
+    class WholeWordSearchTests {
+
+        @Test
+        @DisplayName("Should find only whole words when enabled")
+        void findOnlyWholeWords() throws IOException {
+            testTextFinding("This is a test testing document with tested results.",
+                           "test", false, true,
+                           new String[]{"test"}, 1);
+        }
+
+        @Test
+        @DisplayName("Should find partial matches when whole word search disabled")
+        void findPartialMatches() throws IOException {
+            testTextFinding("This is a test testing document with tested results.",
+                           "test", false, false,
+                           new String[]{"test", "test", "test"}, 3);
+        }
+
+        @Test
+        @DisplayName("Should handle punctuation boundaries correctly")
+        void handlePunctuationBoundaries() throws IOException {
+            testTextFinding("Hello, world! Testing: test-case (test).",
+                           "test", false, true,
+                           new String[]{"test"}, 2); // Both standalone "test" and "test" in "test-case"
+        }
+
+        @Test
+        @DisplayName("Should handle word boundaries with special characters")
+        void handleSpecialCharacterBoundaries() throws IOException {
+            testTextFinding("Email: test@example.com and test.txt file",
+                           "test", false, true,
+                           new String[]{"test"}, 2); // Both in email and filename should match
+        }
+    }
+
+    @Nested
+    @DisplayName("Regular Expression Search")
+    class RegexSearchTests {
+
+        @Test
+        @DisplayName("Should find text matching regex pattern")
+        void findTextMatchingRegex() throws IOException {
+            testTextFinding("Contact John at 123-45-6789 or Jane at 987-65-4321 for details.",
+                           "\\d{3}-\\d{2}-\\d{4}", true, false,
+                           new String[]{"123-45-6789", "987-65-4321"}, 2);
+        }
+
+        @Test
+        @DisplayName("Should find email addresses with regex")
+        void findEmailAddresses() throws IOException {
+            testTextFinding("Email: test@example.com and admin@test.org",
+                           "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}", true, false,
+                           new String[]{"test@example.com", "admin@test.org"}, 2);
+        }
+
+        @Test
+        @DisplayName("Should combine regex with whole word search")
+        void combineRegexWithWholeWord() throws IOException {
+            testTextFinding("Email: test@example.com and admin@test.org",
+                           "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}", true, true,
+                           new String[]{"test@example.com", "admin@test.org"}, 2);
+        }
+
+        @Test
+        @DisplayName("Should find currency patterns")
+        void findCurrencyPatterns() throws IOException {
+            testTextFinding("Price: $100.50 and €75.25",
+                           "\\$\\d+\\.\\d{2}", true, false,
+                           new String[]{"$100.50"}, 1);
+        }
+
+        @ParameterizedTest
+        @ValueSource(strings = {
+            "\\d{4}-\\d{2}-\\d{2}", // Date pattern
+            "\\b[A-Z]{2,}\\b", // Uppercase words
+            "\\w+@\\w+\\.\\w+", // Simple email pattern
+            "\\$\\d+", // Simple currency
+            "\\b\\d{3,4}\\b" // 3-4 digit numbers
+        })
+        @DisplayName("Should handle various regex patterns")
+        void handleVariousRegexPatterns(String regexPattern) throws IOException {
+            String testContent = "Date: 2023-12-25, Email: test@domain.com, Price: $250, Code: ABC123, Number: 1234";
+            addTextToPage(testContent);
+
+            TextFinder textFinder = new TextFinder(regexPattern, true, false);
+            textFinder.getText(document);
+            List<PDFText> foundTexts = textFinder.getFoundTexts();
+
+            // Each pattern should find at least one match in our test content
+            assertFalse(foundTexts.isEmpty(), String.format("Pattern '%s' should find at least one match", regexPattern));
+        }
+
+        @Test
+        @DisplayName("Should handle invalid regex gracefully")
+        void handleInvalidRegex() throws IOException {
+            addTextToPage("This is test content.");
+
+            try {
+                TextFinder textFinder = new TextFinder("[invalid regex(", true, false);
+                textFinder.getText(document);
+                List<PDFText> foundTexts = textFinder.getFoundTexts();
+                assertNotNull(foundTexts);
+            } catch (java.util.regex.PatternSyntaxException e) {
+                assertNotNull(e.getMessage());
+                assertTrue(e.getMessage().contains("Unclosed character class") ||
+                          e.getMessage().contains("syntax"),
+                          "Exception should indicate regex syntax error");
+            } catch (RuntimeException | IOException e) {
+                assertNotNull(e.getMessage());
+            }
+        }
+    }
+
+    @Nested
+    @DisplayName("Special Characters and Encoding")
+    class SpecialCharacterTests {
+
+        @Test
+        @DisplayName("Should handle international characters")
+        void handleInternationalCharacters() throws IOException {
+            testTextFinding("Hello café naïve résumé",
+                           "café", false, false,
+                           new String[]{"café"}, 1);
+        }
+
+        @Test
+        @DisplayName("Should find text with accented characters")
+        void findAccentedCharacters() throws IOException {
+            testTextFinding("Café, naïve, résumé, piñata",
+                           "café", false, false,
+                           new String[]{"Café"}, 1); // Case insensitive
+        }
+
+        @Test
+        @DisplayName("Should handle special symbols")
+        void handleSpecialSymbols() throws IOException {
+            testTextFinding("Symbols: © ® ™ ± × ÷ § ¶",
+                           "©", false, false,
+                           new String[]{"©"}, 1);
+        }
+
+        @Test
+        @DisplayName("Should find currency symbols")
+        void findCurrencySymbols() throws IOException {
+            testTextFinding("Prices: $100 €75 £50 ¥1000",
+                           "[€£¥]", true, false,
+                           new String[]{"€", "£", "¥"}, 3);
+        }
+    }
+
+    @Nested
+    @DisplayName("Multi-page Document Tests")
+    class MultiPageTests {
+
+        @Test
+        @DisplayName("Should find text across multiple pages")
+        void findTextAcrossPages() throws IOException {
+            PDPage secondPage = new PDPage(PDRectangle.A4);
+            document.addPage(secondPage);
+
+            addTextToPage("First page with confidential data.");
+
+            addTextToPage(secondPage, "Second page with secret information.");
+
+            TextFinder textFinder = new TextFinder("confidential|secret", true, false);
+            textFinder.getText(document);
+            List<PDFText> foundTexts = textFinder.getFoundTexts();
+
+            assertEquals(2, foundTexts.size());
+
+            long page0Count = foundTexts.stream().filter(text -> text.getPageIndex() == 0).count();
+            long page1Count = foundTexts.stream().filter(text -> text.getPageIndex() == 1).count();
+
+            assertEquals(1, page0Count);
+            assertEquals(1, page1Count);
+        }
+
+        @Test
+        @DisplayName("Should handle empty pages gracefully")
+        void handleEmptyPages() throws IOException {
+            PDPage emptyPage = new PDPage(PDRectangle.A4);
+            document.addPage(emptyPage);
+
+            addTextToPage("Content on first page only.");
+
+            TextFinder textFinder = new TextFinder("content", false, false);
+            textFinder.getText(document);
+            List<PDFText> foundTexts = textFinder.getFoundTexts();
+
+            assertEquals(1, foundTexts.size());
+            assertEquals(0, foundTexts.get(0).getPageIndex());
+        }
+    }
+
+    @Nested
+    @DisplayName("Performance and Boundary Tests")
+    class PerformanceTests {
+
+        @Test
+        @DisplayName("Should handle very long search terms")
+        void handleLongSearchTerms() throws IOException {
+            String longTerm = "a".repeat(1000);
+            String content = "Short text with " + longTerm + " embedded.";
+
+            testTextFinding(content, longTerm, false, false, new String[]{longTerm}, 1);
+        }
+
+        @Test
+        @DisplayName("Should handle documents with many pages efficiently")
+        void handleManyPages() throws IOException {
+            for (int i = 0; i < 10; i++) {
+                if (i > 0) { // The first page already exists
+                    document.addPage(new PDPage(PDRectangle.A4));
+                }
+                addTextToPage(document.getPage(i), "Page " + i + " contains searchable content.");
+            }
+
+            long startTime = System.currentTimeMillis();
+            TextFinder textFinder = new TextFinder("searchable", false, false);
+            textFinder.getText(document);
+            List<PDFText> foundTexts = textFinder.getFoundTexts();
+            long endTime = System.currentTimeMillis();
+
+            assertEquals(10, foundTexts.size());
+            assertTrue(endTime - startTime < 3000,
+                "Multi-page search should complete within 3 seconds");
+        }
+    }
+
+    @Nested
+    @DisplayName("Error Handling and Edge Cases")
+    class ErrorHandlingTests {
+
+        @Test
+        @DisplayName("Should handle null document gracefully")
+        void handleNullDocument() throws IOException {
+            TextFinder textFinder = new TextFinder("test", false, false);
+
+            try {
+                textFinder.getText(null);
+                List<PDFText> foundTexts = textFinder.getFoundTexts();
+                assertNotNull(foundTexts);
+                assertEquals(0, foundTexts.size());
+            } catch (Exception e) {
+                assertNotNull(e.getMessage());
+            }
+        }
+
+        @Test
+        @DisplayName("Should handle document without pages")
+        void handleDocumentWithoutPages() throws IOException {
+            try (PDDocument emptyDocument = new PDDocument()) {
+                TextFinder textFinder = new TextFinder("test", false, false);
+                textFinder.getText(emptyDocument);
+                List<PDFText> foundTexts = textFinder.getFoundTexts();
+                assertEquals(0, foundTexts.size());
+            }
+        }
+
+        @Test
+        @DisplayName("Should handle pages without content")
+        void handlePagesWithoutContent() throws IOException {
+            TextFinder textFinder = new TextFinder("test", false, false);
+            textFinder.getText(document);
+            List<PDFText> foundTexts = textFinder.getFoundTexts();
+
+            assertEquals(0, foundTexts.size());
+        }
+
+        @Test
+        @DisplayName("Should handle extremely complex regex patterns")
+        void handleComplexRegexPatterns() throws IOException {
+            addTextToPage("Complex content with various patterns: abc123, def456, XYZ789");
+
+            String complexRegex = "(?=.*\\d)(?=.*[a-z])(?=.*[A-Z])[a-zA-Z\\d]{6}";
+
+            assertDoesNotThrow(() -> {
+                TextFinder textFinder = new TextFinder(complexRegex, true, false);
+                textFinder.getText(document);
+                List<PDFText> foundTexts = textFinder.getFoundTexts();
+                assertNotNull(foundTexts);
+            });
+        }
+
+        @ParameterizedTest
+        @ValueSource(strings = {"", " ", "\t", "\n", "\r\n", "   \t\n   "})
+        @DisplayName("Should handle whitespace-only search terms")
+        void handleWhitespaceSearchTerms(String whitespacePattern) throws IOException {
+            addTextToPage("This is normal text content.");
+
+            TextFinder textFinder = new TextFinder(whitespacePattern, false, false);
+            textFinder.getText(document);
+            List<PDFText> foundTexts = textFinder.getFoundTexts();
+
+            assertEquals(0, foundTexts.size());
+        }
+    }
+
+    @Nested
+    @DisplayName("Text Coordinate Verification")
+    class CoordinateTests {
+
+        @Test
+        @DisplayName("Should provide accurate text coordinates")
+        void provideAccurateCoordinates() throws IOException {
+            addTextToPage("Sample text for coordinate testing.");
+
+            TextFinder textFinder = new TextFinder("coordinate", false, false);
+            textFinder.getText(document);
+            List<PDFText> foundTexts = textFinder.getFoundTexts();
+
+            assertEquals(1, foundTexts.size());
+            PDFText foundText = foundTexts.get(0);
+
+            assertTrue(foundText.getX1() >= 0, "X1 should be non-negative");
+            assertTrue(foundText.getY1() >= 0, "Y1 should be non-negative");
+            assertTrue(foundText.getX2() > foundText.getX1(), "X2 should be greater than X1");
+            assertTrue(foundText.getY2() > foundText.getY1(), "Y2 should be greater than Y1");
+
+            double width = foundText.getX2() - foundText.getX1();
+            double height = foundText.getY2() - foundText.getY1();
+
+            assertTrue(width > 0, "Text width should be positive");
+            assertTrue(height > 0, "Text height should be positive");
+            assertTrue(width < 1000, "Text width should be reasonable");
+            assertTrue(height < 100, "Text height should be reasonable");
+        }
+
+        @Test
+        @DisplayName("Should handle overlapping text regions")
+        void handleOverlappingTextRegions() throws IOException {
+            addTextToPage("Overlapping test text content.");
+
+            TextFinder textFinder = new TextFinder("test", false, false);
+            textFinder.getText(document);
+            List<PDFText> foundTexts = textFinder.getFoundTexts();
+
+            assertFalse(foundTexts.isEmpty());
+            foundTexts.forEach(text -> {
+                assertNotNull(text.getText());
+                assertTrue(text.getX1() >= 0 && text.getY1() >= 0);
+            });
+        }
+    }
+
+    @Nested
+    @DisplayName("Single Character and Digit Tests")
+    class SingleCharacterAndDigitTests {
+
+        @Test
+        @DisplayName("Should find single digits in various contexts with whole word search")
+        void findSingleDigitsWholeWord() throws IOException {
+            String content = "Item 1 of 5 costs $2.50. Order number: 1234. Reference: A1B.";
+            addTextToPage(content);
+
+            TextFinder textFinder = new TextFinder("1", false, true);
+            textFinder.getText(document);
+            List<PDFText> foundTexts = textFinder.getFoundTexts();
+
+            assertEquals(1, foundTexts.size(),
+                "Should find exactly one standalone '1', not the ones embedded in other numbers/codes");
+            assertEquals("1", foundTexts.get(0).getText());
+        }
+
+        @Test
+        @DisplayName("Should find single digits without whole word search")
+        void findSingleDigitsNoWholeWord() throws IOException {
+            String content = "Item 1 of 5 costs $2.50. Order number: 1234. Reference: A1B.";
+            addTextToPage(content);
+
+            TextFinder textFinder = new TextFinder("1", false, false);
+            textFinder.getText(document);
+            List<PDFText> foundTexts = textFinder.getFoundTexts();
+
+            assertTrue(foundTexts.size() >= 3,
+                "Should find multiple instances of '1' including standalone, in '1234', and in 'A1B'");
+        }
+
+        @Test
+        @DisplayName("Should find single characters in various contexts")
+        void findSingleCharacters() throws IOException {
+            String content = "Grade: A. Section B has item A-1. The letter A appears multiple times.";
+            addTextToPage(content);
+
+            TextFinder textFinder = new TextFinder("A", false, true);
+            textFinder.getText(document);
+            List<PDFText> foundTexts = textFinder.getFoundTexts();
+
+            assertTrue(foundTexts.size() >= 2, "Should find multiple standalone 'A' characters");
+
+            for (PDFText found : foundTexts) {
+                assertEquals("A", found.getText());
+            }
+        }
+
+        @Test
+        @DisplayName("Should handle digits at word boundaries correctly")
+        void findDigitsAtWordBoundaries() throws IOException {
+            String content = "Numbers: 1, 2, 3. Code: 123. Version: 1.0. Item1 and Item2.";
+            addTextToPage(content);
+
+            TextFinder textFinder1 = new TextFinder("1", false, true);
+            textFinder1.getText(document);
+            List<PDFText> foundTexts1 = textFinder1.getFoundTexts();
+
+            assertEquals(1, foundTexts1.size(),
+                "Should find only the standalone '1' at the beginning");
+
+            TextFinder textFinder2 = new TextFinder("2", false, true);
+            textFinder2.getText(document);
+            List<PDFText> foundTexts2 = textFinder2.getFoundTexts();
+
+            assertEquals(1, foundTexts2.size(),
+                "Should find only the standalone '2' in the number list");
+        }
+
+        @Test
+        @DisplayName("Should handle special characters and punctuation boundaries")
+        void findDigitsWithPunctuationBoundaries() throws IOException {
+            String content = "Items: (1), [2], {3}, item#4, price$5, and 6%.";
+            addTextToPage(content);
+
+            TextFinder textFinder = new TextFinder("1", false, true);
+            textFinder.getText(document);
+            List<PDFText> foundTexts = textFinder.getFoundTexts();
+
+            assertEquals(1, foundTexts.size(), "Should find '1' surrounded by parentheses");
+            assertEquals("1", foundTexts.get(0).getText());
+        }
+
+        @Test
+        @DisplayName("Should handle edge case with spacing and formatting")
+        void findDigitsWithSpacingIssues() throws IOException {
+            String content = "List: 1 , 2  ,  3   and item   1   here.";
+            addTextToPage(content);
+
+            TextFinder textFinder = new TextFinder("1", false, true);
+            textFinder.getText(document);
+            List<PDFText> foundTexts = textFinder.getFoundTexts();
+
+            assertEquals(2, foundTexts.size(),
+                "Should find both '1' instances despite spacing variations");
+        }
+    }
+
+    // Helper methods
+    private void addTextToPage(String text) throws IOException {
+        addTextToPage(page, text);
+    }
+
+    private void addTextToPage(PDPage targetPage, String text) throws IOException {
+        try (PDPageContentStream contentStream = new PDPageContentStream(document, targetPage)) {
+            contentStream.beginText();
+            contentStream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), 12);
+            contentStream.newLineAtOffset(50, 750);
+            contentStream.showText(text);
+            contentStream.endText();
+        }
+    }
+}