enhance language translation handling and improve redaction logic
Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
Parent: 38c261a82e
Commit: b2bd4aff61
@@ -814,10 +814,6 @@ public class RedactionService {
         return strategy.redact(request);
     }
 
-    /**
-     * Enhanced redaction with semantic scrubbing Integrates the PDFBox enhancement plan for both
-     * text redaction and metadata cleanup
-     */
     public byte[] redactPdfWithSemanticScrubbing(
             RedactPdfRequest request, Set<ScrubOption> scrubOptions) throws IOException {
 
@@ -826,7 +822,6 @@ public class RedactionService {
             mode = "moderate";
         }
 
-        // Perform standard redaction first
         RedactionModeStrategy strategy =
                 switch (mode.toLowerCase()) {
                     case "visual" -> new VisualRedactionService(pdfDocumentFactory, this);
@@ -836,13 +831,10 @@ public class RedactionService {
 
         byte[] redactedBytes = strategy.redact(request);
 
-        // Apply semantic scrubbing to the redacted document
         if (scrubOptions != null && !scrubOptions.isEmpty()) {
             try (PDDocument document = pdfDocumentFactory.load(redactedBytes)) {
                 DefaultSemanticScrubber scrubber = new DefaultSemanticScrubber();
                 scrubber.scrub(document, scrubOptions);
-
-                // Save the scrubbed document
                 try (ByteArrayOutputStream output = new ByteArrayOutputStream()) {
                     document.save(output);
                     return output.toByteArray();
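
Note: the hunks above keep the redact-then-scrub order intact: the chosen strategy redacts first, and DefaultSemanticScrubber then post-processes the already-redacted bytes. A minimal sketch of that flow, reusing names from the diff (pdfDocumentFactory, RedactionModeStrategy, DefaultSemanticScrubber, and ScrubOption are the project's own types; redactThenScrub itself is a hypothetical helper, not code from this commit):

    // Sketch only: standard redaction first, semantic scrubbing second.
    private byte[] redactThenScrub(
            RedactionModeStrategy strategy, RedactPdfRequest request, Set<ScrubOption> options)
            throws IOException {
        byte[] redacted = strategy.redact(request); // visual/text redaction runs first
        if (options == null || options.isEmpty()) {
            return redacted; // no scrubbing requested
        }
        try (PDDocument document = pdfDocumentFactory.load(redacted);
                ByteArrayOutputStream output = new ByteArrayOutputStream()) {
            new DefaultSemanticScrubber().scrub(document, options); // metadata cleanup
            document.save(output);
            return output.toByteArray();
        }
    }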
@@ -858,20 +850,32 @@ public class RedactionService {
         return redactedBytes;
     }
 
-    public byte[] applySemanticScrubbing(MultipartFile file, Set<ScrubOption> scrubOptions)
-            throws IOException {
-        if (scrubOptions == null || scrubOptions.isEmpty()) {
-            return file.getBytes(); // No scrubbing requested
-        }
-
-        try (PDDocument document = pdfDocumentFactory.load(file)) {
-            DefaultSemanticScrubber scrubber = new DefaultSemanticScrubber();
-            scrubber.scrub(document, scrubOptions);
-
-            try (ByteArrayOutputStream output = new ByteArrayOutputStream()) {
-                document.save(output);
-                return output.toByteArray();
-            }
-        }
-    }
+    private static TokenModificationResult convertToTJWithAdjustment(
+            List<Object> tokens,
+            int tokenIndex,
+            String originalOperator,
+            String newText,
+            float adjustment,
+            TextSegment segment) {
+        try {
+            if (!isValidTokenIndex(tokens, tokenIndex) || segment == null) {
+                return TokenModificationResult.failure("Invalid token index or segment");
+            }
+            COSArray array = new COSArray();
+            COSString cos =
+                    newText == null || newText.isEmpty()
+                            ? EMPTY_COS_STRING
+                            : new COSString(newText);
+            array.add(cos);
+            if (segment.getFontSize() > 0) {
+                float kerning = (-adjustment / segment.getFontSize()) * FONT_SCALE_FACTOR;
+                array.add(new COSFloat(kerning));
+            }
+            tokens.set(tokenIndex, array);
+            updateOperatorSafely(tokens, tokenIndex, originalOperator);
+            return TokenModificationResult.success();
+        } catch (Exception e) {
+            return TokenModificationResult.failure("Conversion to TJ failed: " + e.getMessage());
+        }
+    }
 
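
Note: the kerning term in convertToTJWithAdjustment is the width-compensation trick used throughout this file. When a redacted string gets narrower, a number appended to the TJ array shifts the following text back by the lost width; TJ numbers are expressed in thousandths of an em scaled by the font size, which is presumably what FONT_SCALE_FACTOR encodes (assumed 1000 below). A worked sketch of the arithmetic:

    // Sketch, assuming FONT_SCALE_FACTOR = 1000f and widths already scaled by font size.
    static float tjKerning(float originalWidth, float modifiedWidth, float fontSize) {
        float adjustment = originalWidth - modifiedWidth; // width lost to redaction
        // A TJ number t moves the following text left by t/1000 * fontSize,
        // so restoring `adjustment` units of width needs t = -adjustment / fontSize * 1000.
        return (-adjustment / fontSize) * 1000f;
    }
    // Example: a 12 pt run shrinks from 60 to 36 units -> tjKerning = -(24/12)*1000 = -2000,
    // which pushes the following glyphs right by exactly the missing 24 units.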
@@ -934,6 +938,60 @@ public class RedactionService {
         }
     }
 
+    private static List<MatchRange> findMatchesInSegments(
+            List<TextSegment> segments,
+            Set<String> targetWords,
+            boolean useRegex,
+            boolean wholeWordSearch) {
+        List<MatchRange> allMatches = new ArrayList<>();
+        List<Pattern> patterns =
+                TextFinderUtils.createOptimizedSearchPatterns(
+                        targetWords, useRegex, wholeWordSearch);
+
+        int totalMatchesFound = 0;
+
+        for (int i = 0; i < segments.size(); i++) {
+            TextSegment segment = segments.get(i);
+            String segmentText = segment.getText();
+            if (segmentText == null || segmentText.isEmpty()) {
+                continue;
+            }
+
+            if (segment.getFont() != null
+                    && !TextEncodingHelper.isTextSegmentRemovable(segment.getFont(), segmentText)) {
+                continue;
+            }
+
+            int segmentMatches = 0;
+            for (Pattern pattern : patterns) {
+                try {
+                    var matcher = pattern.matcher(segmentText);
+                    while (matcher.find()) {
+                        int matchStart = matcher.start();
+                        int matchEnd = matcher.end();
+
+                        if (matchStart >= 0
+                                && matchEnd <= segmentText.length()
+                                && matchStart < matchEnd) {
+                            String matchedText = segmentText.substring(matchStart, matchEnd);
+
+                            allMatches.add(
+                                    new MatchRange(
+                                            segment.getStartPos() + matchStart,
+                                            segment.getStartPos() + matchEnd));
+                            segmentMatches++;
+                            totalMatchesFound++;
+                        }
+                    }
+                } catch (Exception e) {
+                }
+            }
+        }
+        allMatches.sort(Comparator.comparingInt(MatchRange::getStartPos));
+
+        return allMatches;
+    }
+
     private static WipeResult wipeAllSemanticTextInTokens(List<Object> tokens) {
         return wipeAllSemanticTextInTokens(tokens, true);
     }
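
Note: the new findMatchesInSegments reports matches as global offsets: each segment contributes segment.getStartPos() plus the regex match offsets within that segment's text, and the sorted list is what the token-rewriting code consumes. A small illustration with hypothetical values:

    // Hypothetical values: a segment whose decoded text starts at global position 120
    // and matches a target at local offsets 5..11 yields the global range [125, 131).
    int segmentStart = 120;            // segment.getStartPos()
    int localStart = 5, localEnd = 11; // matcher.start(), matcher.end()
    MatchRange range = new MatchRange(segmentStart + localStart, segmentStart + localEnd);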
@@ -1195,99 +1253,21 @@ public class RedactionService {
         }
     }
 
-    private static List<MatchRange> findMatchesInSegments(
-            List<TextSegment> segments,
-            Set<String> targetWords,
-            boolean useRegex,
-            boolean wholeWordSearch) {
-        List<MatchRange> allMatches = new ArrayList<>();
-        List<Pattern> patterns =
-                TextFinderUtils.createOptimizedSearchPatterns(
-                        targetWords, useRegex, wholeWordSearch);
-
-        log.debug("Searching for {} patterns in {} segments", patterns.size(), segments.size());
-
-        int totalMatchesFound = 0;
-
-        for (int i = 0; i < segments.size(); i++) {
-            TextSegment segment = segments.get(i);
-            String segmentText = segment.getText();
-            if (segmentText == null || segmentText.isEmpty()) {
-                log.debug("Skipping empty segment {}", i);
-                continue;
-            }
-
-            log.debug("Processing segment {}: '{}'", i, segmentText);
-
-            if (segment.getFont() != null
-                    && !TextEncodingHelper.isTextSegmentRemovable(segment.getFont(), segmentText)) {
-                log.debug(
-                        "Skipping segment {} - font not removable: {}",
-                        i,
-                        segment.getFont().getName());
-                continue;
-            }
-
-            int segmentMatches = 0;
-            for (Pattern pattern : patterns) {
-                try {
-                    log.debug(
-                            "Matching pattern '{}' against segment text '{}'",
-                            pattern.pattern(),
-                            segmentText);
-                    var matcher = pattern.matcher(segmentText);
-                    while (matcher.find()) {
-                        int matchStart = matcher.start();
-                        int matchEnd = matcher.end();
-
-                        log.debug(
-                                "Found match in segment {}: positions {}-{}",
-                                i,
-                                matchStart,
-                                matchEnd);
-
-                        if (matchStart >= 0
-                                && matchEnd <= segmentText.length()
-                                && matchStart < matchEnd) {
-                            String matchedText = segmentText.substring(matchStart, matchEnd);
-                            log.debug("Matched text: '{}'", matchedText);
-
-                            allMatches.add(
-                                    new MatchRange(
-                                            segment.getStartPos() + matchStart,
-                                            segment.getStartPos() + matchEnd));
-                            segmentMatches++;
-                            totalMatchesFound++;
-                        }
-                    }
-                } catch (Exception e) {
-                    log.error("Error matching pattern in segment {}: {}", i, e.getMessage());
-                }
-            }
-
-            if (segmentMatches > 0) {
-                log.info("Segment {} had {} matches", i, segmentMatches);
-            }
-        }
-
-        log.info("Total matches found across all segments: {}", totalMatchesFound);
-        allMatches.sort(Comparator.comparingInt(MatchRange::getStartPos));
-
-        if (allMatches.isEmpty()) {
-            log.warn("No matches found in segments. This might indicate:");
-            log.warn("1. Text encoding issues preventing proper extraction");
-            log.warn("2. Font compatibility issues");
-            log.warn("3. Search terms not matching extracted text");
-            log.warn("4. Whole word search filtering out matches");
-
-            if (!segments.isEmpty()) {
-                log.warn("Sample segment text: '{}'", segments.get(0).getText());
-                log.warn("Target words: {}", targetWords);
-                log.warn("Use regex: {}, Whole word search: {}", useRegex, wholeWordSearch);
-            }
-        }
-
-        return allMatches;
-    }
+    public byte[] applySemanticScrubbing(MultipartFile file, Set<ScrubOption> scrubOptions)
+            throws IOException {
+        if (scrubOptions == null || scrubOptions.isEmpty()) {
+            return file.getBytes();
+        }
+
+        try (PDDocument document = pdfDocumentFactory.load(file)) {
+            DefaultSemanticScrubber scrubber = new DefaultSemanticScrubber();
+            scrubber.scrub(document, scrubOptions);
+
+            try (ByteArrayOutputStream output = new ByteArrayOutputStream()) {
+                document.save(output);
+                return output.toByteArray();
+            }
+        }
+    }
 
     private static float calculateCharacterSumWidth(PDFont font, String text) {
@@ -1721,18 +1701,12 @@ public class RedactionService {
             boolean useRegex,
             boolean wholeWordSearch)
             throws IOException {
-        log.debug("Processing page with {} target words: {}", targetWords.size(), targetWords);
-
         PDFStreamParser parser = new PDFStreamParser(page);
         List<Object> tokens = parseAllTokens(parser);
         int tokenCount = tokens.size();
 
-        log.debug("Parsed {} tokens from page content stream", tokenCount);
-
         if (tokenCount == 0 && !targetWords.isEmpty()) {
-            log.warn(
-                    "No tokens parsed from page content stream - this might indicate encoding issues");
-            log.warn("Attempting alternative verification for target words: {}", targetWords);
-
             try {
                 TextFinder directFinder = new TextFinder("", false, false);
@@ -1748,22 +1722,16 @@ public class RedactionService {
                 }
 
                 String extractedText = pageText.toString().trim();
-                log.debug("Alternative text extraction found: '{}'", extractedText);
-
                 for (String word : targetWords) {
-                    if (extractedText.toLowerCase().contains(word.toLowerCase())) {
-                        log.warn("Found target word '{}' via alternative extraction method", word);
-                    }
+                    if (extractedText.toLowerCase().contains(word.toLowerCase())) {}
                 }
 
             } catch (Exception e) {
-                log.error("Alternative text extraction failed: {}", e.getMessage());
             }
         }
 
         PDResources resources = page.getResources();
         if (resources != null) {
-            log.debug("Processing XObjects for page");
             processPageXObjects(
                     document,
                     resources,
@@ -1775,7 +1743,6 @@ public class RedactionService {
 
         List<TextSegment> textSegments =
                 extractTextSegmentsFromTokens(page.getResources(), tokens, this.aggressiveMode);
-        log.debug("Extracted {} text segments from tokens", textSegments.size());
 
         if (!textSegments.isEmpty()) {
             StringBuilder allText = new StringBuilder();
@@ -1787,8 +1754,6 @@ public class RedactionService {
                 if (!isTextSafeForRedaction(segmentText)) {
                     hasProblematicChars = true;
                     segmentText = normalizeTextForRedaction(segmentText);
-                    log.debug(
-                            "Normalized problematic text in segment: original contained encoding issues");
                 }
                 allText.append(segmentText).append(" ");
             }
@@ -1796,86 +1761,24 @@ public class RedactionService {
 
             String completeText = allText.toString().trim();
             if (!completeText.isEmpty()) {
-                log.debug("Complete extracted text: '{}'", completeText);
-                if (hasProblematicChars) {
-                    log.info("Applied character normalization to handle encoding issues");
-                }
+                if (hasProblematicChars) {}
             }
         }
 
         List<MatchRange> matches;
         if (this.aggressiveMode) {
-            log.debug("Using aggressive mode for matching");
             matches =
                     findAllMatchesAggressive(
                             textSegments, tokens, targetWords, useRegex, wholeWordSearch);
         } else {
-            log.debug("Using moderate mode for matching");
             matches = findMatchesInSegments(textSegments, targetWords, useRegex, wholeWordSearch);
         }
 
-        log.info("Found {} matches to redact", matches.size());
-        if (!matches.isEmpty()) {
-            log.debug("Match ranges: {}", matches);
-        }
-
         List<Object> resultTokens = applyRedactionsToTokens(tokens, textSegments, matches);
         int modifications = tokens.size() - resultTokens.size();
-        log.debug(
-                "Applied redactions - original tokens: {}, result tokens: {}, modifications: {}",
-                tokens.size(),
-                resultTokens.size(),
-                modifications);
 
         return resultTokens;
     }
 
-    private static TokenModificationResult convertToTJWithAdjustment(
-            List<Object> tokens,
-            int tokenIndex,
-            String originalOperator,
-            String newText,
-            float adjustment,
-            TextSegment segment) {
-        try {
-            COSArray newArray = new COSArray();
-            newArray.add(new COSString(newText));
-
-            if (segment.getFontSize() > 0) {
-                float kerning = (-adjustment / segment.getFontSize()) * FONT_SCALE_FACTOR;
-                if (Math.abs(kerning) <= 10000f) {
-                    newArray.add(new COSFloat(kerning));
-                }
-            }
-
-            tokens.set(tokenIndex, newArray);
-            return updateOperatorSafely(tokens, tokenIndex, originalOperator);
-        } catch (Exception e) {
-            return TokenModificationResult.failure("TJ conversion failed: " + e.getMessage());
-        }
-    }
-
-    private static void addSpacingAdjustment(
-            COSArray newArray, TextSegment segment, String originalText, String modifiedText) {
-        try {
-            if (segment.getFont() == null || segment.getFontSize() <= 0) return;
-
-            float originalWidth =
-                    calculateSafeWidth(originalText, segment.getFont(), segment.getFontSize());
-            float modifiedWidth =
-                    calculateSafeWidth(modifiedText, segment.getFont(), segment.getFontSize());
-            float adjustment = originalWidth - modifiedWidth;
-
-            if (Math.abs(adjustment) > PRECISION_THRESHOLD) {
-                float kerning = (-adjustment / segment.getFontSize()) * FONT_SCALE_FACTOR * 1.10f;
-                if (Math.abs(kerning) < 1000) {
-                    newArray.add(new COSFloat(kerning));
-                }
-            }
-        } catch (Exception e) {
-        }
-    }
-
     private float safeGetStringWidth(PDFont font, String text) {
         if (font == null || text == null || text.isEmpty()) return 0f;
         try {
@@ -2104,7 +2007,7 @@ public class RedactionService {
                 || allFoundTextsByPage == null
                 || allFoundTextsByPage.isEmpty()
                 || listOfText == null) {
-            log.info("No text found to redact or invalid input parameters");
+
             return false;
         }
 
@@ -2116,18 +2019,16 @@ public class RedactionService {
                         .collect(Collectors.toSet());
 
         if (allSearchTerms.isEmpty()) {
-            log.info("No valid search terms provided");
+
            return false;
        }
 
-        log.info("Starting text replacement with {} search terms", allSearchTerms.size());
-
         for (int sweep = 0; sweep < MAX_SWEEPS; sweep++) {
             processPages(document, allSearchTerms, useRegex, wholeWordSearchBool);
 
             if (!documentStillContainsTargets(
                     document, allSearchTerms, useRegex, wholeWordSearchBool)) {
-                log.info("SUCCESS: All targets removed after {} sweeps", sweep + 1);
+
                 return false;
             }
         }
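
Note: the sweep loop above is a fixed-point check: redact, re-scan, and repeat up to MAX_SWEEPS times until documentStillContainsTargets reports clean (the surrounding method returns false on success, true on failure). A compact sketch of the same pattern using the diff's own names:

    // Sketch of the sweep-until-clean pattern; MAX_SWEEPS bounds the retries.
    boolean stillDirty = true;
    for (int sweep = 0; sweep < MAX_SWEEPS && stillDirty; sweep++) {
        processPages(document, allSearchTerms, useRegex, wholeWordSearchBool);
        stillDirty = documentStillContainsTargets(
                document, allSearchTerms, useRegex, wholeWordSearchBool);
    }
    return stillDirty; // true means targets survived every sweep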
@@ -2142,7 +2043,6 @@ public class RedactionService {
             }
         }
 
-        log.error("FAILURE: Document still contains targets after {} sweeps", MAX_SWEEPS);
         return true;
     }
 
@@ -2267,20 +2167,14 @@ public class RedactionService {
 
     private List<Object> applyRedactionsToTokens(
             List<Object> tokens, List<TextSegment> textSegments, List<MatchRange> matches) {
-        log.debug(
-                "Applying redactions to {} tokens with {} matches across {} segments",
-                tokens.size(),
-                matches.size(),
-                textSegments.size());
-
         List<Object> newTokens = new ArrayList<>(tokens);
-        int totalModifications = 0;
 
         if (this.aggressiveMode) {
-            log.debug("Using aggressive mode for token redaction");
             Map<Integer, List<AggressiveSegMatch>> perSeg = this.aggressiveSegMatches;
             if (perSeg != null && !perSeg.isEmpty()) {
-                log.debug("Processing {} aggressive segments", perSeg.size());
                 List<Integer> segIndices = new ArrayList<>(perSeg.keySet());
                 segIndices.sort(
                         (a, b) ->
@@ -2294,26 +2188,17 @@ public class RedactionService {
                         continue;
                     }
 
-                    log.debug(
-                            "Processing aggressive segment {} with {} matches",
-                            segIndex,
-                            segMatches.size());
                     Object token = newTokens.get(segment.tokenIndex);
                     String opName = segment.operatorName;
                     if (("Tj".equals(opName) || "'".equals(opName) || "\"".equals(opName))
                             && token instanceof COSString cs) {
-                        log.debug(
-                                "Redacting Tj/TjQuote operator at token index {}",
-                                segment.tokenIndex);
                         COSString redacted =
                                 redactCosStringByDecodedRanges(segment.font, cs, segMatches);
                         if (segment.font != null && segment.fontSize > 0) {
                             String originalText = getDecodedString(cs, segment.font);
                             String modifiedText = getDecodedString(redacted, segment.font);
-                            log.debug(
-                                    "Original text: '{}', Modified text: '{}'",
-                                    originalText,
-                                    modifiedText);
                             float wOrig =
                                     calculateSafeWidth(
                                             originalText, segment.font, segment.fontSize);
@@ -2322,7 +2207,7 @@ public class RedactionService {
                                             modifiedText, segment.font, segment.fontSize);
                             float adjustment = wOrig - wMod;
                             if (Math.abs(adjustment) > PRECISION_THRESHOLD) {
-                                log.debug("Applying kerning adjustment: {}", adjustment);
+
                                 COSArray arr = new COSArray();
                                 arr.add(redacted);
                                 float kerning =
@@ -2330,30 +2215,25 @@ public class RedactionService {
                                 arr.add(new COSFloat(kerning));
                                 newTokens.set(segment.tokenIndex, arr);
                                 updateOperatorSafely(newTokens, segment.tokenIndex, opName);
-                                totalModifications++;
                             } else {
                                 newTokens.set(segment.tokenIndex, redacted);
-                                totalModifications++;
                             }
                         } else {
                             newTokens.set(segment.tokenIndex, redacted);
-                            totalModifications++;
                         }
                     } else if ("TJ".equals(opName) && token instanceof COSArray arr) {
-                        log.debug("Redacting TJ operator at token index {}", segment.tokenIndex);
                         COSArray redacted =
                                 redactTJArrayByDecodedRanges(segment.font, arr, segMatches);
                         COSArray withKerning = buildKerningAdjustedTJArray(arr, redacted, segment);
                         newTokens.set(segment.tokenIndex, withKerning);
-                        totalModifications++;
                     }
                 }
-                log.info("Aggressive mode completed - {} modifications made", totalModifications);
                 return newTokens;
             }
         }
 
-        log.debug("Using moderate mode for token redaction");
         Map<Integer, List<MatchRange>> matchesBySegment = new HashMap<>();
         for (MatchRange match : matches) {
             for (int i = 0; i < textSegments.size(); i++) {
@@ -2366,10 +2246,7 @@ public class RedactionService {
             }
         }
 
-        log.debug("Matches distributed across {} segments", matchesBySegment.size());
-        matchesBySegment.forEach(
-                (segIdx, matchList) ->
-                        log.debug("Segment {}: {} matches", segIdx, matchList.size()));
+        // removed noop forEach
 
         List<ModificationTask> tasks = new ArrayList<>();
         for (Map.Entry<Integer, List<MatchRange>> entry : matchesBySegment.entrySet()) {
@@ -2377,12 +2254,12 @@ public class RedactionService {
             List<MatchRange> segmentMatches = entry.getValue();
 
             if (segmentIndex < 0 || segmentIndex >= textSegments.size()) {
-                log.warn("Invalid segment index: {}", segmentIndex);
+
                 continue;
             }
             TextSegment segment = textSegments.get(segmentIndex);
             if (segment == null) {
-                log.warn("Null segment at index: {}", segmentIndex);
+
                 continue;
             }
 
@@ -2390,39 +2267,24 @@ public class RedactionService {
                 if ("Tj".equals(segment.operatorName)
                         || "'".equals(segment.operatorName)
                         || "\"".equals(segment.operatorName)) {
-                    log.debug(
-                            "Creating modification task for Tj operator at segment {}",
-                            segmentIndex);
                     String newText = applyRedactionsToSegmentText(segment, segmentMatches);
                     if (newText == null) newText = "";
                     float adjustment = calculateWidthAdjustment(segment, segmentMatches);
                     tasks.add(new ModificationTask(segment, newText, adjustment));
-                    log.debug(
-                            "Task created: original='{}', new='{}', adjustment={}",
-                            segment.getText(),
-                            newText,
-                            adjustment);
                 } else if ("TJ".equals(segment.operatorName)) {
-                    log.debug(
-                            "Creating modification task for TJ operator at segment {}",
-                            segmentIndex);
                     tasks.add(new ModificationTask(segment, "", 0));
                 }
             } catch (Exception e) {
-                log.error(
-                        "Error creating modification task for segment {}: {}",
-                        segmentIndex,
-                        e.getMessage());
             }
         }
 
-        log.info("Created {} modification tasks", tasks.size());
         tasks.sort((a, b) -> Integer.compare(b.segment.tokenIndex, a.segment.tokenIndex));
 
         int maxTasksToProcess = Math.min(tasks.size(), 1000);
-        log.debug("Processing {} out of {} tasks (limit: 1000)", maxTasksToProcess, tasks.size());
 
-        int successfulModifications = 0;
         for (int i = 0; i < maxTasksToProcess && i < tasks.size(); i++) {
             ModificationTask task = tasks.get(i);
             try {
@@ -2431,39 +2293,22 @@ public class RedactionService {
                                 textSegments.indexOf(task.segment), Collections.emptyList());
 
                 if (task.segment.tokenIndex >= newTokens.size()) {
-                    log.warn(
-                            "Token index {} out of bounds (size: {})",
-                            task.segment.tokenIndex,
-                            newTokens.size());
                     continue;
                 }
                 if (task.segment.getText() == null || task.segment.getText().isEmpty()) {
-                    log.debug("Skipping empty text segment at index {}", task.segment.tokenIndex);
                     continue;
                 }
 
-                log.debug(
-                        "Applying redaction to token {}: '{}' -> '{}'",
-                        task.segment.tokenIndex,
-                        task.segment.getText(),
-                        task.newText);
-
                 modifyTokenForRedaction(
                         newTokens, task.segment, task.newText, task.adjustment, segmentMatches);
-                successfulModifications++;
-                totalModifications++;
-
             } catch (Exception e) {
-                log.error("Error applying redaction to task {}: {}", i, e.getMessage());
             }
         }
 
-        log.info(
-                "Redaction completed - {} successful modifications out of {} tasks",
-                successfulModifications,
-                tasks.size());
-        log.info("Total modifications made: {}", totalModifications);
-
         return newTokens;
     }
 
@@ -2981,6 +2826,24 @@ public class RedactionService {
         }
     }
 
+    private void addSpacingAdjustment(
+            COSArray array, TextSegment segment, String originalText, String modifiedText) {
+        try {
+            if (array == null || segment == null || segment.getFont() == null) return;
+            if (Objects.equals(originalText, modifiedText)) return;
+
+            float wOrig =
+                    calculateSafeWidth(originalText, segment.getFont(), segment.getFontSize());
+            float wMod = calculateSafeWidth(modifiedText, segment.getFont(), segment.getFontSize());
+            float adjustment = wOrig - wMod;
+            if (Math.abs(adjustment) <= PRECISION_THRESHOLD) return;
+
+            float kerning = (-adjustment / segment.getFontSize()) * FONT_SCALE_FACTOR;
+            array.add(new COSFloat(kerning));
+        } catch (Exception ignored) {
+        }
+    }
+
     private void wipeAllTextInXObjects(PDDocument document, PDResources resources) {
         try {
             for (COSName xobjName : resources.getXObjectNames()) {
@@ -3033,14 +2896,7 @@ public class RedactionService {
             FallbackStrategy fontStrategy)
             throws IOException {
 
-        log.info(
-                "Starting enhanced redaction with {} targets and {} scrub options",
-                targetText.length,
-                scrubOptions.size());
-
         byte[] result = redactPdfWithSemanticScrubbing(request, scrubOptions);
-
-        log.info("Enhanced redaction completed successfully");
         return result;
     }
 
@@ -3055,8 +2911,7 @@ public class RedactionService {
             for (int i = 0; i < text.length(); ) {
                 int codePoint = text.codePointAt(i);
                 if (!probe.hasGlyph(codePoint)) {
-                    log.debug(
-                            "Font {} missing glyph for code point: {}", font.getName(), codePoint);
+
                     return false;
                 }
                 i += Character.charCount(codePoint);
@@ -3064,7 +2919,7 @@ public class RedactionService {
 
             return true;
         } catch (Exception e) {
-            log.debug("Error validating font coverage", e);
+
             return false;
         }
     }
@@ -3197,12 +3052,12 @@ public class RedactionService {
                             coverage.add(cid);
                         }
                     } catch (Exception e) {
-                        // Glyph not available
+
                     }
                 }
             }
         } catch (Exception e) {
-            log.debug("Could not build glyph coverage for font: {}", font.getName(), e);
+
         }
         return coverage;
     }
@@ -3228,7 +3083,7 @@ public class RedactionService {
                 String charStr = new String(Character.toChars(codePoint));
                 return font.getStringWidth(charStr) / FONT_SCALE_FACTOR * fontSize;
             } catch (Exception e) {
-                // Fall through
+
             }
         }
         return switch (strategy) {
@@ -3266,7 +3121,7 @@ public class RedactionService {
                     validChars++;
                 }
             } catch (Exception e) {
-                // Skip
+
             }
         }
 
@@ -3298,8 +3153,6 @@ public class RedactionService {
             return;
         }
 
-        log.info("Starting semantic scrub with options: {}", options);
-
         try {
             scrubStructureTree(document, options);
 
@@ -3309,9 +3162,8 @@ public class RedactionService {
                 scrubAnnotations(document, options);
             }
 
-            log.info("Semantic scrub completed successfully");
         } catch (Exception e) {
-            log.error("Error during semantic scrub", e);
+
         }
     }
 
@@ -3324,7 +3176,7 @@ public class RedactionService {
                 scrubStructureElement(structRoot, options);
             }
         } catch (Exception e) {
-            log.debug("Could not scrub structure tree", e);
+
         }
     }
 
@@ -3394,7 +3246,7 @@ public class RedactionService {
                 }
             }
         } catch (Exception e) {
-            log.debug("Could not scrub annotations", e);
+
         }
     }
 }
(The remaining hunks touch a second file: the OCR language selection script in the web UI; the filename is not shown in this view.)

@@ -210,7 +210,11 @@
         }
     }
 
-    // Helper to get translated language from data attribute
+    // Language translations map populated by Thymeleaf for available OCR languages
+    const languageTranslations = {};
+    /*[# th:each="lang : ${languages}"]*/
+    languageTranslations['[(${lang})]'] = /*[[#{${'lang.' + lang}}]]*/'[(${lang})]';
+    /*[/]*/
 
     const localeToTesseract = {
         'en': 'eng', 'fr': 'fra', 'de': 'deu', 'es': 'spa', 'it': 'ita', 'pt': 'por', 'ru': 'rus',
@@ -221,9 +225,10 @@
     };
 
     function getTranslatedLanguageName(shortCode) {
-        // Try to find a label with matching code and read its data-lang-name
-        const label = document.querySelector(`#languages label[for="language-${shortCode}"]`);
-        return (label && (label.dataset.langName || label.textContent)) || shortCode;
+        // Use Thymeleaf-provided map; fall back to code when translation missing
+        const name = languageTranslations[shortCode];
+        if (name && !/^\?{2,}.+\?{2,}$/.test(name)) return name;
+        return shortCode;
     }
 
     function prioritizeLanguages() {
@@ -235,9 +240,8 @@
             const label = element.querySelector('label');
             if (label) {
                 const langCode = label.getAttribute('for').split('-')[1];
-                // Display translated name if available
-                const translated = label.dataset.langName;
-                if (translated) label.textContent = translated;
+                // Always set from translations map; gracefully falls back to code
+                label.textContent = getTranslatedLanguageName(langCode);
             }
         });
         const browserLanguage = document.documentElement.lang || navigator.language || navigator.userLanguage;
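
Note: the /*[# th:each ...]*/ blocks above are Thymeleaf's JavaScript natural-template syntax: at render time the loop emits one assignment per language code, resolving the lang.<code> message key, while the quoted literal after the inline expression is only the static fallback. The regex guard in getTranslatedLanguageName filters out the ??key?? placeholders Thymeleaf produces for missing messages. A hypothetical sketch of the server side that could feed ${languages} (not code from this commit):

    import java.util.List;
    import org.springframework.stereotype.Controller;
    import org.springframework.ui.Model;
    import org.springframework.web.bind.annotation.GetMapping;

    @Controller
    class OcrPageController {
        @GetMapping("/ocr")
        public String ocrPage(Model model) {
            // Assumption: the installed Tesseract language codes.
            model.addAttribute("languages", List.of("eng", "fra", "deu"));
            return "ocr"; // the view containing the script shown above
        }
    }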