enhance null checks and improve error handling in RedactionService and auto-redact.html

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
Balázs Szücs 2025-08-25 23:42:15 +02:00
parent e2ac7edad9
commit 5777b8e27e
2 changed files with 232 additions and 58 deletions

View File

@ -15,6 +15,7 @@ import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import java.util.Set; import java.util.Set;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@ -89,6 +90,10 @@ public class RedactionService {
private final TempFileManager tempFileManager; private final TempFileManager tempFileManager;
private static List<Object> parseAllTokens(PDFStreamParser parser) throws IOException { private static List<Object> parseAllTokens(PDFStreamParser parser) throws IOException {
if (parser == null) {
return Collections.emptyList();
}
List<Object> tokens = new ArrayList<>(); List<Object> tokens = new ArrayList<>();
Object token; Object token;
while ((token = parser.parseNextToken()) != null) { while ((token = parser.parseNextToken()) != null) {
@ -98,8 +103,16 @@ public class RedactionService {
} }
private static String buildLanguageOption(RedactPdfRequest request) { private static String buildLanguageOption(RedactPdfRequest request) {
List<String> langs = (request != null) ? request.getLanguages() : null; if (request == null) {
return (langs == null || langs.isEmpty()) ? "eng" : String.join("+", langs); return "eng";
}
List<String> langs = request.getLanguages();
if (langs == null || langs.isEmpty()) {
return "eng";
}
return String.join("+", langs);
} }
private static byte[] processWithOcrMyPdfForRestoration( private static byte[] processWithOcrMyPdfForRestoration(
@ -183,17 +196,15 @@ public class RedactionService {
if (originalWord == null || originalWord.isEmpty()) return " "; if (originalWord == null || originalWord.isEmpty()) return " ";
if (font == null || fontSize <= 0) return " ".repeat(originalWord.length()); if (font == null || fontSize <= 0) return " ".repeat(originalWord.length());
// Enhanced font subset handling
if (TextEncodingHelper.isFontSubset(font.getName())) {
return createEnhancedSubsetPlaceholder(originalWord, targetWidth, font, fontSize);
}
if (!WidthCalculator.isWidthCalculationReliable(font))
return " ".repeat(originalWord.length());
final String repeat = " ".repeat(Math.max(1, originalWord.length()));
try { try {
if (TextEncodingHelper.isFontSubset(font.getName())) {
return createEnhancedSubsetPlaceholder(originalWord, targetWidth, font, fontSize);
}
if (!WidthCalculator.isWidthCalculationReliable(font)) {
return " ".repeat(originalWord.length());
}
float spaceWidth = WidthCalculator.calculateAccurateWidth(font, " ", fontSize); float spaceWidth = WidthCalculator.calculateAccurateWidth(font, " ", fontSize);
if (spaceWidth <= 0) { if (spaceWidth <= 0) {
return createAlternativePlaceholder(originalWord, targetWidth, font, fontSize); return createAlternativePlaceholder(originalWord, targetWidth, font, fontSize);
@ -205,8 +216,9 @@ public class RedactionService {
originalWord.length() * 2, Math.round(targetWidth / spaceWidth * 1.5f)); originalWord.length() * 2, Math.round(targetWidth / spaceWidth * 1.5f));
return " ".repeat(Math.min(spaceCount, maxSpaces)); return " ".repeat(Math.min(spaceCount, maxSpaces));
} catch (Exception e) { } catch (Exception e) {
log.debug("Error creating placeholder with width: {}", e.getMessage());
String result = createAlternativePlaceholder(originalWord, targetWidth, font, fontSize); String result = createAlternativePlaceholder(originalWord, targetWidth, font, fontSize);
return result != null ? result : repeat; return result != null ? result : " ".repeat(Math.max(1, originalWord.length()));
} }
} }
@ -304,23 +316,33 @@ public class RedactionService {
String[] parts = pageNumbers.split(","); String[] parts = pageNumbers.split(",");
for (String part : parts) { for (String part : parts) {
String trim = part.trim(); String trimmedPart = part.trim();
if (trim.contains("-")) { if (trimmedPart.isEmpty()) continue;
String[] range = trim.split("-");
if (trimmedPart.contains("-")) {
String[] range = trimmedPart.split("-", 2);
if (range.length == 2) { if (range.length == 2) {
try { try {
int start = Integer.parseInt(range[0].trim()); int start = Integer.parseInt(range[0].trim());
int end = Integer.parseInt(range[1].trim()); int end = Integer.parseInt(range[1].trim());
for (int i = start; i <= end; i++) {
result.add(i); if (start <= end && start > 0 && end > 0) {
for (int i = start; i <= end; i++) {
result.add(i);
}
} }
} catch (NumberFormatException ignored) { } catch (NumberFormatException e) {
log.warn("Invalid page range format: '{}'", trimmedPart);
} }
} }
} else { } else {
try { try {
result.add(Integer.parseInt(trim)); int pageNum = Integer.parseInt(trimmedPart);
} catch (NumberFormatException ignored) { if (pageNum > 0) {
result.add(pageNum);
}
} catch (NumberFormatException e) {
log.warn("Invalid page number: '{}'", trimmedPart);
} }
} }
} }
@ -329,13 +351,19 @@ public class RedactionService {
} }
private static Color decodeOrDefault(String hex) { private static Color decodeOrDefault(String hex) {
if (hex == null) { if (hex == null || hex.trim().isEmpty()) {
return Color.BLACK; return Color.BLACK;
} }
String colorString = (!hex.isEmpty() && hex.charAt(0) == '#') ? hex : "#" + hex;
String colorString = hex.trim();
if (!colorString.startsWith("#")) {
colorString = "#" + colorString;
}
try { try {
return Color.decode(colorString); return Color.decode(colorString);
} catch (NumberFormatException e) { } catch (NumberFormatException e) {
log.warn("Invalid color format '{}', using default black", hex);
return Color.BLACK; return Color.BLACK;
} }
} }
@ -343,18 +371,30 @@ public class RedactionService {
private static void redactFoundText( private static void redactFoundText(
PDDocument document, List<PDFText> blocks, float customPadding, Color redactColor) PDDocument document, List<PDFText> blocks, float customPadding, Color redactColor)
throws IOException { throws IOException {
if (document == null || blocks == null || blocks.isEmpty()) {
return;
}
var allPages = document.getDocumentCatalog().getPages(); var allPages = document.getDocumentCatalog().getPages();
Map<Integer, List<PDFText>> blocksByPage = new HashMap<>(); Map<Integer, List<PDFText>> blocksByPage = new HashMap<>();
for (PDFText block : blocks) { for (PDFText block : blocks) {
blocksByPage.computeIfAbsent(block.getPageIndex(), k -> new ArrayList<>()).add(block); if (block != null && block.getPageIndex() >= 0) {
blocksByPage
.computeIfAbsent(block.getPageIndex(), k -> new ArrayList<>())
.add(block);
}
} }
for (Map.Entry<Integer, List<PDFText>> entry : blocksByPage.entrySet()) { for (Map.Entry<Integer, List<PDFText>> entry : blocksByPage.entrySet()) {
Integer pageIndex = entry.getKey(); Integer pageIndex = entry.getKey();
if (pageIndex >= allPages.getCount()) { if (pageIndex == null || pageIndex >= allPages.getCount()) {
continue; continue;
} }
PDPage page = allPages.get(pageIndex); PDPage page = allPages.get(pageIndex);
List<PDFText> pageBlocks = entry.getValue(); List<PDFText> pageBlocks = entry.getValue();
try (PDPageContentStream cs = try (PDPageContentStream cs =
new PDPageContentStream( new PDPageContentStream(
document, page, PDPageContentStream.AppendMode.APPEND, true, true)) { document, page, PDPageContentStream.AppendMode.APPEND, true, true)) {
@ -362,16 +402,19 @@ public class RedactionService {
try { try {
cs.setNonStrokingColor(redactColor); cs.setNonStrokingColor(redactColor);
PDRectangle pageBox = page.getBBox(); PDRectangle pageBox = page.getBBox();
for (PDFText b : pageBlocks) {
for (PDFText block : pageBlocks) {
if (block == null) continue;
float padding = float padding =
(b.getY2() - b.getY1()) * DEFAULT_TEXT_PADDING_MULTIPLIER (block.getY2() - block.getY1()) * DEFAULT_TEXT_PADDING_MULTIPLIER
+ customPadding; + customPadding;
float width = b.getX2() - b.getX1(); float width = block.getX2() - block.getX1();
cs.addRect( cs.addRect(
b.getX1(), block.getX1(),
pageBox.getHeight() - b.getY2() - padding, pageBox.getHeight() - block.getY2() - padding,
width, width,
b.getY2() - b.getY1() + 2 * padding); block.getY2() - block.getY1() + 2 * padding);
} }
cs.fill(); cs.fill();
} finally { } finally {
@ -383,6 +426,10 @@ public class RedactionService {
static void writeFilteredContentStream(PDDocument document, PDPage page, List<Object> tokens) static void writeFilteredContentStream(PDDocument document, PDPage page, List<Object> tokens)
throws IOException { throws IOException {
if (document == null || page == null || tokens == null) {
throw new IllegalArgumentException("Document, page, and tokens cannot be null");
}
PDStream newStream = new PDStream(document); PDStream newStream = new PDStream(document);
try (var out = newStream.createOutputStream()) { try (var out = newStream.createOutputStream()) {
new ContentStreamWriter(out).writeTokens(tokens); new ContentStreamWriter(out).writeTokens(tokens);
@ -400,6 +447,10 @@ public class RedactionService {
Set<String> targetWords, Set<String> targetWords,
boolean useRegex, boolean useRegex,
boolean wholeWordSearch) { boolean wholeWordSearch) {
if (document == null || targetWords == null || targetWords.isEmpty() || pageIndex < 0) {
return false;
}
try { try {
for (String term : targetWords) { for (String term : targetWords) {
if (term == null || term.isBlank()) continue; if (term == null || term.isBlank()) continue;
@ -417,6 +468,10 @@ public class RedactionService {
} }
return false; return false;
} catch (Exception e) { } catch (Exception e) {
log.warn(
"Error checking if page {} still contains targets: {}",
pageIndex,
e.getMessage());
return true; return true;
} }
} }
@ -426,6 +481,10 @@ public class RedactionService {
Set<String> targetWords, Set<String> targetWords,
boolean useRegex, boolean useRegex,
boolean wholeWordSearch) { boolean wholeWordSearch) {
if (document == null || targetWords == null || targetWords.isEmpty()) {
return false;
}
try { try {
for (int pageIndex = 0; pageIndex < document.getNumberOfPages(); pageIndex++) { for (int pageIndex = 0; pageIndex < document.getNumberOfPages(); pageIndex++) {
if (pageStillContainsTargets( if (pageStillContainsTargets(
@ -435,21 +494,28 @@ public class RedactionService {
} }
return false; return false;
} catch (Exception e) { } catch (Exception e) {
log.warn("Error checking if document still contains targets: {}", e.getMessage());
return true; return true;
} }
} }
public static Map<Integer, List<PDFText>> findTextToRedact( public static Map<Integer, List<PDFText>> findTextToRedact(
PDDocument document, String[] listOfText, boolean useRegex, boolean wholeWordSearch) { PDDocument document, String[] listOfText, boolean useRegex, boolean wholeWordSearch) {
if (document == null || listOfText == null) {
return Collections.emptyMap();
}
Map<Integer, List<PDFText>> allFoundTextsByPage = new HashMap<>(); Map<Integer, List<PDFText>> allFoundTextsByPage = new HashMap<>();
for (String text : listOfText) { for (String text : listOfText) {
String t = text.trim(); if (text == null) continue;
if (t.isEmpty()) {
String trimmedText = text.trim();
if (trimmedText.isEmpty()) {
continue; continue;
} }
try { try {
TextFinder finder = new TextFinder(t, useRegex, wholeWordSearch); TextFinder finder = new TextFinder(trimmedText, useRegex, wholeWordSearch);
finder.getText(document); finder.getText(document);
List<PDFText> foundTexts = finder.getFoundTexts(); List<PDFText> foundTexts = finder.getFoundTexts();
@ -459,6 +525,7 @@ public class RedactionService {
.add(found); .add(found);
} }
} catch (Exception e) { } catch (Exception e) {
log.warn("Error finding text '{}': {}", trimmedText, e.getMessage());
} }
} }
@ -473,10 +540,19 @@ public class RedactionService {
Boolean convertToImage, Boolean convertToImage,
boolean isTextRemovalMode) boolean isTextRemovalMode)
throws IOException { throws IOException {
List<PDFText> allFoundTexts = new ArrayList<>(); if (document == null) {
for (List<PDFText> pageTexts : allFoundTextsByPage.values()) { throw new IllegalArgumentException("Document cannot be null");
allFoundTexts.addAll(pageTexts);
} }
List<PDFText> allFoundTexts = new ArrayList<>();
if (allFoundTextsByPage != null) {
for (List<PDFText> pageTexts : allFoundTextsByPage.values()) {
if (pageTexts != null) {
allFoundTexts.addAll(pageTexts);
}
}
}
if (!allFoundTexts.isEmpty() && !isTextRemovalMode) { if (!allFoundTexts.isEmpty() && !isTextRemovalMode) {
Color redactColor = decodeOrDefault(colorString); Color redactColor = decodeOrDefault(colorString);
redactFoundText(document, allFoundTexts, customPadding, redactColor); redactFoundText(document, allFoundTexts, customPadding, redactColor);
@ -528,15 +604,24 @@ public class RedactionService {
Set<String> targetWords, Set<String> targetWords,
boolean useRegex, boolean useRegex,
boolean wholeWordSearch) { boolean wholeWordSearch) {
if (completeText == null || targetWords == null || targetWords.isEmpty()) {
return Collections.emptyList();
}
List<Pattern> patterns = List<Pattern> patterns =
TextFinderUtils.createOptimizedSearchPatterns( TextFinderUtils.createOptimizedSearchPatterns(
targetWords, useRegex, wholeWordSearch); targetWords, useRegex, wholeWordSearch);
return patterns.stream() return patterns.stream()
.flatMap( .flatMap(
pattern -> { pattern -> {
try { try {
return pattern.matcher(completeText).results(); return pattern.matcher(completeText).results();
} catch (Exception e) { } catch (Exception e) {
log.debug(
"Error matching pattern '{}': {}",
pattern.pattern(),
e.getMessage());
return java.util.stream.Stream.empty(); return java.util.stream.Stream.empty();
} }
}) })
@ -547,9 +632,16 @@ public class RedactionService {
private static void performFallbackModification( private static void performFallbackModification(
List<Object> tokens, int tokenIndex, String newText) { List<Object> tokens, int tokenIndex, String newText) {
if (tokens == null || tokenIndex < 0 || tokenIndex >= tokens.size() || newText == null) {
return;
}
try { try {
tokens.set(tokenIndex, newText.isEmpty() ? EMPTY_COS_STRING : new COSString(newText)); tokens.set(tokenIndex, newText.isEmpty() ? EMPTY_COS_STRING : new COSString(newText));
} catch (Exception e) { } catch (Exception e) {
log.debug(
"Fallback modification failed, attempting emergency fallback: {}",
e.getMessage());
performEmergencyFallback(tokens, tokenIndex); performEmergencyFallback(tokens, tokenIndex);
} }
} }
@ -675,6 +767,10 @@ public class RedactionService {
private static void writeRedactedContentToXObject( private static void writeRedactedContentToXObject(
PDDocument document, PDFormXObject formXObject, List<Object> redactedTokens) PDDocument document, PDFormXObject formXObject, List<Object> redactedTokens)
throws IOException { throws IOException {
if (document == null || formXObject == null || redactedTokens == null) {
throw new IllegalArgumentException("Document, form XObject, and tokens cannot be null");
}
var cosStream = formXObject.getCOSObject(); var cosStream = formXObject.getCOSObject();
try (var out = cosStream.createOutputStream()) { try (var out = cosStream.createOutputStream()) {
new ContentStreamWriter(out).writeTokens(redactedTokens); new ContentStreamWriter(out).writeTokens(redactedTokens);
@ -791,12 +887,19 @@ public class RedactionService {
} }
private static List<Object> deepCopyTokens(List<Object> original) { private static List<Object> deepCopyTokens(List<Object> original) {
if (original == null) {
return new ArrayList<>();
}
List<Object> copy = new ArrayList<>(original.size()); List<Object> copy = new ArrayList<>(original.size());
for (Object obj : original) { for (Object obj : original) {
if (obj instanceof COSDictionary dict) { if (obj instanceof COSDictionary dict) {
COSDictionary newDict = new COSDictionary(); COSDictionary newDict = new COSDictionary();
for (COSName key : dict.keySet()) { for (COSName key : dict.keySet()) {
newDict.setItem(key, dict.getDictionaryObject(key)); COSBase value = dict.getDictionaryObject(key);
if (value != null) {
newDict.setItem(key, value);
}
} }
copy.add(newDict); copy.add(newDict);
} else if (obj instanceof List<?> nestedList } else if (obj instanceof List<?> nestedList
@ -838,7 +941,7 @@ public class RedactionService {
private static String normalizeTextForRedaction(String text) { private static String normalizeTextForRedaction(String text) {
if (text == null) return null; if (text == null) return null;
StringBuilder normalized = new StringBuilder(); StringBuilder normalized = new StringBuilder(text.length());
for (int i = 0; i < text.length(); i++) { for (int i = 0; i < text.length(); i++) {
char c = text.charAt(i); char c = text.charAt(i);
@ -961,9 +1064,11 @@ public class RedactionService {
} }
private static String sanitizeText(String text) { private static String sanitizeText(String text) {
if (text == null) return ""; if (text == null || text.isEmpty()) {
return "";
}
StringBuilder sanitized = new StringBuilder(); StringBuilder sanitized = new StringBuilder(text.length());
for (char c : text.toCharArray()) { for (char c : text.toCharArray()) {
sanitized.append( sanitized.append(
(Character.isISOControl(c) && c != '\n' && c != '\t' && c != '\r') (Character.isISOControl(c) && c != '\n' && c != '\t' && c != '\r')
@ -1283,6 +1388,10 @@ public class RedactionService {
} }
private static float calculateCharacterSumWidth(PDFont font, String text) { private static float calculateCharacterSumWidth(PDFont font, String text) {
if (font == null || text == null || text.isEmpty()) {
return -1f;
}
float totalWidth = 0f; float totalWidth = 0f;
for (char c : text.toCharArray()) { for (char c : text.toCharArray()) {
try { try {
@ -1295,21 +1404,33 @@ public class RedactionService {
} }
private static boolean isValidTokenIndex(List<Object> tokens, int index) { private static boolean isValidTokenIndex(List<Object> tokens, int index) {
return index >= 0 && index < tokens.size(); return tokens != null && index >= 0 && index < tokens.size();
} }
private static String buildCompleteText(List<TextSegment> segments) { private static String buildCompleteText(List<TextSegment> segments) {
if (segments == null || segments.isEmpty()) {
return "";
}
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
for (TextSegment segment : segments) { for (TextSegment segment : segments) {
sb.append(segment.text); if (segment != null && segment.text != null) {
sb.append(segment.text);
}
} }
return sb.toString(); return sb.toString();
} }
private static boolean isProperFontSubset(String fontName) { private static boolean isProperFontSubset(String fontName) {
if (fontName.length() < 7) return false; if (fontName == null || fontName.length() < 7) {
return false;
}
for (int i = 0; i < 6; i++) { for (int i = 0; i < 6; i++) {
if (fontName.charAt(i) < 'A' || fontName.charAt(i) > 'Z') return false; char c = fontName.charAt(i);
if (c < 'A' || c > 'Z') {
return false;
}
} }
return fontName.charAt(6) == '+'; return fontName.charAt(6) == '+';
} }
@ -1341,10 +1462,15 @@ public class RedactionService {
} }
private static void performEmergencyFallback(List<Object> tokens, int tokenIndex) { private static void performEmergencyFallback(List<Object> tokens, int tokenIndex) {
if (tokens == null || tokenIndex < 0 || tokenIndex >= tokens.size()) {
return;
}
try { try {
tokens.set(tokenIndex, EMPTY_COS_STRING); tokens.set(tokenIndex, EMPTY_COS_STRING);
} catch (Exception e) { } catch (Exception e) {
log.error("Emergency fallback failed: {}", e.getMessage()); log.error(
"Emergency fallback failed for token index {}: {}", tokenIndex, e.getMessage());
} }
} }
@ -1380,12 +1506,21 @@ public class RedactionService {
} }
private static boolean hasReliableWidthMetrics(PDFont font) { private static boolean hasReliableWidthMetrics(PDFont font) {
if (font == null) {
return false;
}
try { try {
String testString = "AbCdEf123"; String testString = "AbCdEf123";
float width1 = font.getStringWidth(testString); float width1 = font.getStringWidth(testString);
float width2 = calculateCharacterSumWidth(font, testString); float width2 = calculateCharacterSumWidth(font, testString);
if (width1 <= 0 || width2 <= 0) return false;
return Math.abs(width1 - width2) / Math.max(width1, width2) < 0.05f; if (width1 <= 0 || width2 <= 0) {
return false;
}
float maxWidth = Math.max(width1, width2);
return Math.abs(width1 - width2) / maxWidth < 0.05f;
} catch (Exception e) { } catch (Exception e) {
return false; return false;
} }
@ -1555,8 +1690,15 @@ public class RedactionService {
} }
private static int getActualStringLength(COSString cosString, PDFont font) { private static int getActualStringLength(COSString cosString, PDFont font) {
if (cosString == null) {
return 0;
}
try { try {
if (font == null) return cosString.getString().length(); if (font == null) {
return cosString.getString().length();
}
String decodedText = TextDecodingHelper.tryDecodeWithFont(font, cosString); String decodedText = TextDecodingHelper.tryDecodeWithFont(font, cosString);
return decodedText != null ? decodedText.length() : cosString.getString().length(); return decodedText != null ? decodedText.length() : cosString.getString().length();
} catch (Exception e) { } catch (Exception e) {
@ -1575,7 +1717,10 @@ public class RedactionService {
} }
private static boolean isValidTJArray(COSArray array) { private static boolean isValidTJArray(COSArray array) {
if (array == null || array.size() == 0) return false; if (array == null || array.size() == 0) {
return false;
}
for (COSBase element : array) { for (COSBase element : array) {
if (!(element instanceof COSString) && !(element instanceof COSNumber)) { if (!(element instanceof COSString) && !(element instanceof COSNumber)) {
return false; return false;
@ -1746,14 +1891,24 @@ public class RedactionService {
String[] listOfText, String[] listOfText,
boolean useRegex, boolean useRegex,
boolean wholeWordSearchBool) { boolean wholeWordSearchBool) {
if (allFoundTextsByPage.isEmpty()) return; if (document == null
|| allFoundTextsByPage == null
|| allFoundTextsByPage.isEmpty()
|| listOfText == null) {
return;
}
Set<String> allSearchTerms = Set<String> allSearchTerms =
Arrays.stream(listOfText) Arrays.stream(listOfText)
.filter(Objects::nonNull)
.map(String::trim) .map(String::trim)
.filter(s -> !s.isEmpty()) .filter(s -> !s.isEmpty())
.collect(Collectors.toSet()); .collect(Collectors.toSet());
if (allSearchTerms.isEmpty()) {
return;
}
this.aggressiveMode = true; this.aggressiveMode = true;
this.aggressiveSegMatches = new HashMap<>(); this.aggressiveSegMatches = new HashMap<>();
@ -1783,7 +1938,11 @@ public class RedactionService {
anyResidual = true; anyResidual = true;
processResidualText(document, page, filtered); processResidualText(document, page, filtered);
} }
} catch (Exception ignored) { } catch (Exception e) {
log.warn(
"Error processing page {} in aggressive mode: {}",
pageIndex,
e.getMessage());
} }
} }
@ -1938,17 +2097,26 @@ public class RedactionService {
String[] listOfText, String[] listOfText,
boolean useRegex, boolean useRegex,
boolean wholeWordSearchBool) { boolean wholeWordSearchBool) {
if (allFoundTextsByPage.isEmpty()) { if (document == null
log.info("No text found to redact"); || allFoundTextsByPage == null
|| allFoundTextsByPage.isEmpty()
|| listOfText == null) {
log.info("No text found to redact or invalid input parameters");
return false; return false;
} }
Set<String> allSearchTerms = Set<String> allSearchTerms =
Arrays.stream(listOfText) Arrays.stream(listOfText)
.filter(Objects::nonNull)
.map(String::trim) .map(String::trim)
.filter(s -> !s.isEmpty()) .filter(s -> !s.isEmpty())
.collect(Collectors.toSet()); .collect(Collectors.toSet());
if (allSearchTerms.isEmpty()) {
log.info("No valid search terms provided");
return false;
}
log.info("Starting text replacement with {} search terms", allSearchTerms.size()); log.info("Starting text replacement with {} search terms", allSearchTerms.size());
for (int sweep = 0; sweep < MAX_SWEEPS; sweep++) { for (int sweep = 0; sweep < MAX_SWEEPS; sweep++) {
@ -1961,7 +2129,6 @@ public class RedactionService {
} }
} }
// Verification attempts
for (int attempt = 0; attempt < 3; attempt++) { for (int attempt = 0; attempt < 3; attempt++) {
if (!documentStillContainsTargets( if (!documentStillContainsTargets(
document, allSearchTerms, useRegex, wholeWordSearchBool)) { document, allSearchTerms, useRegex, wholeWordSearchBool)) {
@ -2063,6 +2230,10 @@ public class RedactionService {
Set<String> allSearchTerms, Set<String> allSearchTerms,
boolean useRegex, boolean useRegex,
boolean wholeWordSearchBool) { boolean wholeWordSearchBool) {
if (document == null || allSearchTerms == null || allSearchTerms.isEmpty()) {
return;
}
for (PDPage page : document.getPages()) { for (PDPage page : document.getPages()) {
try { try {
List<Object> filtered = List<Object> filtered =

View File

@ -73,7 +73,7 @@
<div class="redaction-options-group"> <div class="redaction-options-group">
<label class="form-label fw-bold mb-3" th:text="#{autoRedact.redactionStyleLabel}"></label> <label class="form-label fw-bold mb-3" th:text="#{autoRedact.redactionStyleLabel}"></label>
<div class="form-check mb-2"> <div class="form-check mb-2">
<input aria-describedby="visual-desc" class="form-check-input" id="visualImage" name="redactionMode" type="radio" value="visual"> <input aria-describedby="visual-desc" checked class="form-check-input" id="visualImage" name="redactionMode" type="radio" value="visual">
<label class="form-check-label" for="visualImage" th:text="#{autoRedact.visualRedactionLabel}">Visual</label> <label class="form-check-label" for="visualImage" th:text="#{autoRedact.visualRedactionLabel}">Visual</label>
<small class="form-text text-muted d-block mt-1" id="visual-desc" th:text="#{autoRedact.visualRedactionDescription}">Converts to image with visual redactions for maximum security.</small> <small class="form-text text-muted d-block mt-1" id="visual-desc" th:text="#{autoRedact.visualRedactionDescription}">Converts to image with visual redactions for maximum security.</small>
</div> </div>
@ -83,7 +83,7 @@
<small class="form-text text-muted d-block mt-1" id="delete-desc" th:text="#{autoRedact.deleteTextDescription}">Removes the text completely. This may alter the original layout or leave a gap.</small> <small class="form-text text-muted d-block mt-1" id="delete-desc" th:text="#{autoRedact.deleteTextDescription}">Removes the text completely. This may alter the original layout or leave a gap.</small>
</div> </div>
<div class="form-check mb-3"> <div class="form-check mb-3">
<input aria-describedby="keep-desc" checked class="form-check-input" id="keepLayout" name="redactionMode" type="radio" value="moderate"> <input aria-describedby="keep-desc" class="form-check-input" id="keepLayout" name="redactionMode" type="radio" value="moderate">
<label class="form-check-label" for="keepLayout" th:text="#{autoRedact.keepLayoutLabel}">Keep Layout</label> <label class="form-check-label" for="keepLayout" th:text="#{autoRedact.keepLayoutLabel}">Keep Layout</label>
<small class="form-text text-muted d-block mt-1" id="keep-desc" th:text="#{autoRedact.keepLayoutDescription}">Covers text with a redaction box, preserving the page's original design.</small> <small class="form-text text-muted d-block mt-1" id="keep-desc" th:text="#{autoRedact.keepLayoutDescription}">Covers text with a redaction box, preserving the page's original design.</small>
</div> </div>
@ -126,7 +126,7 @@
<label class="form-label" for="languages">OCR Languages</label> <label class="form-label" for="languages">OCR Languages</label>
<div id="languages"> <div id="languages">
<div class="form-check" th:each="language, iterStat : ${languages}"> <div class="form-check" th:each="language, iterStat : ${languages}">
<input onchange="handleLangSelection()" required th:id="${'language-' + language}" th:name="languages" th:value="${language}" type="checkbox" /> <input onchange="handleLangSelection()" required th:checked="${language == 'eng'}" th:id="${'language-' + language}" th:name="languages" th:value="${language}" type="checkbox" />
<label th:for="${'language-' + language}" th:text="${language}"></label> <label th:for="${'language-' + language}" th:text="${language}"></label>
</div> </div>
</div> </div>
@ -275,6 +275,9 @@
// Initialize language list ordering & labels // Initialize language list ordering & labels
prioritizeLanguages(); prioritizeLanguages();
// Handle pre-selected English language
handleLangSelection();
}); });
</script> </script>
</body> </body>