enhance redaction options and implement visual redaction with OCR restoration

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
Balázs Szücs 2025-08-25 19:31:00 +02:00
parent 7db58ad6dd
commit f236505cae
4 changed files with 623 additions and 448 deletions

View File

@ -6,11 +6,13 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.stereotype.Service;
import stirling.software.SPDF.model.PDFText; import stirling.software.SPDF.model.PDFText;
import stirling.software.SPDF.model.api.security.RedactPdfRequest; import stirling.software.SPDF.model.api.security.RedactPdfRequest;
import stirling.software.common.service.CustomPDFDocumentFactory; import stirling.software.common.service.CustomPDFDocumentFactory;
@Service
class AggressiveRedactionService implements RedactionModeStrategy { class AggressiveRedactionService implements RedactionModeStrategy {
private final CustomPDFDocumentFactory pdfDocumentFactory; private final CustomPDFDocumentFactory pdfDocumentFactory;
@ -49,16 +51,9 @@ class AggressiveRedactionService implements RedactionModeStrategy {
? "#000000" ? "#000000"
: request.getRedactColor(); : request.getRedactColor();
if (residualExists) { if (residualExists) {
fb = pdfDocumentFactory.load(request.getFileInput()); // Use the new visual redaction with OCR restoration fallback
Map<Integer, List<PDFText>> fbFound = return helper.performVisualRedactionWithOcrRestoration(
RedactionService.findTextToRedact(fb, listOfText, useRegex, wholeWord); request, listOfText, useRegex, wholeWord);
return RedactionService.finalizeRedaction(
fb,
fbFound,
effectiveColor,
request.getCustomPadding(), /*force*/
true,
false);
} }
return RedactionService.finalizeRedaction( return RedactionService.finalizeRedaction(
doc, doc,

View File

@ -6,11 +6,13 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.stereotype.Service;
import stirling.software.SPDF.model.PDFText; import stirling.software.SPDF.model.PDFText;
import stirling.software.SPDF.model.api.security.RedactPdfRequest; import stirling.software.SPDF.model.api.security.RedactPdfRequest;
import stirling.software.common.service.CustomPDFDocumentFactory; import stirling.software.common.service.CustomPDFDocumentFactory;
@Service
class ModerateRedactionService implements RedactionModeStrategy { class ModerateRedactionService implements RedactionModeStrategy {
private final CustomPDFDocumentFactory pdfDocumentFactory; private final CustomPDFDocumentFactory pdfDocumentFactory;
@ -46,17 +48,9 @@ class ModerateRedactionService implements RedactionModeStrategy {
? "#000000" ? "#000000"
: request.getRedactColor(); : request.getRedactColor();
if (fallbackToBoxOnly) { if (fallbackToBoxOnly) {
fallback = pdfDocumentFactory.load(request.getFileInput()); // Use the new visual redaction with OCR restoration fallback
allFound = return helper.performVisualRedactionWithOcrRestoration(
RedactionService.findTextToRedact( request, listOfText, useRegex, wholeWord);
fallback, listOfText, useRegex, wholeWord);
return RedactionService.finalizeRedaction(
fallback,
allFound,
effectiveColor,
request.getCustomPadding(),
request.getConvertPDFToImage(),
false);
} }
return RedactionService.finalizeRedaction( return RedactionService.finalizeRedaction(
doc, doc,

View File

@ -1,6 +1,7 @@
package stirling.software.SPDF.service; package stirling.software.SPDF.service;
import java.awt.Color; import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
@ -17,6 +18,8 @@ import java.util.Set;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import javax.imageio.ImageIO;
import org.apache.pdfbox.contentstream.operator.Operator; import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSBase;
@ -25,6 +28,7 @@ import org.apache.pdfbox.cos.COSFloat;
import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
import org.apache.pdfbox.pdfparser.PDFStreamParser; import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdfwriter.ContentStreamWriter; import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
@ -38,6 +42,7 @@ import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.graphics.PDXObject; import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern; import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.MultipartFile;
@ -58,6 +63,11 @@ import stirling.software.SPDF.utils.text.WidthCalculator;
import stirling.software.common.model.api.security.RedactionArea; import stirling.software.common.model.api.security.RedactionArea;
import stirling.software.common.service.CustomPDFDocumentFactory; import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.PdfUtils; import stirling.software.common.util.PdfUtils;
import stirling.software.common.util.ProcessExecutor;
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
import stirling.software.common.util.TempDirectory;
import stirling.software.common.util.TempFile;
import stirling.software.common.util.TempFileManager;
@Service @Service
@Slf4j @Slf4j
@ -70,10 +80,11 @@ public class RedactionService {
private static final int FONT_SCALE_FACTOR = 1000; private static final int FONT_SCALE_FACTOR = 1000;
private static final Set<String> TEXT_SHOWING_OPERATORS = Set.of("Tj", "TJ", "'", "\""); private static final Set<String> TEXT_SHOWING_OPERATORS = Set.of("Tj", "TJ", "'", "\"");
private static final COSString EMPTY_COS_STRING = new COSString(""); private static final COSString EMPTY_COS_STRING = new COSString("");
private static final int MAX_SWEEPS = 5; private static final int MAX_SWEEPS = 3;
private boolean aggressiveMode = false; private boolean aggressiveMode = false;
private Map<Integer, List<AggressiveSegMatch>> aggressiveSegMatches = null; private Map<Integer, List<AggressiveSegMatch>> aggressiveSegMatches = null;
private final CustomPDFDocumentFactory pdfDocumentFactory; private final CustomPDFDocumentFactory pdfDocumentFactory;
private final TempFileManager tempFileManager;
private static void redactAreas( private static void redactAreas(
List<RedactionArea> redactionAreas, PDDocument document, PDPageTree allPages) List<RedactionArea> redactionAreas, PDDocument document, PDPageTree allPages)
@ -98,7 +109,7 @@ public class RedactionService {
} }
for (Map.Entry<Integer, List<RedactionArea>> entry : redactionsByPage.entrySet()) { for (Map.Entry<Integer, List<RedactionArea>> entry : redactionsByPage.entrySet()) {
Integer pageNumber = entry.getKey(); int pageNumber = entry.getKey();
List<RedactionArea> areasForPage = entry.getValue(); List<RedactionArea> areasForPage = entry.getValue();
if (pageNumber > allPages.getCount()) { if (pageNumber > allPages.getCount()) {
continue; continue;
@ -110,14 +121,13 @@ public class RedactionService {
document, page, PDPageContentStream.AppendMode.APPEND, true, true)) { document, page, PDPageContentStream.AppendMode.APPEND, true, true)) {
contentStream.saveGraphicsState(); contentStream.saveGraphicsState();
for (RedactionArea redactionArea : areasForPage) { for (RedactionArea redactionArea : areasForPage) {
Color redactColor = decodeOrDefault(redactionArea.getColor()); contentStream.setNonStrokingColor(decodeOrDefault(redactionArea.getColor()));
contentStream.setNonStrokingColor(redactColor);
float x = redactionArea.getX().floatValue(); float x = redactionArea.getX().floatValue();
float y = redactionArea.getY().floatValue(); float y = redactionArea.getY().floatValue();
float width = redactionArea.getWidth().floatValue(); float width = redactionArea.getWidth().floatValue();
float height = redactionArea.getHeight().floatValue(); float height = redactionArea.getHeight().floatValue();
float pdfY = page.getBBox().getHeight() - y - height; contentStream.addRect(
contentStream.addRect(x, pdfY, width, height); x, page.getBBox().getHeight() - y - height, width, height);
contentStream.fill(); contentStream.fill();
} }
contentStream.restoreGraphicsState(); contentStream.restoreGraphicsState();
@ -133,11 +143,11 @@ public class RedactionService {
List<Integer> pageNumberList = parsePageNumbers(pageNumbers); List<Integer> pageNumberList = parsePageNumbers(pageNumbers);
for (Integer pageNumber : pageNumberList) { for (int pageNumber : pageNumberList) {
if (pageNumber <= 0 || pageNumber > allPages.getCount()) { if (pageNumber <= 0 || pageNumber > allPages.getCount()) {
continue; // Skip invalid page numbers continue;
} }
PDPage page = allPages.get(pageNumber - 1); // Convert to 0-based index PDPage page = allPages.get(pageNumber - 1);
try (PDPageContentStream contentStream = try (PDPageContentStream contentStream =
new PDPageContentStream( new PDPageContentStream(
document, page, PDPageContentStream.AppendMode.APPEND, true, true)) { document, page, PDPageContentStream.AppendMode.APPEND, true, true)) {
@ -255,55 +265,28 @@ public class RedactionService {
boolean useRegex, boolean useRegex,
boolean wholeWordSearch) { boolean wholeWordSearch) {
try { try {
log.debug("Checking page {} for {} target words", pageIndex + 1, targetWords.size());
for (String term : targetWords) { for (String term : targetWords) {
if (term == null || term.isBlank()) { if (term == null || term.isBlank()) {
log.debug("Skipping empty/null term");
continue; continue;
} }
log.debug("Searching for term '{}' on page {}", term, pageIndex + 1);
TextFinder finder = new TextFinder(term, useRegex, wholeWordSearch); TextFinder finder = new TextFinder(term, useRegex, wholeWordSearch);
finder.setStartPage(pageIndex + 1); finder.setStartPage(pageIndex + 1);
finder.setEndPage(pageIndex + 1); finder.setEndPage(pageIndex + 1);
finder.getText(document); finder.getText(document);
List<PDFText> foundTexts = finder.getFoundTexts(); List<PDFText> foundTexts = finder.getFoundTexts();
log.debug(
"Found {} instances of '{}' on page {}",
foundTexts.size(),
term,
pageIndex + 1);
for (PDFText ft : foundTexts) { for (PDFText ft : foundTexts) {
if (ft.getPageIndex() == pageIndex) { if (ft.getPageIndex() == pageIndex) {
log.warn(
"FOUND REMAINING TARGET: '{}' on page {} - text content: '{}'",
term,
pageIndex + 1,
ft.getText() != null ? ft.getText() : "null");
return true; return true;
} }
} }
if (!foundTexts.isEmpty()) { if (!foundTexts.isEmpty()) {}
log.debug(
"Found instances but not on target page {} (found on pages: {})",
pageIndex + 1,
foundTexts.stream()
.map(ft -> String.valueOf(ft.getPageIndex() + 1))
.distinct()
.collect(java.util.stream.Collectors.joining(", ")));
}
} }
log.debug("Page {} contains no target words", pageIndex + 1);
return false; return false;
} catch (Exception e) { } catch (Exception e) {
log.error("Error checking page {} for targets: {}", pageIndex + 1, e.getMessage());
log.warn("Due to error, assuming page {} may still contain targets", pageIndex + 1);
return true; return true;
} }
} }
@ -313,28 +296,20 @@ public class RedactionService {
Set<String> targetWords, Set<String> targetWords,
boolean useRegex, boolean useRegex,
boolean wholeWordSearch) { boolean wholeWordSearch) {
log.debug("Verifying if document still contains targets: {}", targetWords);
try { try {
int idx = -1; int idx = -1;
final int numberOfPages = document.getNumberOfPages(); final int numberOfPages = document.getNumberOfPages();
for (int i = 0; i < numberOfPages; i++) { for (int i = 0; i < numberOfPages; i++) {
idx++; idx++;
log.debug("Checking page {} for remaining targets", idx + 1);
if (pageStillContainsTargets( if (pageStillContainsTargets(
document, idx, targetWords, useRegex, wholeWordSearch)) { document, idx, targetWords, useRegex, wholeWordSearch)) {
log.warn("Page {} still contains target words", idx + 1);
return true; return true;
} }
} }
log.info("Document verification completed - no targets found");
return false; return false;
} catch (Exception e) { } catch (Exception e) {
log.error("Error during document verification: {}", e.getMessage());
log.warn("Due to verification error, assuming targets may still exist");
return true; return true;
} }
} }
@ -342,57 +317,26 @@ public class RedactionService {
public static Map<Integer, List<PDFText>> findTextToRedact( public static Map<Integer, List<PDFText>> findTextToRedact(
PDDocument document, String[] listOfText, boolean useRegex, boolean wholeWordSearch) { PDDocument document, String[] listOfText, boolean useRegex, boolean wholeWordSearch) {
Map<Integer, List<PDFText>> allFoundTextsByPage = new HashMap<>(); Map<Integer, List<PDFText>> allFoundTextsByPage = new HashMap<>();
log.info(
"Starting text search with {} terms, useRegex={}, wholeWordSearch={}",
listOfText.length,
useRegex,
wholeWordSearch);
int totalInstancesFound = 0;
for (String text : listOfText) { for (String text : listOfText) {
String t = text.trim(); String t = text.trim();
if (t.isEmpty()) { if (t.isEmpty()) {
log.debug("Skipping empty search term");
continue; continue;
} }
log.info("Searching for term: '{}'", t);
try { try {
TextFinder finder = new TextFinder(t, useRegex, wholeWordSearch); TextFinder finder = new TextFinder(t, useRegex, wholeWordSearch);
finder.getText(document); finder.getText(document);
List<PDFText> foundTexts = finder.getFoundTexts(); List<PDFText> foundTexts = finder.getFoundTexts();
log.info("Found {} instances of '{}' across the document", foundTexts.size(), t);
for (PDFText found : foundTexts) { for (PDFText found : foundTexts) {
allFoundTextsByPage allFoundTextsByPage
.computeIfAbsent(found.getPageIndex(), k -> new ArrayList<>()) .computeIfAbsent(found.getPageIndex(), k -> new ArrayList<>())
.add(found); .add(found);
log.debug(
"Found instance on page {}: '{}'",
found.getPageIndex() + 1,
found.getText() != null ? found.getText() : "null");
totalInstancesFound++;
} }
} catch (Exception e) { } catch (Exception e) {
log.error("Error searching for term '{}': {}", t, e.getMessage());
} }
} }
log.info("Total instances found across all search terms: {}", totalInstancesFound);
log.info(
"Text found on {} pages out of {} total pages",
allFoundTextsByPage.size(),
document.getNumberOfPages());
// Log distribution by page
allFoundTextsByPage.forEach(
(pageIndex, texts) -> {
log.info("Page {}: {} instances", pageIndex + 1, texts.size());
});
return allFoundTextsByPage; return allFoundTextsByPage;
} }
@ -650,6 +594,122 @@ public class RedactionService {
return strategy.redact(request); return strategy.redact(request);
} }
/**
 * Reports whether {@code text} contains only characters this service treats as safe.
 *
 * <p>A {@code null} or empty string is considered safe. Otherwise every UTF-16 code unit is
 * inspected, and the text is rejected as soon as one of them is either
 *
 * <ul>
 *   <li>at or above 65488 (0xFFD0), the high range the original code flags as problematic, or
 *   <li>an ISO control character other than line feed, tab or carriage return.
 * </ul>
 *
 * @param text the candidate text, may be {@code null}
 * @return {@code true} if no offending character is present
 */
private static boolean isTextSafeForRedaction(String text) {
    if (text == null || text.isEmpty()) {
        return true;
    }
    // chars() yields UTF-16 code units, matching a charAt(i) walk over the string.
    return text.chars()
            .noneMatch(
                    unit ->
                            unit >= 65488
                                    || (Character.isISOControl(unit)
                                            && unit != '\n'
                                            && unit != '\t'
                                            && unit != '\r'));
}
/**
 * Builds a structural copy of a token list so that adding, removing or replacing entries in the
 * copy does not affect {@code original}.
 *
 * <p>Copy rules, per element:
 *
 * <ul>
 *   <li>{@link COSDictionary}: a new dictionary is created and every key is re-inserted with the
 *       value returned by {@code getDictionaryObject}. The copy is one level deep, so the values
 *       themselves are shared with the source dictionary.
 *   <li>Non-empty nested list whose first element is non-null: copied recursively.
 *   <li>Anything else (operators, numbers, strings, empty lists, lists starting with
 *       {@code null}): the same reference is reused.
 * </ul>
 *
 * @param original the tokens to copy; must not be {@code null}
 * @return a new mutable list holding the copied tokens in the same order
 */
private static List<Object> deepCopyTokens(List<Object> original) {
    List<Object> copy = new ArrayList<>(original.size());
    for (Object token : original) {
        if (token instanceof COSDictionary dict) {
            COSDictionary duplicate = new COSDictionary();
            for (COSName key : dict.keySet()) {
                duplicate.setItem(key, dict.getDictionaryObject(key));
            }
            copy.add(duplicate);
        } else if (token instanceof List<?> nested
                && !nested.isEmpty()
                && nested.get(0) != null) {
            // The cast is erased at runtime and can never throw ClassCastException, so no
            // try/catch is needed around it.
            @SuppressWarnings("unchecked")
            List<Object> nestedTokens = (List<Object>) nested;
            copy.add(deepCopyTokens(nestedTokens));
        } else {
            copy.add(token);
        }
    }
    return copy;
}
/**
 * Best-effort switch of the operator that follows a modified operand to {@code TJ}.
 *
 * <p>Looks at the token directly after {@code tokenIndex}. If that position is valid and holds
 * an {@link Operator} whose name equals {@code originalOperator}, the token is replaced in place
 * with the {@code TJ} operator. Any exception raised while doing so is deliberately ignored.
 *
 * @param tokens the mutable token list being edited
 * @param tokenIndex index of the operand token; the operator is expected at {@code tokenIndex + 1}
 * @param originalOperator name the existing operator must have for the swap to happen
 * @return always a success result, whether or not a replacement took place
 */
private static TokenModificationResult updateOperatorSafely(
        List<Object> tokens, int tokenIndex, String originalOperator) {
    final int nextIndex = tokenIndex + 1;
    try {
        if (isValidTokenIndex(tokens, nextIndex)) {
            Object candidate = tokens.get(nextIndex);
            if (candidate instanceof Operator current
                    && current.getName().equals(originalOperator)) {
                tokens.set(nextIndex, Operator.getOperator("TJ"));
            }
        }
    } catch (Exception ignored) {
        // Non-critical: the operator is simply left as it was.
    }
    return TokenModificationResult.success();
}
/**
 * Convenience overload that delegates to the two-argument variant with its boolean flag set to
 * {@code true}.
 *
 * <p>NOTE(review): the flag presumably enables removal of {@code TU} entries, as with the
 * {@code removeTU} parameter used elsewhere in this class; confirm against the two-argument
 * overload.
 *
 * @param tokens the token list to process
 * @return the result reported by the two-argument overload
 */
private static WipeResult wipeAllSemanticTextInTokens(List<Object> tokens) {
return wipeAllSemanticTextInTokens(tokens, true);
}
/**
 * Fallback redaction path: reloads the uploaded file, redacts every match via
 * {@code finalizeRedaction}, then passes the result to {@code performOcrRestoration}.
 *
 * <p>The document is always loaded fresh from {@code request.getFileInput()}, so edits made to
 * any previously opened copy are not carried over. A blank or missing redact colour falls back
 * to {@code "#000000"}.
 *
 * @param request the redaction request supplying the file, colour and padding
 * @param listOfText the search terms to redact
 * @param useRegex whether terms are interpreted as regular expressions
 * @param wholeWordSearch whether only whole-word matches are redacted
 * @return the bytes returned by {@code performOcrRestoration}
 * @throws IOException if any step fails; the underlying exception is attached as the cause. If
 *     the calling thread was interrupted, its interrupt flag is restored before this is thrown.
 */
public byte[] performVisualRedactionWithOcrRestoration(
        RedactPdfRequest request,
        String[] listOfText,
        boolean useRegex,
        boolean wholeWordSearch)
        throws IOException {
    PDDocument visualRedactedDoc = null;
    try {
        visualRedactedDoc = pdfDocumentFactory.load(request.getFileInput());
        Map<Integer, List<PDFText>> allFound =
                findTextToRedact(visualRedactedDoc, listOfText, useRegex, wholeWordSearch);
        String effectiveColor =
                (request.getRedactColor() == null || request.getRedactColor().isBlank())
                        ? "#000000"
                        : request.getRedactColor();
        // NOTE(review): the fifth argument presumably forces page-to-image conversion (it sits
        // where callers otherwise pass request.getConvertPDFToImage()) - confirm against
        // finalizeRedaction.
        byte[] visualRedactedBytes =
                finalizeRedaction(
                        visualRedactedDoc,
                        allFound,
                        effectiveColor,
                        request.getCustomPadding(),
                        true,
                        false);
        return performOcrRestoration(visualRedactedBytes, request);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers further up the stack can still see the
        // interruption after it has been translated into an IOException.
        Thread.currentThread().interrupt();
        throw new IOException(
                "Visual redaction with OCR restoration failed: " + e.getMessage(), e);
    } catch (Exception e) {
        throw new IOException(
                "Visual redaction with OCR restoration failed: " + e.getMessage(), e);
    } finally {
        if (visualRedactedDoc != null) {
            try {
                visualRedactedDoc.close();
            } catch (IOException ignored) {
                // Best-effort close: a failure here must not mask the real outcome.
            }
        }
    }
}
/**
 * Writes the redacted PDF to a temporary file and runs it through whichever OCR tool is
 * available.
 *
 * <p>OCRmyPDF is tried first and Tesseract second. When neither is available, the input bytes
 * are returned unchanged. Both temporary files are managed by try-with-resources.
 *
 * @param redactedPdfBytes the already redacted PDF
 * @param request the originating request, passed through to the OCR helpers
 * @return the OCR-processed PDF bytes, or {@code redactedPdfBytes} if no OCR tool is available
 * @throws IOException if writing the temporary file or running the OCR step fails
 * @throws InterruptedException if an OCR helper is interrupted
 */
private byte[] performOcrRestoration(byte[] redactedPdfBytes, RedactPdfRequest request)
        throws IOException, InterruptedException {
    try (TempFile ocrInput = new TempFile(tempFileManager, ".pdf");
            TempFile ocrOutput = new TempFile(tempFileManager, ".pdf")) {
        java.nio.file.Path sourcePath = ocrInput.getPath();
        java.nio.file.Files.write(sourcePath, redactedPdfBytes);

        if (isOcrMyPdfAvailable()) {
            return processWithOcrMyPdfForRestoration(sourcePath, ocrOutput.getPath(), request);
        }
        if (isTesseractAvailable()) {
            return processWithTesseractForRestoration(sourcePath, ocrOutput.getPath(), request);
        }
        // No OCR tool installed: hand back the redacted document as-is.
        return redactedPdfBytes;
    }
}
private static String getDecodedString(COSString cosString, PDFont font) { private static String getDecodedString(COSString cosString, PDFont font) {
try { try {
String decoded = TextDecodingHelper.tryDecodeWithFont(font, cosString); String decoded = TextDecodingHelper.tryDecodeWithFont(font, cosString);
@ -671,7 +731,6 @@ public class RedactionService {
text.getBytes(java.nio.charset.StandardCharsets.UTF_8)); text.getBytes(java.nio.charset.StandardCharsets.UTF_8));
} }
} catch (Exception e) { } catch (Exception e) {
// Fall through to return newString
} }
} }
return newString; return newString;
@ -740,48 +799,26 @@ public class RedactionService {
return normalized.toString(); return normalized.toString();
} }
private static boolean isTextSafeForRedaction(String text) { private boolean isOcrMyPdfAvailable() {
if (text == null || text.isEmpty()) return true;
for (int i = 0; i < text.length(); i++) {
char c = text.charAt(i);
int codePoint = c;
if (codePoint >= 65488) {
return false; // Contains problematic high-range characters
}
if (Character.isISOControl(c) && c != '\n' && c != '\t' && c != '\r') {
return false; // Contains problematic control characters
}
}
return true;
}
private static List<Object> deepCopyTokens(List<Object> original) {
List<Object> copy = new ArrayList<>(original.size());
for (Object obj : original) {
if (obj instanceof COSDictionary dict) {
COSDictionary newDict = new COSDictionary();
for (COSName key : dict.keySet()) {
newDict.setItem(key, dict.getDictionaryObject(key));
}
copy.add(newDict);
} else if (obj instanceof List<?> nestedList
&& !nestedList.isEmpty()
&& nestedList.get(0) instanceof Object) {
try { try {
@SuppressWarnings("unchecked") ProcessExecutorResult result =
List<Object> objectList = (List<Object>) nestedList; ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF)
copy.add(deepCopyTokens(objectList)); .runCommandWithOutputHandling(Arrays.asList("ocrmypdf", "--version"));
} catch (ClassCastException e) { return result.getRc() == 0;
copy.add(obj); // Fallback to shallow copy if cast fails } catch (Exception e) {
} return false;
} else {
copy.add(obj); // Shallow copy for primitives/operators
} }
} }
return copy;
private boolean isTesseractAvailable() {
try {
ProcessExecutorResult result =
ProcessExecutor.getInstance(ProcessExecutor.Processes.TESSERACT)
.runCommandWithOutputHandling(Arrays.asList("tesseract", "--version"));
return result.getRc() == 0;
} catch (Exception e) {
return false;
}
} }
private static boolean isFontSuitableForWidthCalculation(PDFont font) { private static boolean isFontSuitableForWidthCalculation(PDFont font) {
@ -1027,15 +1064,43 @@ public class RedactionService {
return res; return res;
} }
private float safeGetStringWidth(PDFont font, String text) { private byte[] processWithOcrMyPdfForRestoration(
// Delegate to WidthCalculator; convert from user-space at fontSize=1 to font units java.nio.file.Path inputPath, java.nio.file.Path outputPath, RedactPdfRequest request)
if (font == null || text == null || text.isEmpty()) return 0f; throws IOException, InterruptedException {
try { List<String> command =
float widthAtSize1 = WidthCalculator.calculateAccurateWidth(font, text, 1.0f); Arrays.asList(
return widthAtSize1 * FONT_SCALE_FACTOR; // convert back to font units for callers "ocrmypdf",
} catch (Exception e) { "--verbose",
return 0f; "1",
"--output-type",
"pdf",
"--pdf-renderer",
"sandwich",
"--language",
"eng",
"--optimize",
"0",
"--jpeg-quality",
"100",
"--png-quality",
"9",
"--force-ocr",
"--deskew",
"--clean",
"--clean-final",
inputPath.toString(),
outputPath.toString());
ProcessExecutorResult result =
ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF)
.runCommandWithOutputHandling(command);
if (result.getRc() != 0) {
throw new IOException(
"OCRmyPDF restoration failed with return code: "
+ result.getRc()
+ ". Error: "
+ result.getMessages());
} }
return java.nio.file.Files.readAllBytes(outputPath);
} }
private static boolean removeSemanticProperties(COSDictionary dict, boolean removeTU) { private static boolean removeSemanticProperties(COSDictionary dict, boolean removeTU) {
@ -1166,7 +1231,6 @@ public class RedactionService {
return WidthCalculator.calculateAccurateWidth(font, text, fontSize); return WidthCalculator.calculateAccurateWidth(font, text, fontSize);
} }
} catch (Exception e) { } catch (Exception e) {
// Width calculation failed
} }
return 0f; return 0f;
} }
@ -1235,166 +1299,56 @@ public class RedactionService {
} }
} }
public boolean performTextReplacement( private byte[] processWithTesseractForRestoration(
PDDocument document, java.nio.file.Path inputPath, java.nio.file.Path outputPath, RedactPdfRequest request)
Map<Integer, List<PDFText>> allFoundTextsByPage, throws IOException, InterruptedException {
String[] listOfText, try (TempDirectory tempDir = new TempDirectory(tempFileManager)) {
boolean useRegex, java.io.File tempOutputDir = new java.io.File(tempDir.getPath().toFile(), "output");
boolean wholeWordSearchBool) { java.io.File tempImagesDir = new java.io.File(tempDir.getPath().toFile(), "images");
if (allFoundTextsByPage.isEmpty()) { java.io.File finalOutputFile =
log.info("No text found to redact"); new java.io.File(tempDir.getPath().toFile(), "final_output.pdf");
return false; tempOutputDir.mkdirs();
tempImagesDir.mkdirs();
try (PDDocument document = pdfDocumentFactory.load(inputPath.toFile())) {
PDFRenderer pdfRenderer = new PDFRenderer(document);
int pageCount = document.getNumberOfPages();
PDFMergerUtility merger = new PDFMergerUtility();
merger.setDestinationFileName(finalOutputFile.toString());
for (int pageNum = 0; pageNum < pageCount; pageNum++) {
BufferedImage image = pdfRenderer.renderImageWithDPI(pageNum, 600);
java.io.File imagePath =
new java.io.File(tempImagesDir, "page_" + pageNum + ".png");
ImageIO.write(image, "png", imagePath);
List<String> command =
Arrays.asList(
"tesseract",
imagePath.toString(),
new java.io.File(tempOutputDir, "page_" + pageNum).toString(),
"-l",
"eng",
"--dpi",
"600",
"--psm",
"1",
"pdf");
ProcessExecutorResult result =
ProcessExecutor.getInstance(ProcessExecutor.Processes.TESSERACT)
.runCommandWithOutputHandling(command);
if (result.getRc() != 0) {
throw new IOException(
"Tesseract restoration failed with return code: " + result.getRc());
} }
try { java.io.File pageOutputPath =
Set<String> allSearchTerms = new java.io.File(tempOutputDir, "page_" + pageNum + ".pdf");
Arrays.stream(listOfText) merger.addSource(pageOutputPath);
.map(String::trim)
.filter(s -> !s.isEmpty())
.collect(Collectors.toSet());
log.info(
"Starting text replacement with {} search terms: {}",
allSearchTerms.size(),
allSearchTerms);
log.info("Total pages in document: {}", document.getNumberOfPages());
log.info("Initial text found on {} pages", allFoundTextsByPage.size());
// Count initial instances
int initialTotalInstances =
allFoundTextsByPage.values().stream().mapToInt(List::size).sum();
log.info("Total initial instances to redact: {}", initialTotalInstances);
int finalSweepCount = 0;
for (int sweep = 0; sweep < MAX_SWEEPS; sweep++) {
finalSweepCount = sweep + 1;
log.info("=== Starting sweep {} of {} ===", sweep + 1, MAX_SWEEPS);
int pagesProcessed = 0;
int totalModifications = 0;
for (int pageIndex = 0; pageIndex < document.getNumberOfPages(); pageIndex++) {
PDPage page = document.getPages().get(pageIndex);
List<PDFText> pageFoundTexts =
allFoundTextsByPage.getOrDefault(pageIndex, List.of());
log.debug(
"Processing page {} - found {} instances",
pageIndex + 1,
pageFoundTexts.size());
List<Object> filtered =
createTokensWithoutTargetText(
document, page, allSearchTerms, useRegex, wholeWordSearchBool);
writeFilteredContentStream(document, page, filtered);
// Count modifications (rough estimate based on token count difference)
int tokenDiff = Math.abs(filtered.size() - getOriginalTokenCount(page));
totalModifications += tokenDiff;
pagesProcessed++;
log.debug("Page {} - token modifications: {}", pageIndex + 1, tokenDiff);
} }
merger.mergeDocuments(null);
log.info( java.nio.file.Files.copy(
"Sweep {} completed - processed {} pages, total modifications: {}", finalOutputFile.toPath(),
sweep + 1, outputPath,
pagesProcessed, java.nio.file.StandardCopyOption.REPLACE_EXISTING);
totalModifications);
// Check remaining targets
boolean stillContainsTargets =
documentStillContainsTargets(
document, allSearchTerms, useRegex, wholeWordSearchBool);
if (!stillContainsTargets) {
log.info("SUCCESS: All targets removed after {} sweeps", sweep + 1);
break;
} else {
log.warn(
"WARNING: Still contains targets after sweep {} - continuing...",
sweep + 1);
} }
} return java.nio.file.Files.readAllBytes(outputPath);
// Final verification - run multiple times to catch any missed instances
boolean finalCheck = false;
for (int verifyAttempt = 0; verifyAttempt < 3; verifyAttempt++) {
log.info("Final verification attempt {} of 3", verifyAttempt + 1);
finalCheck =
documentStillContainsTargets(
document, allSearchTerms, useRegex, wholeWordSearchBool);
if (!finalCheck) {
log.info(
"Verification attempt {} passed - no targets found", verifyAttempt + 1);
break;
} else {
log.warn("Verification attempt {} found remaining targets", verifyAttempt + 1);
if (verifyAttempt < 2) {
log.info("Performing additional cleanup sweep due to verification failure");
// Try one more sweep
for (PDPage page : document.getPages()) {
List<Object> additionalFiltered =
createTokensWithoutTargetText(
document,
page,
allSearchTerms,
useRegex,
wholeWordSearchBool);
writeFilteredContentStream(document, page, additionalFiltered);
}
}
}
}
if (finalCheck) {
log.error(
"FAILURE: Document still contains targets after {} sweeps and {} verification attempts. Falling back to visual redaction.",
MAX_SWEEPS,
3);
log.error("Remaining search terms: {}", allSearchTerms);
// Log detailed information about what was found
log.error("=== DETAILED FAILURE ANALYSIS ===");
for (int pageIdx = 0; pageIdx < document.getNumberOfPages(); pageIdx++) {
for (String term : allSearchTerms) {
try {
TextFinder finder = new TextFinder(term, useRegex, wholeWordSearchBool);
finder.setStartPage(pageIdx + 1);
finder.setEndPage(pageIdx + 1);
finder.getText(document);
for (PDFText found : finder.getFoundTexts()) {
if (found.getPageIndex() == pageIdx) {
log.error(
"REMAINING: '{}' found on page {} at position ({}, {})",
term,
pageIdx + 1,
found.getX1(),
found.getY1());
}
}
} catch (Exception e) {
log.error(
"Error during failure analysis for term '{}' on page {}: {}",
term,
pageIdx + 1,
e.getMessage());
}
}
}
log.error("=== END FAILURE ANALYSIS ===");
return true; // Return true to indicate fallback needed
} else {
log.info(
"SUCCESS: All text redaction completed successfully after {} sweeps",
finalSweepCount);
return false; // Return false to indicate success
}
} catch (Exception e) {
log.error("Exception during text replacement: {}", e.getMessage(), e);
return true;
} }
} }
@ -1460,22 +1414,16 @@ public class RedactionService {
} }
} }
} catch (Exception e) { } catch (Exception e) {
// Failed to add spacing adjustment
} }
} }
private static TokenModificationResult updateOperatorSafely( private float safeGetStringWidth(PDFont font, String text) {
List<Object> tokens, int tokenIndex, String originalOperator) { if (font == null || text == null || text.isEmpty()) return 0f;
try { try {
int operatorIndex = tokenIndex + 1; float widthAtSize1 = WidthCalculator.calculateAccurateWidth(font, text, 1.0f);
if (isValidTokenIndex(tokens, operatorIndex) return widthAtSize1 * FONT_SCALE_FACTOR;
&& tokens.get(operatorIndex) instanceof Operator op
&& op.getName().equals(originalOperator)) {
tokens.set(operatorIndex, Operator.getOperator("TJ"));
}
return TokenModificationResult.success();
} catch (Exception e) { } catch (Exception e) {
return TokenModificationResult.success(); // Non-critical failure return 0f;
} }
} }
@ -1519,9 +1467,7 @@ public class RedactionService {
float adjustment = wOrig - wMod; float adjustment = wOrig - wMod;
if (Math.abs(adjustment) > PRECISION_THRESHOLD) { if (Math.abs(adjustment) > PRECISION_THRESHOLD) {
float kerning = (-adjustment / segment.getFontSize()) * FONT_SCALE_FACTOR; float kerning = (-adjustment / segment.getFontSize()) * FONT_SCALE_FACTOR;
// If next token is a number, combine; otherwise insert new number
if (i + 1 < size && redactedArray.get(i + 1) instanceof COSNumber num) { if (i + 1 < size && redactedArray.get(i + 1) instanceof COSNumber num) {
// Skip adding the next separately and add combined value
i++; i++;
float combined = num.floatValue() + kerning; float combined = num.floatValue() + kerning;
out.add(new COSFloat(combined)); out.add(new COSFloat(combined));
@ -1661,7 +1607,6 @@ public class RedactionService {
List<TextSegment> textSegments = extractTextSegments(page, tokens, this.aggressiveMode); List<TextSegment> textSegments = extractTextSegments(page, tokens, this.aggressiveMode);
log.debug("Extracted {} text segments from tokens", textSegments.size()); log.debug("Extracted {} text segments from tokens", textSegments.size());
// Log extracted text content for debugging
if (!textSegments.isEmpty()) { if (!textSegments.isEmpty()) {
StringBuilder allText = new StringBuilder(); StringBuilder allText = new StringBuilder();
boolean hasProblematicChars = false; boolean hasProblematicChars = false;
@ -1733,9 +1678,161 @@ public class RedactionService {
return problematicRatio > 0.3; return problematicRatio > 0.3;
} }
private static WipeResult wipeAllSemanticTextInTokens(List<Object> tokens) { public boolean performTextReplacement(
return wipeAllSemanticTextInTokens( PDDocument document,
tokens, true); // Default to removing TU for backward compatibility Map<Integer, List<PDFText>> allFoundTextsByPage,
String[] listOfText,
boolean useRegex,
boolean wholeWordSearchBool) {
if (allFoundTextsByPage.isEmpty()) {
log.info("No text found to redact");
return false;
}
try {
Set<String> allSearchTerms =
Arrays.stream(listOfText)
.map(String::trim)
.filter(s -> !s.isEmpty())
.collect(Collectors.toSet());
log.info(
"Starting text replacement with {} search terms: {}",
allSearchTerms.size(),
allSearchTerms);
log.info("Total pages in document: {}", document.getNumberOfPages());
log.info("Initial text found on {} pages", allFoundTextsByPage.size());
int initialTotalInstances =
allFoundTextsByPage.values().stream().mapToInt(List::size).sum();
log.info("Total initial instances to redact: {}", initialTotalInstances);
int finalSweepCount = 0;
for (int sweep = 0; sweep < MAX_SWEEPS; sweep++) {
finalSweepCount = sweep + 1;
log.info("=== Starting sweep {} of {} ===", sweep + 1, MAX_SWEEPS);
int pagesProcessed = 0;
int totalModifications = 0;
for (int pageIndex = 0; pageIndex < document.getNumberOfPages(); pageIndex++) {
PDPage page = document.getPages().get(pageIndex);
List<PDFText> pageFoundTexts =
allFoundTextsByPage.getOrDefault(pageIndex, List.of());
log.debug(
"Processing page {} - found {} instances",
pageIndex + 1,
pageFoundTexts.size());
List<Object> filtered =
createTokensWithoutTargetText(
document, page, allSearchTerms, useRegex, wholeWordSearchBool);
writeFilteredContentStream(document, page, filtered);
int tokenDiff = Math.abs(filtered.size() - getOriginalTokenCount(page));
totalModifications += tokenDiff;
pagesProcessed++;
log.debug("Page {} - token modifications: {}", pageIndex + 1, tokenDiff);
}
log.info(
"Sweep {} completed - processed {} pages, total modifications: {}",
sweep + 1,
pagesProcessed,
totalModifications);
boolean stillContainsTargets =
documentStillContainsTargets(
document, allSearchTerms, useRegex, wholeWordSearchBool);
if (!stillContainsTargets) {
log.info("SUCCESS: All targets removed after {} sweeps", sweep + 1);
break;
} else {
log.warn(
"WARNING: Still contains targets after sweep {} - continuing...",
sweep + 1);
}
}
boolean finalCheck = false;
for (int verifyAttempt = 0; verifyAttempt < 3; verifyAttempt++) {
log.info("Final verification attempt {} of 3", verifyAttempt + 1);
finalCheck =
documentStillContainsTargets(
document, allSearchTerms, useRegex, wholeWordSearchBool);
if (!finalCheck) {
log.info(
"Verification attempt {} passed - no targets found", verifyAttempt + 1);
break;
} else {
log.warn("Verification attempt {} found remaining targets", verifyAttempt + 1);
if (verifyAttempt < 2) {
log.info("Performing additional cleanup sweep due to verification failure");
for (PDPage page : document.getPages()) {
List<Object> additionalFiltered =
createTokensWithoutTargetText(
document,
page,
allSearchTerms,
useRegex,
wholeWordSearchBool);
writeFilteredContentStream(document, page, additionalFiltered);
}
}
}
}
if (finalCheck) {
log.error(
"FAILURE: Document still contains targets after {} sweeps and {} verification attempts. Falling back to visual redaction with OCR restoration.",
MAX_SWEEPS,
3);
log.error("Remaining search terms: {}", allSearchTerms);
log.error("=== DETAILED FAILURE ANALYSIS ===");
for (int pageIdx = 0; pageIdx < document.getNumberOfPages(); pageIdx++) {
for (String term : allSearchTerms) {
try {
TextFinder finder = new TextFinder(term, useRegex, wholeWordSearchBool);
finder.setStartPage(pageIdx + 1);
finder.setEndPage(pageIdx + 1);
finder.getText(document);
for (PDFText found : finder.getFoundTexts()) {
if (found.getPageIndex() == pageIdx) {
log.error(
"REMAINING: '{}' found on page {} at position ({}, {})",
term,
pageIdx + 1,
found.getX1(),
found.getY1());
}
}
} catch (Exception e) {
log.error(
"Error during failure analysis for term '{}' on page {}: {}",
term,
pageIdx + 1,
e.getMessage());
}
}
}
log.error("=== END FAILURE ANALYSIS ===");
return true;
} else {
log.info(
"SUCCESS: All text redaction completed successfully after {} sweeps",
finalSweepCount);
return false;
}
} catch (Exception e) {
log.error("Exception during text replacement: {}", e.getMessage(), e);
return true;
}
} }
private COSArray createRedactedTJArray( private COSArray createRedactedTJArray(
@ -1905,7 +2002,6 @@ public class RedactionService {
log.warn("3. Search terms not matching extracted text"); log.warn("3. Search terms not matching extracted text");
log.warn("4. Whole word search filtering out matches"); log.warn("4. Whole word search filtering out matches");
// Log some debugging info
if (!segments.isEmpty()) { if (!segments.isEmpty()) {
log.warn("Sample segment text: '{}'", segments.get(0).getText()); log.warn("Sample segment text: '{}'", segments.get(0).getText());
log.warn("Target words: {}", targetWords); log.warn("Target words: {}", targetWords);
@ -2010,7 +2106,6 @@ public class RedactionService {
log.debug("Redacting TJ operator at token index {}", segment.tokenIndex); log.debug("Redacting TJ operator at token index {}", segment.tokenIndex);
COSArray redacted = COSArray redacted =
redactTJArrayByDecodedRanges(segment.font, arr, segMatches); redactTJArrayByDecodedRanges(segment.font, arr, segMatches);
// Inject kerning adjustments per string element to preserve layout
COSArray withKerning = buildKerningAdjustedTJArray(arr, redacted, segment); COSArray withKerning = buildKerningAdjustedTJArray(arr, redacted, segment);
newTokens.set(segment.tokenIndex, withKerning); newTokens.set(segment.tokenIndex, withKerning);
totalModifications++; totalModifications++;
@ -2529,7 +2624,6 @@ public class RedactionService {
try { try {
performEmergencyFallback(tokens, segment.tokenIndex); performEmergencyFallback(tokens, segment.tokenIndex);
} catch (Exception emergencyError) { } catch (Exception emergencyError) {
// Final fallback failed - continue processing
} }
} }
} }
@ -2562,7 +2656,7 @@ public class RedactionService {
if (!this.aggressiveMode if (!this.aggressiveMode
&& segment.getFont() != null && segment.getFont() != null
&& !TextEncodingHelper.isTextSegmentRemovable(segment.getFont(), originalText)) { && !TextEncodingHelper.isTextSegmentRemovable(segment.getFont(), originalText)) {
newArray.add(cosString); // Keep original COSString to preserve encoding newArray.add(cosString);
return; return;
} }
@ -2572,7 +2666,7 @@ public class RedactionService {
List<MatchRange> sortedMatches = List<MatchRange> sortedMatches =
matches.stream().sorted(Comparator.comparingInt(MatchRange::getStartPos)).toList(); matches.stream().sorted(Comparator.comparingInt(MatchRange::getStartPos)).toList();
int cumulativeOffset = 0; // Track cumulative text changes int cumulativeOffset = 0;
for (MatchRange match : sortedMatches) { for (MatchRange match : sortedMatches) {
int stringStartInPage = segment.getStartPos() + textOffsetInSegment; int stringStartInPage = segment.getStartPos() + textOffsetInSegment;
@ -2668,7 +2762,7 @@ public class RedactionService {
} }
private int wipeAllTextInResources(PDDocument document, PDResources resources) { private int wipeAllTextInResources(PDDocument document, PDResources resources) {
int totalMods = 0; // aggregated but currently not returned to caller int totalMods = 0;
try { try {
totalMods += wipeAllSemanticTextInProperties(resources); totalMods += wipeAllSemanticTextInProperties(resources);
for (COSName xobjName : resources.getXObjectNames()) { for (COSName xobjName : resources.getXObjectNames()) {
@ -2685,7 +2779,6 @@ public class RedactionService {
return totalMods; return totalMods;
} }
// Helper classes
private record WidthMeasurement(float width, boolean valid) { private record WidthMeasurement(float width, boolean valid) {
public static WidthMeasurement invalid() { public static WidthMeasurement invalid() {

View File

@ -4,6 +4,53 @@
<head> <head>
<th:block th:insert="~{fragments/common :: head(title=#{autoRedact.title}, header=#{autoRedact.header})}"></th:block> <th:block th:insert="~{fragments/common :: head(title=#{autoRedact.title}, header=#{autoRedact.header})}"></th:block>
<style>
/* Page-local styles for the auto-redact form. */
/* Bottom spacing for the "Redaction style" radio/checkbox group. */
.redaction-options-group {
margin-bottom: 1rem;
}
/* Force the grey helper-text colour; !important overrides any competing text-muted rule. */
.form-text.text-muted {
color: #6c757d !important;
}
/* Focus indicator: translucent blue ring (box-shadow) plus a solid 2px outline offset by 2px. */
.btn-primary:focus {
box-shadow: 0 0 0 0.2rem rgba(13, 110, 253, 0.25);
outline: 2px solid #0d6efd;
outline-offset: 2px;
}
/* Same focus indicator for checkboxes and radio buttons. */
.form-check-input:focus {
box-shadow: 0 0 0 0.2rem rgba(13, 110, 253, 0.25);
outline: 2px solid #0d6efd;
outline-offset: 2px;
}
/* Same focus indicator for text inputs and selects, plus a matching border colour. */
.form-control:focus, .form-select:focus {
border-color: #0d6efd;
box-shadow: 0 0 0 0.2rem rgba(13, 110, 253, 0.25);
outline: 2px solid #0d6efd;
outline-offset: 2px;
}
/* Checked checkboxes/radios use the same blue as the focus outline. */
.form-check-input:checked {
background-color: #0d6efd;
border-color: #0d6efd;
}
/* Visually hides an element by shrinking it to a clipped 1px box taken out of the flow.
   Presumably intended for screen-reader-only text; no usage is visible in this part of the page. */
.sr-only {
position: absolute;
width: 1px;
height: 1px;
padding: 0;
margin: -1px;
overflow: hidden;
clip: rect(0, 0, 0, 0);
white-space: nowrap;
border: 0;
}
</style>
</head> </head>
<body> <body>
@ -18,35 +65,112 @@
<svg class="material-symbols-rounded tool-header-icon security"> <svg class="material-symbols-rounded tool-header-icon security">
<use xlink:href="/images/redact-auto.svg#icon-redact-auto"></use> <use xlink:href="/images/redact-auto.svg#icon-redact-auto"></use>
</svg> </svg>
<span class="tool-header-text" th:text="#{autoRedact.header}"></span> <span class="tool-header-text" id="form-title" th:text="#{autoRedact.header}"></span>
</div> </div>
<form enctype="multipart/form-data" id="autoRedactForm" method="post" th:action="@{'api/v1/security/auto-redact'}"> <form aria-labelledby="form-title" enctype="multipart/form-data" id="autoRedactForm"
method="post" th:action="@{'api/v1/security/auto-redact'}">
<div th:replace="~{fragments/common :: fileSelector(name='fileInput', multipleInputsForSingleRequest=false, disableMultipleFiles=true, accept='application/pdf')}"></div> <div th:replace="~{fragments/common :: fileSelector(name='fileInput', multipleInputsForSingleRequest=false, disableMultipleFiles=true, accept='application/pdf')}"></div>
<div class="mb-3"> <div class="mb-3">
<label for="listOfText" class="form-label" th:text="#{autoRedact.textsToRedactLabel}"></label> <label class="form-label" for="listOfText" th:text="#{autoRedact.textsToRedactLabel}"></label>
<textarea class="form-control" id="listOfText" name="listOfText" rows="4" required <textarea class="form-control" id="listOfText" name="listOfText" required rows="4"
th:placeholder="#{autoRedact.textsToRedactPlaceholder}"></textarea> th:placeholder="#{autoRedact.textsToRedactPlaceholder}"></textarea>
</div> </div>
<div class="mb-3"> <div class="mb-3">
<label for="defaultColor" class="form-label" th:text="#{autoRedact.colorLabel}">Color</label> <div class="form-check">
<select class="form-control" id="defaultColor" name="defaultColor" <input class="form-check-input" id="useRegex" name="useRegex" type="checkbox">
onchange="handleColorChange(this.value)"> <label class="form-check-label" for="useRegex" th:text="#{autoRedact.useRegexLabel}"></label>
<option value="#000000" th:text="#{black}">Black</option> </div>
<option value="#FFFFFF" th:text="#{white}">White</option> <div class="form-check">
<option value="#FF0000" th:text="#{red}">Red</option> <input class="form-check-input" id="wholeWordSearch" name="wholeWordSearch" type="checkbox">
<option value="#00FF00" th:text="#{green}">Green</option> <label class="form-check-label" for="wholeWordSearch" th:text="#{autoRedact.wholeWordSearchLabel}"></label>
<option value="#0000FF" th:text="#{blue}">Blue</option> </div>
<option value="custom" th:text="#{custom}">Custom...</option> </div>
<div class="redaction-options-group">
<label class="form-label fw-bold mb-3">Redaction style</label>
<div class="form-check mb-2">
<input aria-describedby="visual-desc" class="form-check-input" id="visualImage" name="redactionMode" type="radio" value="visual">
<label class="form-check-label" for="visualImage">Visual</label>
<small class="form-text text-muted d-block mt-1" id="visual-desc">Converts to image with visual redactions for maximum security.</small>
</div>
<div class="form-check mb-2">
<input aria-describedby="delete-desc" class="form-check-input" id="deleteText" name="redactionMode" type="radio" value="aggressive">
<label class="form-check-label" for="deleteText">Delete Text</label>
<small class="form-text text-muted d-block mt-1" id="delete-desc">Removes the text completely. This may alter the original layout or leave a gap.</small>
</div>
<div class="form-check mb-3">
<input aria-describedby="keep-desc" checked class="form-check-input" id="keepLayout" name="redactionMode" type="radio" value="moderate">
<label class="form-check-label" for="keepLayout">Keep Layout</label>
<small class="form-text text-muted d-block mt-1" id="keep-desc">Covers text with a redaction box, preserving the page's original design.</small>
</div>
<div class="form-check">
<input aria-describedby="guarantee-desc" class="form-check-input" id="guaranteeRedaction" name="convertPDFToImage" type="checkbox">
<label class="form-check-label" for="guaranteeRedaction">PDF image</label>
<small class="form-text text-muted d-block mt-1" id="guarantee-desc">For maximum security, uses an image-based method to ensure text is unrecoverable. May slightly affect document quality.</small>
</div>
</div>
<br>
<div class="mb-3">
<label class="form-label" for="defaultColor" th:text="#{autoRedact.colorLabel}"></label>
<select class="form-select" id="defaultColor" name="defaultColor" onchange="handleColorChange(this.value)">
<option th:text="#{black}" value="#000000">Black</option>
<option th:text="#{white}" value="#FFFFFF">White</option>
<option th:text="#{red}" value="#FF0000">Red</option>
<option th:text="#{green}" value="#00FF00">Green</option>
<option th:text="#{blue}" value="#0000FF">Blue</option>
<option th:text="#{custom}" value="custom">Custom...</option>
</select> </select>
</div> </div>
<!-- Custom Color Input -->
<div class="mb-3" id="customColorContainer" style="display: none;"> <div class="mb-3" id="customColorContainer" style="display: none;">
<label for="customColor" class="form-label" th:text="#{autoRedact.colorLabel}">Custom Color</label> <label class="form-label" for="customColor">Custom Color (Hex)</label>
<input type="text" class="form-control" id="customColor" name="redactColor" placeholder="#FF00FF"> <input class="form-control" id="customColor" name="redactColor" placeholder="#FF00FF" type="text">
</div> </div>
<div class="mb-3">
<label class="form-label" for="customPadding" th:text="#{autoRedact.customPaddingLabel}"></label>
<input class="form-control" id="customPadding" max="1" min="0" name="customPadding"
step="0.1" type="number" value="0.1">
</div>
<br>
<div class="mb-3">
<label class="form-label" for="ocrLanguage">OCR Language</label>
<select aria-describedby="ocr-desc" class="form-select" id="ocrLanguage" name="ocrLanguage">
<option value="eng">English</option>
<option value="spa">Spanish</option>
<option value="fra">French</option>
<option value="deu">German</option>
<option value="ita">Italian</option>
<option value="por">Portuguese</option>
<option value="rus">Russian</option>
<option value="ara">Arabic</option>
<option value="chi_sim">Chinese (Simplified)</option>
<option value="jpn">Japanese</option>
<option value="kor">Korean</option>
<option value="hin">Hindi</option>
</select>
<small class="form-text text-muted" id="ocr-desc">Used when OCR restoration is needed</small>
</div>
<input id="aggressiveMode" name="aggressiveMode" type="hidden" value="false">
<div class="mb-3 text-center">
<button class="btn btn-primary" id="submitBtn" th:text="#{autoRedact.submitButton}" type="submit"></button>
</div>
</form>
</div>
</div>
</div>
</div>
<th:block th:insert="~{fragments/footer.html :: footer}"></th:block>
</div>
<script> <script>
function handleColorChange(selectedValue) { function handleColorChange(selectedValue) {
const container = document.getElementById('customColorContainer'); const container = document.getElementById('customColorContainer');
@ -63,74 +187,43 @@
} }
document.addEventListener('DOMContentLoaded', function () { document.addEventListener('DOMContentLoaded', function () {
const redactionModeSelect = document.getElementById('redactionMode'); const redactionModeRadios = document.querySelectorAll('input[name="redactionMode"]');
const aggressiveModeHidden = document.getElementById('aggressiveMode'); const aggressiveModeHidden = document.getElementById('aggressiveMode');
const convertToImageCheckbox = document.getElementById('convertPDFToImage'); const guaranteeRedactionCheckbox = document.getElementById('guaranteeRedaction');
const defaultColor = document.getElementById('defaultColor'); const defaultColor = document.getElementById('defaultColor');
redactionModeSelect.addEventListener('change', function () { function updateMode() {
const mode = redactionModeSelect.value; const selectedMode = document.querySelector('input[name="redactionMode"]:checked');
aggressiveModeHidden.value = (mode === 'aggressive') ? 'true' : 'false'; if (selectedMode) {
if (mode === 'visual') { // Set aggressive mode for delete text option
convertToImageCheckbox.checked = true; aggressiveModeHidden.value = selectedMode.value === 'aggressive' ? 'true' : 'false';
// Handle PDF image checkbox based on selection
if (selectedMode.value === 'visual') {
// Visual mode automatically enables PDF image for maximum security
guaranteeRedactionCheckbox.checked = true;
} else {
// Delete Text and Keep Layout modes disable PDF image
guaranteeRedactionCheckbox.checked = false;
} }
}
}
redactionModeRadios.forEach(radio => {
radio.addEventListener('change', updateMode);
}); });
if (defaultColor) { if (defaultColor) {
handleColorChange(defaultColor.value); handleColorChange(defaultColor.value);
// Set initial value for customColor input when a pre-defined color is selected
const customColorInput = document.getElementById('customColor');
if (defaultColor.value !== 'custom') {
customColorInput.value = defaultColor.value;
}
} }
aggressiveModeHidden.value = (redactionModeSelect.value === 'aggressive') ? 'true' : 'false'; updateMode();
if (redactionModeSelect.value === 'visual') {
convertToImageCheckbox.checked = true;
}
}); });
</script> </script>
<div class="mb-3 form-check">
<input type="checkbox" id="useRegex" name="useRegex">
<label for="useRegex" th:text="#{autoRedact.useRegexLabel}"></label>
</div>
<div class="mb-3 form-check">
<input type="checkbox" id="wholeWordSearch" name="wholeWordSearch">
<label for="wholeWordSearch" th:text="#{autoRedact.wholeWordSearchLabel}"></label>
</div>
<div class="mb-3">
<label for="customPadding" class="form-label" th:text="#{autoRedact.customPaddingLabel}"></label>
<input type="number" step="0.1" class="form-control" id="customPadding" name="customPadding"
value="0.1">
</div>
<div class="mb-3 form-check">
<input id="convertPDFToImage" name="convertPDFToImage" type="checkbox">
<label for="convertPDFToImage" th:text="#{autoRedact.convertPDFToImageLabel}"></label>
</div>
<div class="mb-3">
<label class="form-label" for="redactionMode" th:text="#{autoRedact.redactionModeLabel}">Redaction Mode</label>
<select class="form-control" id="redactionMode" name="redactionMode">
<option th:text="#{autoRedact.redactionMode.moderate}" value="moderate">Moderate - Smart text removal with
fallback
</option>
<option th:text="#{autoRedact.redactionMode.visual}" value="visual">Visual - Black boxes only</option>
<option th:text="#{autoRedact.redactionMode.aggressive}" value="aggressive">Aggressive - Force text removal
</option>
</select>
</div>
<!-- Keep for backward compatibility -->
<input id="aggressiveMode" name="aggressiveMode" type="hidden" value="false">
<button type="submit" id="submitBtn" class="btn btn-primary"
th:text="#{autoRedact.submitButton}"></button>
</form>
</div>
</div>
</div>
</div>
<th:block th:insert="~{fragments/footer.html :: footer}"></th:block>
</div>
</body> </body>
</html> </html>