mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-08 17:51:20 +02:00
enhance redaction options and implement visual redaction with OCR restoration
Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
parent
7db58ad6dd
commit
f236505cae
@ -6,11 +6,13 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import stirling.software.SPDF.model.PDFText;
|
import stirling.software.SPDF.model.PDFText;
|
||||||
import stirling.software.SPDF.model.api.security.RedactPdfRequest;
|
import stirling.software.SPDF.model.api.security.RedactPdfRequest;
|
||||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||||
|
|
||||||
|
@Service
|
||||||
class AggressiveRedactionService implements RedactionModeStrategy {
|
class AggressiveRedactionService implements RedactionModeStrategy {
|
||||||
|
|
||||||
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||||
@ -49,16 +51,9 @@ class AggressiveRedactionService implements RedactionModeStrategy {
|
|||||||
? "#000000"
|
? "#000000"
|
||||||
: request.getRedactColor();
|
: request.getRedactColor();
|
||||||
if (residualExists) {
|
if (residualExists) {
|
||||||
fb = pdfDocumentFactory.load(request.getFileInput());
|
// Use the new visual redaction with OCR restoration fallback
|
||||||
Map<Integer, List<PDFText>> fbFound =
|
return helper.performVisualRedactionWithOcrRestoration(
|
||||||
RedactionService.findTextToRedact(fb, listOfText, useRegex, wholeWord);
|
request, listOfText, useRegex, wholeWord);
|
||||||
return RedactionService.finalizeRedaction(
|
|
||||||
fb,
|
|
||||||
fbFound,
|
|
||||||
effectiveColor,
|
|
||||||
request.getCustomPadding(), /*force*/
|
|
||||||
true,
|
|
||||||
false);
|
|
||||||
}
|
}
|
||||||
return RedactionService.finalizeRedaction(
|
return RedactionService.finalizeRedaction(
|
||||||
doc,
|
doc,
|
||||||
|
@ -6,11 +6,13 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import stirling.software.SPDF.model.PDFText;
|
import stirling.software.SPDF.model.PDFText;
|
||||||
import stirling.software.SPDF.model.api.security.RedactPdfRequest;
|
import stirling.software.SPDF.model.api.security.RedactPdfRequest;
|
||||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||||
|
|
||||||
|
@Service
|
||||||
class ModerateRedactionService implements RedactionModeStrategy {
|
class ModerateRedactionService implements RedactionModeStrategy {
|
||||||
|
|
||||||
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||||
@ -46,17 +48,9 @@ class ModerateRedactionService implements RedactionModeStrategy {
|
|||||||
? "#000000"
|
? "#000000"
|
||||||
: request.getRedactColor();
|
: request.getRedactColor();
|
||||||
if (fallbackToBoxOnly) {
|
if (fallbackToBoxOnly) {
|
||||||
fallback = pdfDocumentFactory.load(request.getFileInput());
|
// Use the new visual redaction with OCR restoration fallback
|
||||||
allFound =
|
return helper.performVisualRedactionWithOcrRestoration(
|
||||||
RedactionService.findTextToRedact(
|
request, listOfText, useRegex, wholeWord);
|
||||||
fallback, listOfText, useRegex, wholeWord);
|
|
||||||
return RedactionService.finalizeRedaction(
|
|
||||||
fallback,
|
|
||||||
allFound,
|
|
||||||
effectiveColor,
|
|
||||||
request.getCustomPadding(),
|
|
||||||
request.getConvertPDFToImage(),
|
|
||||||
false);
|
|
||||||
}
|
}
|
||||||
return RedactionService.finalizeRedaction(
|
return RedactionService.finalizeRedaction(
|
||||||
doc,
|
doc,
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package stirling.software.SPDF.service;
|
package stirling.software.SPDF.service;
|
||||||
|
|
||||||
import java.awt.Color;
|
import java.awt.Color;
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
@ -17,6 +18,8 @@ import java.util.Set;
|
|||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import javax.imageio.ImageIO;
|
||||||
|
|
||||||
import org.apache.pdfbox.contentstream.operator.Operator;
|
import org.apache.pdfbox.contentstream.operator.Operator;
|
||||||
import org.apache.pdfbox.cos.COSArray;
|
import org.apache.pdfbox.cos.COSArray;
|
||||||
import org.apache.pdfbox.cos.COSBase;
|
import org.apache.pdfbox.cos.COSBase;
|
||||||
@ -25,6 +28,7 @@ import org.apache.pdfbox.cos.COSFloat;
|
|||||||
import org.apache.pdfbox.cos.COSName;
|
import org.apache.pdfbox.cos.COSName;
|
||||||
import org.apache.pdfbox.cos.COSNumber;
|
import org.apache.pdfbox.cos.COSNumber;
|
||||||
import org.apache.pdfbox.cos.COSString;
|
import org.apache.pdfbox.cos.COSString;
|
||||||
|
import org.apache.pdfbox.multipdf.PDFMergerUtility;
|
||||||
import org.apache.pdfbox.pdfparser.PDFStreamParser;
|
import org.apache.pdfbox.pdfparser.PDFStreamParser;
|
||||||
import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
|
import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
@ -38,6 +42,7 @@ import org.apache.pdfbox.pdmodel.font.PDFont;
|
|||||||
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
|
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
|
||||||
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
|
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
|
||||||
import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern;
|
import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern;
|
||||||
|
import org.apache.pdfbox.rendering.PDFRenderer;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import org.springframework.web.multipart.MultipartFile;
|
import org.springframework.web.multipart.MultipartFile;
|
||||||
|
|
||||||
@ -58,6 +63,11 @@ import stirling.software.SPDF.utils.text.WidthCalculator;
|
|||||||
import stirling.software.common.model.api.security.RedactionArea;
|
import stirling.software.common.model.api.security.RedactionArea;
|
||||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||||
import stirling.software.common.util.PdfUtils;
|
import stirling.software.common.util.PdfUtils;
|
||||||
|
import stirling.software.common.util.ProcessExecutor;
|
||||||
|
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
|
||||||
|
import stirling.software.common.util.TempDirectory;
|
||||||
|
import stirling.software.common.util.TempFile;
|
||||||
|
import stirling.software.common.util.TempFileManager;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@ -70,10 +80,11 @@ public class RedactionService {
|
|||||||
private static final int FONT_SCALE_FACTOR = 1000;
|
private static final int FONT_SCALE_FACTOR = 1000;
|
||||||
private static final Set<String> TEXT_SHOWING_OPERATORS = Set.of("Tj", "TJ", "'", "\"");
|
private static final Set<String> TEXT_SHOWING_OPERATORS = Set.of("Tj", "TJ", "'", "\"");
|
||||||
private static final COSString EMPTY_COS_STRING = new COSString("");
|
private static final COSString EMPTY_COS_STRING = new COSString("");
|
||||||
private static final int MAX_SWEEPS = 5;
|
private static final int MAX_SWEEPS = 3;
|
||||||
private boolean aggressiveMode = false;
|
private boolean aggressiveMode = false;
|
||||||
private Map<Integer, List<AggressiveSegMatch>> aggressiveSegMatches = null;
|
private Map<Integer, List<AggressiveSegMatch>> aggressiveSegMatches = null;
|
||||||
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||||
|
private final TempFileManager tempFileManager;
|
||||||
|
|
||||||
private static void redactAreas(
|
private static void redactAreas(
|
||||||
List<RedactionArea> redactionAreas, PDDocument document, PDPageTree allPages)
|
List<RedactionArea> redactionAreas, PDDocument document, PDPageTree allPages)
|
||||||
@ -98,7 +109,7 @@ public class RedactionService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (Map.Entry<Integer, List<RedactionArea>> entry : redactionsByPage.entrySet()) {
|
for (Map.Entry<Integer, List<RedactionArea>> entry : redactionsByPage.entrySet()) {
|
||||||
Integer pageNumber = entry.getKey();
|
int pageNumber = entry.getKey();
|
||||||
List<RedactionArea> areasForPage = entry.getValue();
|
List<RedactionArea> areasForPage = entry.getValue();
|
||||||
if (pageNumber > allPages.getCount()) {
|
if (pageNumber > allPages.getCount()) {
|
||||||
continue;
|
continue;
|
||||||
@ -110,14 +121,13 @@ public class RedactionService {
|
|||||||
document, page, PDPageContentStream.AppendMode.APPEND, true, true)) {
|
document, page, PDPageContentStream.AppendMode.APPEND, true, true)) {
|
||||||
contentStream.saveGraphicsState();
|
contentStream.saveGraphicsState();
|
||||||
for (RedactionArea redactionArea : areasForPage) {
|
for (RedactionArea redactionArea : areasForPage) {
|
||||||
Color redactColor = decodeOrDefault(redactionArea.getColor());
|
contentStream.setNonStrokingColor(decodeOrDefault(redactionArea.getColor()));
|
||||||
contentStream.setNonStrokingColor(redactColor);
|
|
||||||
float x = redactionArea.getX().floatValue();
|
float x = redactionArea.getX().floatValue();
|
||||||
float y = redactionArea.getY().floatValue();
|
float y = redactionArea.getY().floatValue();
|
||||||
float width = redactionArea.getWidth().floatValue();
|
float width = redactionArea.getWidth().floatValue();
|
||||||
float height = redactionArea.getHeight().floatValue();
|
float height = redactionArea.getHeight().floatValue();
|
||||||
float pdfY = page.getBBox().getHeight() - y - height;
|
contentStream.addRect(
|
||||||
contentStream.addRect(x, pdfY, width, height);
|
x, page.getBBox().getHeight() - y - height, width, height);
|
||||||
contentStream.fill();
|
contentStream.fill();
|
||||||
}
|
}
|
||||||
contentStream.restoreGraphicsState();
|
contentStream.restoreGraphicsState();
|
||||||
@ -133,11 +143,11 @@ public class RedactionService {
|
|||||||
|
|
||||||
List<Integer> pageNumberList = parsePageNumbers(pageNumbers);
|
List<Integer> pageNumberList = parsePageNumbers(pageNumbers);
|
||||||
|
|
||||||
for (Integer pageNumber : pageNumberList) {
|
for (int pageNumber : pageNumberList) {
|
||||||
if (pageNumber <= 0 || pageNumber > allPages.getCount()) {
|
if (pageNumber <= 0 || pageNumber > allPages.getCount()) {
|
||||||
continue; // Skip invalid page numbers
|
continue;
|
||||||
}
|
}
|
||||||
PDPage page = allPages.get(pageNumber - 1); // Convert to 0-based index
|
PDPage page = allPages.get(pageNumber - 1);
|
||||||
try (PDPageContentStream contentStream =
|
try (PDPageContentStream contentStream =
|
||||||
new PDPageContentStream(
|
new PDPageContentStream(
|
||||||
document, page, PDPageContentStream.AppendMode.APPEND, true, true)) {
|
document, page, PDPageContentStream.AppendMode.APPEND, true, true)) {
|
||||||
@ -255,55 +265,28 @@ public class RedactionService {
|
|||||||
boolean useRegex,
|
boolean useRegex,
|
||||||
boolean wholeWordSearch) {
|
boolean wholeWordSearch) {
|
||||||
try {
|
try {
|
||||||
log.debug("Checking page {} for {} target words", pageIndex + 1, targetWords.size());
|
|
||||||
|
|
||||||
for (String term : targetWords) {
|
for (String term : targetWords) {
|
||||||
if (term == null || term.isBlank()) {
|
if (term == null || term.isBlank()) {
|
||||||
log.debug("Skipping empty/null term");
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
log.debug("Searching for term '{}' on page {}", term, pageIndex + 1);
|
|
||||||
TextFinder finder = new TextFinder(term, useRegex, wholeWordSearch);
|
TextFinder finder = new TextFinder(term, useRegex, wholeWordSearch);
|
||||||
finder.setStartPage(pageIndex + 1);
|
finder.setStartPage(pageIndex + 1);
|
||||||
finder.setEndPage(pageIndex + 1);
|
finder.setEndPage(pageIndex + 1);
|
||||||
finder.getText(document);
|
finder.getText(document);
|
||||||
|
|
||||||
List<PDFText> foundTexts = finder.getFoundTexts();
|
List<PDFText> foundTexts = finder.getFoundTexts();
|
||||||
log.debug(
|
|
||||||
"Found {} instances of '{}' on page {}",
|
|
||||||
foundTexts.size(),
|
|
||||||
term,
|
|
||||||
pageIndex + 1);
|
|
||||||
|
|
||||||
for (PDFText ft : foundTexts) {
|
for (PDFText ft : foundTexts) {
|
||||||
if (ft.getPageIndex() == pageIndex) {
|
if (ft.getPageIndex() == pageIndex) {
|
||||||
log.warn(
|
|
||||||
"FOUND REMAINING TARGET: '{}' on page {} - text content: '{}'",
|
|
||||||
term,
|
|
||||||
pageIndex + 1,
|
|
||||||
ft.getText() != null ? ft.getText() : "null");
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!foundTexts.isEmpty()) {
|
if (!foundTexts.isEmpty()) {}
|
||||||
log.debug(
|
|
||||||
"Found instances but not on target page {} (found on pages: {})",
|
|
||||||
pageIndex + 1,
|
|
||||||
foundTexts.stream()
|
|
||||||
.map(ft -> String.valueOf(ft.getPageIndex() + 1))
|
|
||||||
.distinct()
|
|
||||||
.collect(java.util.stream.Collectors.joining(", ")));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
log.debug("Page {} contains no target words", pageIndex + 1);
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error("Error checking page {} for targets: {}", pageIndex + 1, e.getMessage());
|
|
||||||
log.warn("Due to error, assuming page {} may still contain targets", pageIndex + 1);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -313,28 +296,20 @@ public class RedactionService {
|
|||||||
Set<String> targetWords,
|
Set<String> targetWords,
|
||||||
boolean useRegex,
|
boolean useRegex,
|
||||||
boolean wholeWordSearch) {
|
boolean wholeWordSearch) {
|
||||||
log.debug("Verifying if document still contains targets: {}", targetWords);
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
int idx = -1;
|
int idx = -1;
|
||||||
final int numberOfPages = document.getNumberOfPages();
|
final int numberOfPages = document.getNumberOfPages();
|
||||||
for (int i = 0; i < numberOfPages; i++) {
|
for (int i = 0; i < numberOfPages; i++) {
|
||||||
idx++;
|
idx++;
|
||||||
log.debug("Checking page {} for remaining targets", idx + 1);
|
|
||||||
|
|
||||||
if (pageStillContainsTargets(
|
if (pageStillContainsTargets(
|
||||||
document, idx, targetWords, useRegex, wholeWordSearch)) {
|
document, idx, targetWords, useRegex, wholeWordSearch)) {
|
||||||
log.warn("Page {} still contains target words", idx + 1);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
log.info("Document verification completed - no targets found");
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error("Error during document verification: {}", e.getMessage());
|
|
||||||
log.warn("Due to verification error, assuming targets may still exist");
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -342,57 +317,26 @@ public class RedactionService {
|
|||||||
public static Map<Integer, List<PDFText>> findTextToRedact(
|
public static Map<Integer, List<PDFText>> findTextToRedact(
|
||||||
PDDocument document, String[] listOfText, boolean useRegex, boolean wholeWordSearch) {
|
PDDocument document, String[] listOfText, boolean useRegex, boolean wholeWordSearch) {
|
||||||
Map<Integer, List<PDFText>> allFoundTextsByPage = new HashMap<>();
|
Map<Integer, List<PDFText>> allFoundTextsByPage = new HashMap<>();
|
||||||
log.info(
|
|
||||||
"Starting text search with {} terms, useRegex={}, wholeWordSearch={}",
|
|
||||||
listOfText.length,
|
|
||||||
useRegex,
|
|
||||||
wholeWordSearch);
|
|
||||||
|
|
||||||
int totalInstancesFound = 0;
|
|
||||||
|
|
||||||
for (String text : listOfText) {
|
for (String text : listOfText) {
|
||||||
String t = text.trim();
|
String t = text.trim();
|
||||||
if (t.isEmpty()) {
|
if (t.isEmpty()) {
|
||||||
log.debug("Skipping empty search term");
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
log.info("Searching for term: '{}'", t);
|
|
||||||
try {
|
try {
|
||||||
TextFinder finder = new TextFinder(t, useRegex, wholeWordSearch);
|
TextFinder finder = new TextFinder(t, useRegex, wholeWordSearch);
|
||||||
finder.getText(document);
|
finder.getText(document);
|
||||||
List<PDFText> foundTexts = finder.getFoundTexts();
|
List<PDFText> foundTexts = finder.getFoundTexts();
|
||||||
|
|
||||||
log.info("Found {} instances of '{}' across the document", foundTexts.size(), t);
|
|
||||||
|
|
||||||
for (PDFText found : foundTexts) {
|
for (PDFText found : foundTexts) {
|
||||||
allFoundTextsByPage
|
allFoundTextsByPage
|
||||||
.computeIfAbsent(found.getPageIndex(), k -> new ArrayList<>())
|
.computeIfAbsent(found.getPageIndex(), k -> new ArrayList<>())
|
||||||
.add(found);
|
.add(found);
|
||||||
|
|
||||||
log.debug(
|
|
||||||
"Found instance on page {}: '{}'",
|
|
||||||
found.getPageIndex() + 1,
|
|
||||||
found.getText() != null ? found.getText() : "null");
|
|
||||||
totalInstancesFound++;
|
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error("Error searching for term '{}': {}", t, e.getMessage());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
log.info("Total instances found across all search terms: {}", totalInstancesFound);
|
|
||||||
log.info(
|
|
||||||
"Text found on {} pages out of {} total pages",
|
|
||||||
allFoundTextsByPage.size(),
|
|
||||||
document.getNumberOfPages());
|
|
||||||
|
|
||||||
// Log distribution by page
|
|
||||||
allFoundTextsByPage.forEach(
|
|
||||||
(pageIndex, texts) -> {
|
|
||||||
log.info("Page {}: {} instances", pageIndex + 1, texts.size());
|
|
||||||
});
|
|
||||||
|
|
||||||
return allFoundTextsByPage;
|
return allFoundTextsByPage;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -650,6 +594,122 @@ public class RedactionService {
|
|||||||
return strategy.redact(request);
|
return strategy.redact(request);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static boolean isTextSafeForRedaction(String text) {
|
||||||
|
if (text == null || text.isEmpty()) return true;
|
||||||
|
|
||||||
|
for (int i = 0; i < text.length(); i++) {
|
||||||
|
char c = text.charAt(i);
|
||||||
|
int codePoint = c;
|
||||||
|
|
||||||
|
if (codePoint >= 65488) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (Character.isISOControl(c) && c != '\n' && c != '\t' && c != '\r') {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Object> deepCopyTokens(List<Object> original) {
|
||||||
|
List<Object> copy = new ArrayList<>(original.size());
|
||||||
|
for (Object obj : original) {
|
||||||
|
if (obj instanceof COSDictionary dict) {
|
||||||
|
COSDictionary newDict = new COSDictionary();
|
||||||
|
for (COSName key : dict.keySet()) {
|
||||||
|
newDict.setItem(key, dict.getDictionaryObject(key));
|
||||||
|
}
|
||||||
|
copy.add(newDict);
|
||||||
|
} else if (obj instanceof List<?> nestedList
|
||||||
|
&& !nestedList.isEmpty()
|
||||||
|
&& nestedList.get(0) instanceof Object) {
|
||||||
|
try {
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
List<Object> objectList = (List<Object>) nestedList;
|
||||||
|
copy.add(deepCopyTokens(objectList));
|
||||||
|
} catch (ClassCastException e) {
|
||||||
|
copy.add(obj);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
copy.add(obj);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return copy;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static TokenModificationResult updateOperatorSafely(
|
||||||
|
List<Object> tokens, int tokenIndex, String originalOperator) {
|
||||||
|
try {
|
||||||
|
int operatorIndex = tokenIndex + 1;
|
||||||
|
if (isValidTokenIndex(tokens, operatorIndex)
|
||||||
|
&& tokens.get(operatorIndex) instanceof Operator op
|
||||||
|
&& op.getName().equals(originalOperator)) {
|
||||||
|
tokens.set(operatorIndex, Operator.getOperator("TJ"));
|
||||||
|
}
|
||||||
|
return TokenModificationResult.success();
|
||||||
|
} catch (Exception e) {
|
||||||
|
return TokenModificationResult.success();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static WipeResult wipeAllSemanticTextInTokens(List<Object> tokens) {
|
||||||
|
return wipeAllSemanticTextInTokens(tokens, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public byte[] performVisualRedactionWithOcrRestoration(
|
||||||
|
RedactPdfRequest request,
|
||||||
|
String[] listOfText,
|
||||||
|
boolean useRegex,
|
||||||
|
boolean wholeWordSearch)
|
||||||
|
throws IOException {
|
||||||
|
PDDocument visualRedactedDoc = null;
|
||||||
|
try {
|
||||||
|
visualRedactedDoc = pdfDocumentFactory.load(request.getFileInput());
|
||||||
|
Map<Integer, List<PDFText>> allFound =
|
||||||
|
findTextToRedact(visualRedactedDoc, listOfText, useRegex, wholeWordSearch);
|
||||||
|
String effectiveColor =
|
||||||
|
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
||||||
|
? "#000000"
|
||||||
|
: request.getRedactColor();
|
||||||
|
byte[] visualRedactedBytes =
|
||||||
|
finalizeRedaction(
|
||||||
|
visualRedactedDoc,
|
||||||
|
allFound,
|
||||||
|
effectiveColor,
|
||||||
|
request.getCustomPadding(),
|
||||||
|
true,
|
||||||
|
false);
|
||||||
|
return performOcrRestoration(visualRedactedBytes, request);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new IOException(
|
||||||
|
"Visual redaction with OCR restoration failed: " + e.getMessage(), e);
|
||||||
|
} finally {
|
||||||
|
if (visualRedactedDoc != null) {
|
||||||
|
try {
|
||||||
|
visualRedactedDoc.close();
|
||||||
|
} catch (IOException ignore) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte[] performOcrRestoration(byte[] redactedPdfBytes, RedactPdfRequest request)
|
||||||
|
throws IOException, InterruptedException {
|
||||||
|
try (TempFile tempInputFile = new TempFile(tempFileManager, ".pdf");
|
||||||
|
TempFile tempOutputFile = new TempFile(tempFileManager, ".pdf")) {
|
||||||
|
java.nio.file.Files.write(tempInputFile.getPath(), redactedPdfBytes);
|
||||||
|
if (isOcrMyPdfAvailable()) {
|
||||||
|
return processWithOcrMyPdfForRestoration(
|
||||||
|
tempInputFile.getPath(), tempOutputFile.getPath(), request);
|
||||||
|
} else if (isTesseractAvailable()) {
|
||||||
|
return processWithTesseractForRestoration(
|
||||||
|
tempInputFile.getPath(), tempOutputFile.getPath(), request);
|
||||||
|
}
|
||||||
|
return redactedPdfBytes;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static String getDecodedString(COSString cosString, PDFont font) {
|
private static String getDecodedString(COSString cosString, PDFont font) {
|
||||||
try {
|
try {
|
||||||
String decoded = TextDecodingHelper.tryDecodeWithFont(font, cosString);
|
String decoded = TextDecodingHelper.tryDecodeWithFont(font, cosString);
|
||||||
@ -671,7 +731,6 @@ public class RedactionService {
|
|||||||
text.getBytes(java.nio.charset.StandardCharsets.UTF_8));
|
text.getBytes(java.nio.charset.StandardCharsets.UTF_8));
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
// Fall through to return newString
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return newString;
|
return newString;
|
||||||
@ -740,48 +799,26 @@ public class RedactionService {
|
|||||||
return normalized.toString();
|
return normalized.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean isTextSafeForRedaction(String text) {
|
private boolean isOcrMyPdfAvailable() {
|
||||||
if (text == null || text.isEmpty()) return true;
|
|
||||||
|
|
||||||
for (int i = 0; i < text.length(); i++) {
|
|
||||||
char c = text.charAt(i);
|
|
||||||
int codePoint = c;
|
|
||||||
|
|
||||||
if (codePoint >= 65488) {
|
|
||||||
return false; // Contains problematic high-range characters
|
|
||||||
}
|
|
||||||
if (Character.isISOControl(c) && c != '\n' && c != '\t' && c != '\r') {
|
|
||||||
return false; // Contains problematic control characters
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static List<Object> deepCopyTokens(List<Object> original) {
|
|
||||||
List<Object> copy = new ArrayList<>(original.size());
|
|
||||||
for (Object obj : original) {
|
|
||||||
if (obj instanceof COSDictionary dict) {
|
|
||||||
COSDictionary newDict = new COSDictionary();
|
|
||||||
for (COSName key : dict.keySet()) {
|
|
||||||
newDict.setItem(key, dict.getDictionaryObject(key));
|
|
||||||
}
|
|
||||||
copy.add(newDict);
|
|
||||||
} else if (obj instanceof List<?> nestedList
|
|
||||||
&& !nestedList.isEmpty()
|
|
||||||
&& nestedList.get(0) instanceof Object) {
|
|
||||||
try {
|
try {
|
||||||
@SuppressWarnings("unchecked")
|
ProcessExecutorResult result =
|
||||||
List<Object> objectList = (List<Object>) nestedList;
|
ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF)
|
||||||
copy.add(deepCopyTokens(objectList));
|
.runCommandWithOutputHandling(Arrays.asList("ocrmypdf", "--version"));
|
||||||
} catch (ClassCastException e) {
|
return result.getRc() == 0;
|
||||||
copy.add(obj); // Fallback to shallow copy if cast fails
|
} catch (Exception e) {
|
||||||
}
|
return false;
|
||||||
} else {
|
|
||||||
copy.add(obj); // Shallow copy for primitives/operators
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return copy;
|
|
||||||
|
private boolean isTesseractAvailable() {
|
||||||
|
try {
|
||||||
|
ProcessExecutorResult result =
|
||||||
|
ProcessExecutor.getInstance(ProcessExecutor.Processes.TESSERACT)
|
||||||
|
.runCommandWithOutputHandling(Arrays.asList("tesseract", "--version"));
|
||||||
|
return result.getRc() == 0;
|
||||||
|
} catch (Exception e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean isFontSuitableForWidthCalculation(PDFont font) {
|
private static boolean isFontSuitableForWidthCalculation(PDFont font) {
|
||||||
@ -1027,15 +1064,43 @@ public class RedactionService {
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
private float safeGetStringWidth(PDFont font, String text) {
|
private byte[] processWithOcrMyPdfForRestoration(
|
||||||
// Delegate to WidthCalculator; convert from user-space at fontSize=1 to font units
|
java.nio.file.Path inputPath, java.nio.file.Path outputPath, RedactPdfRequest request)
|
||||||
if (font == null || text == null || text.isEmpty()) return 0f;
|
throws IOException, InterruptedException {
|
||||||
try {
|
List<String> command =
|
||||||
float widthAtSize1 = WidthCalculator.calculateAccurateWidth(font, text, 1.0f);
|
Arrays.asList(
|
||||||
return widthAtSize1 * FONT_SCALE_FACTOR; // convert back to font units for callers
|
"ocrmypdf",
|
||||||
} catch (Exception e) {
|
"--verbose",
|
||||||
return 0f;
|
"1",
|
||||||
|
"--output-type",
|
||||||
|
"pdf",
|
||||||
|
"--pdf-renderer",
|
||||||
|
"sandwich",
|
||||||
|
"--language",
|
||||||
|
"eng",
|
||||||
|
"--optimize",
|
||||||
|
"0",
|
||||||
|
"--jpeg-quality",
|
||||||
|
"100",
|
||||||
|
"--png-quality",
|
||||||
|
"9",
|
||||||
|
"--force-ocr",
|
||||||
|
"--deskew",
|
||||||
|
"--clean",
|
||||||
|
"--clean-final",
|
||||||
|
inputPath.toString(),
|
||||||
|
outputPath.toString());
|
||||||
|
ProcessExecutorResult result =
|
||||||
|
ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF)
|
||||||
|
.runCommandWithOutputHandling(command);
|
||||||
|
if (result.getRc() != 0) {
|
||||||
|
throw new IOException(
|
||||||
|
"OCRmyPDF restoration failed with return code: "
|
||||||
|
+ result.getRc()
|
||||||
|
+ ". Error: "
|
||||||
|
+ result.getMessages());
|
||||||
}
|
}
|
||||||
|
return java.nio.file.Files.readAllBytes(outputPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean removeSemanticProperties(COSDictionary dict, boolean removeTU) {
|
private static boolean removeSemanticProperties(COSDictionary dict, boolean removeTU) {
|
||||||
@ -1166,7 +1231,6 @@ public class RedactionService {
|
|||||||
return WidthCalculator.calculateAccurateWidth(font, text, fontSize);
|
return WidthCalculator.calculateAccurateWidth(font, text, fontSize);
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
// Width calculation failed
|
|
||||||
}
|
}
|
||||||
return 0f;
|
return 0f;
|
||||||
}
|
}
|
||||||
@ -1235,166 +1299,56 @@ public class RedactionService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean performTextReplacement(
|
private byte[] processWithTesseractForRestoration(
|
||||||
PDDocument document,
|
java.nio.file.Path inputPath, java.nio.file.Path outputPath, RedactPdfRequest request)
|
||||||
Map<Integer, List<PDFText>> allFoundTextsByPage,
|
throws IOException, InterruptedException {
|
||||||
String[] listOfText,
|
try (TempDirectory tempDir = new TempDirectory(tempFileManager)) {
|
||||||
boolean useRegex,
|
java.io.File tempOutputDir = new java.io.File(tempDir.getPath().toFile(), "output");
|
||||||
boolean wholeWordSearchBool) {
|
java.io.File tempImagesDir = new java.io.File(tempDir.getPath().toFile(), "images");
|
||||||
if (allFoundTextsByPage.isEmpty()) {
|
java.io.File finalOutputFile =
|
||||||
log.info("No text found to redact");
|
new java.io.File(tempDir.getPath().toFile(), "final_output.pdf");
|
||||||
return false;
|
tempOutputDir.mkdirs();
|
||||||
|
tempImagesDir.mkdirs();
|
||||||
|
try (PDDocument document = pdfDocumentFactory.load(inputPath.toFile())) {
|
||||||
|
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||||
|
int pageCount = document.getNumberOfPages();
|
||||||
|
PDFMergerUtility merger = new PDFMergerUtility();
|
||||||
|
merger.setDestinationFileName(finalOutputFile.toString());
|
||||||
|
for (int pageNum = 0; pageNum < pageCount; pageNum++) {
|
||||||
|
BufferedImage image = pdfRenderer.renderImageWithDPI(pageNum, 600);
|
||||||
|
java.io.File imagePath =
|
||||||
|
new java.io.File(tempImagesDir, "page_" + pageNum + ".png");
|
||||||
|
ImageIO.write(image, "png", imagePath);
|
||||||
|
List<String> command =
|
||||||
|
Arrays.asList(
|
||||||
|
"tesseract",
|
||||||
|
imagePath.toString(),
|
||||||
|
new java.io.File(tempOutputDir, "page_" + pageNum).toString(),
|
||||||
|
"-l",
|
||||||
|
"eng",
|
||||||
|
"--dpi",
|
||||||
|
"600",
|
||||||
|
"--psm",
|
||||||
|
"1",
|
||||||
|
"pdf");
|
||||||
|
ProcessExecutorResult result =
|
||||||
|
ProcessExecutor.getInstance(ProcessExecutor.Processes.TESSERACT)
|
||||||
|
.runCommandWithOutputHandling(command);
|
||||||
|
if (result.getRc() != 0) {
|
||||||
|
throw new IOException(
|
||||||
|
"Tesseract restoration failed with return code: " + result.getRc());
|
||||||
}
|
}
|
||||||
try {
|
java.io.File pageOutputPath =
|
||||||
Set<String> allSearchTerms =
|
new java.io.File(tempOutputDir, "page_" + pageNum + ".pdf");
|
||||||
Arrays.stream(listOfText)
|
merger.addSource(pageOutputPath);
|
||||||
.map(String::trim)
|
|
||||||
.filter(s -> !s.isEmpty())
|
|
||||||
.collect(Collectors.toSet());
|
|
||||||
|
|
||||||
log.info(
|
|
||||||
"Starting text replacement with {} search terms: {}",
|
|
||||||
allSearchTerms.size(),
|
|
||||||
allSearchTerms);
|
|
||||||
log.info("Total pages in document: {}", document.getNumberOfPages());
|
|
||||||
log.info("Initial text found on {} pages", allFoundTextsByPage.size());
|
|
||||||
|
|
||||||
// Count initial instances
|
|
||||||
int initialTotalInstances =
|
|
||||||
allFoundTextsByPage.values().stream().mapToInt(List::size).sum();
|
|
||||||
log.info("Total initial instances to redact: {}", initialTotalInstances);
|
|
||||||
|
|
||||||
int finalSweepCount = 0;
|
|
||||||
for (int sweep = 0; sweep < MAX_SWEEPS; sweep++) {
|
|
||||||
finalSweepCount = sweep + 1;
|
|
||||||
log.info("=== Starting sweep {} of {} ===", sweep + 1, MAX_SWEEPS);
|
|
||||||
int pagesProcessed = 0;
|
|
||||||
int totalModifications = 0;
|
|
||||||
|
|
||||||
for (int pageIndex = 0; pageIndex < document.getNumberOfPages(); pageIndex++) {
|
|
||||||
PDPage page = document.getPages().get(pageIndex);
|
|
||||||
List<PDFText> pageFoundTexts =
|
|
||||||
allFoundTextsByPage.getOrDefault(pageIndex, List.of());
|
|
||||||
|
|
||||||
log.debug(
|
|
||||||
"Processing page {} - found {} instances",
|
|
||||||
pageIndex + 1,
|
|
||||||
pageFoundTexts.size());
|
|
||||||
|
|
||||||
List<Object> filtered =
|
|
||||||
createTokensWithoutTargetText(
|
|
||||||
document, page, allSearchTerms, useRegex, wholeWordSearchBool);
|
|
||||||
writeFilteredContentStream(document, page, filtered);
|
|
||||||
|
|
||||||
// Count modifications (rough estimate based on token count difference)
|
|
||||||
int tokenDiff = Math.abs(filtered.size() - getOriginalTokenCount(page));
|
|
||||||
totalModifications += tokenDiff;
|
|
||||||
pagesProcessed++;
|
|
||||||
|
|
||||||
log.debug("Page {} - token modifications: {}", pageIndex + 1, tokenDiff);
|
|
||||||
}
|
}
|
||||||
|
merger.mergeDocuments(null);
|
||||||
log.info(
|
java.nio.file.Files.copy(
|
||||||
"Sweep {} completed - processed {} pages, total modifications: {}",
|
finalOutputFile.toPath(),
|
||||||
sweep + 1,
|
outputPath,
|
||||||
pagesProcessed,
|
java.nio.file.StandardCopyOption.REPLACE_EXISTING);
|
||||||
totalModifications);
|
|
||||||
|
|
||||||
// Check remaining targets
|
|
||||||
boolean stillContainsTargets =
|
|
||||||
documentStillContainsTargets(
|
|
||||||
document, allSearchTerms, useRegex, wholeWordSearchBool);
|
|
||||||
|
|
||||||
if (!stillContainsTargets) {
|
|
||||||
log.info("SUCCESS: All targets removed after {} sweeps", sweep + 1);
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
log.warn(
|
|
||||||
"WARNING: Still contains targets after sweep {} - continuing...",
|
|
||||||
sweep + 1);
|
|
||||||
}
|
}
|
||||||
}
|
return java.nio.file.Files.readAllBytes(outputPath);
|
||||||
|
|
||||||
// Final verification - run multiple times to catch any missed instances
|
|
||||||
boolean finalCheck = false;
|
|
||||||
for (int verifyAttempt = 0; verifyAttempt < 3; verifyAttempt++) {
|
|
||||||
log.info("Final verification attempt {} of 3", verifyAttempt + 1);
|
|
||||||
finalCheck =
|
|
||||||
documentStillContainsTargets(
|
|
||||||
document, allSearchTerms, useRegex, wholeWordSearchBool);
|
|
||||||
|
|
||||||
if (!finalCheck) {
|
|
||||||
log.info(
|
|
||||||
"Verification attempt {} passed - no targets found", verifyAttempt + 1);
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
log.warn("Verification attempt {} found remaining targets", verifyAttempt + 1);
|
|
||||||
if (verifyAttempt < 2) {
|
|
||||||
log.info("Performing additional cleanup sweep due to verification failure");
|
|
||||||
// Try one more sweep
|
|
||||||
for (PDPage page : document.getPages()) {
|
|
||||||
List<Object> additionalFiltered =
|
|
||||||
createTokensWithoutTargetText(
|
|
||||||
document,
|
|
||||||
page,
|
|
||||||
allSearchTerms,
|
|
||||||
useRegex,
|
|
||||||
wholeWordSearchBool);
|
|
||||||
writeFilteredContentStream(document, page, additionalFiltered);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (finalCheck) {
|
|
||||||
log.error(
|
|
||||||
"FAILURE: Document still contains targets after {} sweeps and {} verification attempts. Falling back to visual redaction.",
|
|
||||||
MAX_SWEEPS,
|
|
||||||
3);
|
|
||||||
log.error("Remaining search terms: {}", allSearchTerms);
|
|
||||||
|
|
||||||
// Log detailed information about what was found
|
|
||||||
log.error("=== DETAILED FAILURE ANALYSIS ===");
|
|
||||||
for (int pageIdx = 0; pageIdx < document.getNumberOfPages(); pageIdx++) {
|
|
||||||
for (String term : allSearchTerms) {
|
|
||||||
try {
|
|
||||||
TextFinder finder = new TextFinder(term, useRegex, wholeWordSearchBool);
|
|
||||||
finder.setStartPage(pageIdx + 1);
|
|
||||||
finder.setEndPage(pageIdx + 1);
|
|
||||||
finder.getText(document);
|
|
||||||
|
|
||||||
for (PDFText found : finder.getFoundTexts()) {
|
|
||||||
if (found.getPageIndex() == pageIdx) {
|
|
||||||
log.error(
|
|
||||||
"REMAINING: '{}' found on page {} at position ({}, {})",
|
|
||||||
term,
|
|
||||||
pageIdx + 1,
|
|
||||||
found.getX1(),
|
|
||||||
found.getY1());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.error(
|
|
||||||
"Error during failure analysis for term '{}' on page {}: {}",
|
|
||||||
term,
|
|
||||||
pageIdx + 1,
|
|
||||||
e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
log.error("=== END FAILURE ANALYSIS ===");
|
|
||||||
|
|
||||||
return true; // Return true to indicate fallback needed
|
|
||||||
} else {
|
|
||||||
log.info(
|
|
||||||
"SUCCESS: All text redaction completed successfully after {} sweeps",
|
|
||||||
finalSweepCount);
|
|
||||||
return false; // Return false to indicate success
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.error("Exception during text replacement: {}", e.getMessage(), e);
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1460,22 +1414,16 @@ public class RedactionService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
// Failed to add spacing adjustment
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static TokenModificationResult updateOperatorSafely(
|
private float safeGetStringWidth(PDFont font, String text) {
|
||||||
List<Object> tokens, int tokenIndex, String originalOperator) {
|
if (font == null || text == null || text.isEmpty()) return 0f;
|
||||||
try {
|
try {
|
||||||
int operatorIndex = tokenIndex + 1;
|
float widthAtSize1 = WidthCalculator.calculateAccurateWidth(font, text, 1.0f);
|
||||||
if (isValidTokenIndex(tokens, operatorIndex)
|
return widthAtSize1 * FONT_SCALE_FACTOR;
|
||||||
&& tokens.get(operatorIndex) instanceof Operator op
|
|
||||||
&& op.getName().equals(originalOperator)) {
|
|
||||||
tokens.set(operatorIndex, Operator.getOperator("TJ"));
|
|
||||||
}
|
|
||||||
return TokenModificationResult.success();
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
return TokenModificationResult.success(); // Non-critical failure
|
return 0f;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1519,9 +1467,7 @@ public class RedactionService {
|
|||||||
float adjustment = wOrig - wMod;
|
float adjustment = wOrig - wMod;
|
||||||
if (Math.abs(adjustment) > PRECISION_THRESHOLD) {
|
if (Math.abs(adjustment) > PRECISION_THRESHOLD) {
|
||||||
float kerning = (-adjustment / segment.getFontSize()) * FONT_SCALE_FACTOR;
|
float kerning = (-adjustment / segment.getFontSize()) * FONT_SCALE_FACTOR;
|
||||||
// If next token is a number, combine; otherwise insert new number
|
|
||||||
if (i + 1 < size && redactedArray.get(i + 1) instanceof COSNumber num) {
|
if (i + 1 < size && redactedArray.get(i + 1) instanceof COSNumber num) {
|
||||||
// Skip adding the next separately and add combined value
|
|
||||||
i++;
|
i++;
|
||||||
float combined = num.floatValue() + kerning;
|
float combined = num.floatValue() + kerning;
|
||||||
out.add(new COSFloat(combined));
|
out.add(new COSFloat(combined));
|
||||||
@ -1661,7 +1607,6 @@ public class RedactionService {
|
|||||||
List<TextSegment> textSegments = extractTextSegments(page, tokens, this.aggressiveMode);
|
List<TextSegment> textSegments = extractTextSegments(page, tokens, this.aggressiveMode);
|
||||||
log.debug("Extracted {} text segments from tokens", textSegments.size());
|
log.debug("Extracted {} text segments from tokens", textSegments.size());
|
||||||
|
|
||||||
// Log extracted text content for debugging
|
|
||||||
if (!textSegments.isEmpty()) {
|
if (!textSegments.isEmpty()) {
|
||||||
StringBuilder allText = new StringBuilder();
|
StringBuilder allText = new StringBuilder();
|
||||||
boolean hasProblematicChars = false;
|
boolean hasProblematicChars = false;
|
||||||
@ -1733,9 +1678,161 @@ public class RedactionService {
|
|||||||
return problematicRatio > 0.3;
|
return problematicRatio > 0.3;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static WipeResult wipeAllSemanticTextInTokens(List<Object> tokens) {
|
public boolean performTextReplacement(
|
||||||
return wipeAllSemanticTextInTokens(
|
PDDocument document,
|
||||||
tokens, true); // Default to removing TU for backward compatibility
|
Map<Integer, List<PDFText>> allFoundTextsByPage,
|
||||||
|
String[] listOfText,
|
||||||
|
boolean useRegex,
|
||||||
|
boolean wholeWordSearchBool) {
|
||||||
|
if (allFoundTextsByPage.isEmpty()) {
|
||||||
|
log.info("No text found to redact");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
Set<String> allSearchTerms =
|
||||||
|
Arrays.stream(listOfText)
|
||||||
|
.map(String::trim)
|
||||||
|
.filter(s -> !s.isEmpty())
|
||||||
|
.collect(Collectors.toSet());
|
||||||
|
|
||||||
|
log.info(
|
||||||
|
"Starting text replacement with {} search terms: {}",
|
||||||
|
allSearchTerms.size(),
|
||||||
|
allSearchTerms);
|
||||||
|
log.info("Total pages in document: {}", document.getNumberOfPages());
|
||||||
|
log.info("Initial text found on {} pages", allFoundTextsByPage.size());
|
||||||
|
|
||||||
|
int initialTotalInstances =
|
||||||
|
allFoundTextsByPage.values().stream().mapToInt(List::size).sum();
|
||||||
|
log.info("Total initial instances to redact: {}", initialTotalInstances);
|
||||||
|
|
||||||
|
int finalSweepCount = 0;
|
||||||
|
for (int sweep = 0; sweep < MAX_SWEEPS; sweep++) {
|
||||||
|
finalSweepCount = sweep + 1;
|
||||||
|
log.info("=== Starting sweep {} of {} ===", sweep + 1, MAX_SWEEPS);
|
||||||
|
int pagesProcessed = 0;
|
||||||
|
int totalModifications = 0;
|
||||||
|
|
||||||
|
for (int pageIndex = 0; pageIndex < document.getNumberOfPages(); pageIndex++) {
|
||||||
|
PDPage page = document.getPages().get(pageIndex);
|
||||||
|
List<PDFText> pageFoundTexts =
|
||||||
|
allFoundTextsByPage.getOrDefault(pageIndex, List.of());
|
||||||
|
|
||||||
|
log.debug(
|
||||||
|
"Processing page {} - found {} instances",
|
||||||
|
pageIndex + 1,
|
||||||
|
pageFoundTexts.size());
|
||||||
|
|
||||||
|
List<Object> filtered =
|
||||||
|
createTokensWithoutTargetText(
|
||||||
|
document, page, allSearchTerms, useRegex, wholeWordSearchBool);
|
||||||
|
writeFilteredContentStream(document, page, filtered);
|
||||||
|
|
||||||
|
int tokenDiff = Math.abs(filtered.size() - getOriginalTokenCount(page));
|
||||||
|
totalModifications += tokenDiff;
|
||||||
|
pagesProcessed++;
|
||||||
|
|
||||||
|
log.debug("Page {} - token modifications: {}", pageIndex + 1, tokenDiff);
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info(
|
||||||
|
"Sweep {} completed - processed {} pages, total modifications: {}",
|
||||||
|
sweep + 1,
|
||||||
|
pagesProcessed,
|
||||||
|
totalModifications);
|
||||||
|
|
||||||
|
boolean stillContainsTargets =
|
||||||
|
documentStillContainsTargets(
|
||||||
|
document, allSearchTerms, useRegex, wholeWordSearchBool);
|
||||||
|
|
||||||
|
if (!stillContainsTargets) {
|
||||||
|
log.info("SUCCESS: All targets removed after {} sweeps", sweep + 1);
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
log.warn(
|
||||||
|
"WARNING: Still contains targets after sweep {} - continuing...",
|
||||||
|
sweep + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean finalCheck = false;
|
||||||
|
for (int verifyAttempt = 0; verifyAttempt < 3; verifyAttempt++) {
|
||||||
|
log.info("Final verification attempt {} of 3", verifyAttempt + 1);
|
||||||
|
finalCheck =
|
||||||
|
documentStillContainsTargets(
|
||||||
|
document, allSearchTerms, useRegex, wholeWordSearchBool);
|
||||||
|
|
||||||
|
if (!finalCheck) {
|
||||||
|
log.info(
|
||||||
|
"Verification attempt {} passed - no targets found", verifyAttempt + 1);
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
log.warn("Verification attempt {} found remaining targets", verifyAttempt + 1);
|
||||||
|
if (verifyAttempt < 2) {
|
||||||
|
log.info("Performing additional cleanup sweep due to verification failure");
|
||||||
|
for (PDPage page : document.getPages()) {
|
||||||
|
List<Object> additionalFiltered =
|
||||||
|
createTokensWithoutTargetText(
|
||||||
|
document,
|
||||||
|
page,
|
||||||
|
allSearchTerms,
|
||||||
|
useRegex,
|
||||||
|
wholeWordSearchBool);
|
||||||
|
writeFilteredContentStream(document, page, additionalFiltered);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (finalCheck) {
|
||||||
|
log.error(
|
||||||
|
"FAILURE: Document still contains targets after {} sweeps and {} verification attempts. Falling back to visual redaction with OCR restoration.",
|
||||||
|
MAX_SWEEPS,
|
||||||
|
3);
|
||||||
|
log.error("Remaining search terms: {}", allSearchTerms);
|
||||||
|
|
||||||
|
log.error("=== DETAILED FAILURE ANALYSIS ===");
|
||||||
|
for (int pageIdx = 0; pageIdx < document.getNumberOfPages(); pageIdx++) {
|
||||||
|
for (String term : allSearchTerms) {
|
||||||
|
try {
|
||||||
|
TextFinder finder = new TextFinder(term, useRegex, wholeWordSearchBool);
|
||||||
|
finder.setStartPage(pageIdx + 1);
|
||||||
|
finder.setEndPage(pageIdx + 1);
|
||||||
|
finder.getText(document);
|
||||||
|
|
||||||
|
for (PDFText found : finder.getFoundTexts()) {
|
||||||
|
if (found.getPageIndex() == pageIdx) {
|
||||||
|
log.error(
|
||||||
|
"REMAINING: '{}' found on page {} at position ({}, {})",
|
||||||
|
term,
|
||||||
|
pageIdx + 1,
|
||||||
|
found.getX1(),
|
||||||
|
found.getY1());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error(
|
||||||
|
"Error during failure analysis for term '{}' on page {}: {}",
|
||||||
|
term,
|
||||||
|
pageIdx + 1,
|
||||||
|
e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.error("=== END FAILURE ANALYSIS ===");
|
||||||
|
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
log.info(
|
||||||
|
"SUCCESS: All text redaction completed successfully after {} sweeps",
|
||||||
|
finalSweepCount);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Exception during text replacement: {}", e.getMessage(), e);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private COSArray createRedactedTJArray(
|
private COSArray createRedactedTJArray(
|
||||||
@ -1905,7 +2002,6 @@ public class RedactionService {
|
|||||||
log.warn("3. Search terms not matching extracted text");
|
log.warn("3. Search terms not matching extracted text");
|
||||||
log.warn("4. Whole word search filtering out matches");
|
log.warn("4. Whole word search filtering out matches");
|
||||||
|
|
||||||
// Log some debugging info
|
|
||||||
if (!segments.isEmpty()) {
|
if (!segments.isEmpty()) {
|
||||||
log.warn("Sample segment text: '{}'", segments.get(0).getText());
|
log.warn("Sample segment text: '{}'", segments.get(0).getText());
|
||||||
log.warn("Target words: {}", targetWords);
|
log.warn("Target words: {}", targetWords);
|
||||||
@ -2010,7 +2106,6 @@ public class RedactionService {
|
|||||||
log.debug("Redacting TJ operator at token index {}", segment.tokenIndex);
|
log.debug("Redacting TJ operator at token index {}", segment.tokenIndex);
|
||||||
COSArray redacted =
|
COSArray redacted =
|
||||||
redactTJArrayByDecodedRanges(segment.font, arr, segMatches);
|
redactTJArrayByDecodedRanges(segment.font, arr, segMatches);
|
||||||
// Inject kerning adjustments per string element to preserve layout
|
|
||||||
COSArray withKerning = buildKerningAdjustedTJArray(arr, redacted, segment);
|
COSArray withKerning = buildKerningAdjustedTJArray(arr, redacted, segment);
|
||||||
newTokens.set(segment.tokenIndex, withKerning);
|
newTokens.set(segment.tokenIndex, withKerning);
|
||||||
totalModifications++;
|
totalModifications++;
|
||||||
@ -2529,7 +2624,6 @@ public class RedactionService {
|
|||||||
try {
|
try {
|
||||||
performEmergencyFallback(tokens, segment.tokenIndex);
|
performEmergencyFallback(tokens, segment.tokenIndex);
|
||||||
} catch (Exception emergencyError) {
|
} catch (Exception emergencyError) {
|
||||||
// Final fallback failed - continue processing
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2562,7 +2656,7 @@ public class RedactionService {
|
|||||||
if (!this.aggressiveMode
|
if (!this.aggressiveMode
|
||||||
&& segment.getFont() != null
|
&& segment.getFont() != null
|
||||||
&& !TextEncodingHelper.isTextSegmentRemovable(segment.getFont(), originalText)) {
|
&& !TextEncodingHelper.isTextSegmentRemovable(segment.getFont(), originalText)) {
|
||||||
newArray.add(cosString); // Keep original COSString to preserve encoding
|
newArray.add(cosString);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2572,7 +2666,7 @@ public class RedactionService {
|
|||||||
List<MatchRange> sortedMatches =
|
List<MatchRange> sortedMatches =
|
||||||
matches.stream().sorted(Comparator.comparingInt(MatchRange::getStartPos)).toList();
|
matches.stream().sorted(Comparator.comparingInt(MatchRange::getStartPos)).toList();
|
||||||
|
|
||||||
int cumulativeOffset = 0; // Track cumulative text changes
|
int cumulativeOffset = 0;
|
||||||
|
|
||||||
for (MatchRange match : sortedMatches) {
|
for (MatchRange match : sortedMatches) {
|
||||||
int stringStartInPage = segment.getStartPos() + textOffsetInSegment;
|
int stringStartInPage = segment.getStartPos() + textOffsetInSegment;
|
||||||
@ -2668,7 +2762,7 @@ public class RedactionService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private int wipeAllTextInResources(PDDocument document, PDResources resources) {
|
private int wipeAllTextInResources(PDDocument document, PDResources resources) {
|
||||||
int totalMods = 0; // aggregated but currently not returned to caller
|
int totalMods = 0;
|
||||||
try {
|
try {
|
||||||
totalMods += wipeAllSemanticTextInProperties(resources);
|
totalMods += wipeAllSemanticTextInProperties(resources);
|
||||||
for (COSName xobjName : resources.getXObjectNames()) {
|
for (COSName xobjName : resources.getXObjectNames()) {
|
||||||
@ -2685,7 +2779,6 @@ public class RedactionService {
|
|||||||
return totalMods;
|
return totalMods;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper classes
|
|
||||||
private record WidthMeasurement(float width, boolean valid) {
|
private record WidthMeasurement(float width, boolean valid) {
|
||||||
|
|
||||||
public static WidthMeasurement invalid() {
|
public static WidthMeasurement invalid() {
|
||||||
|
@ -4,6 +4,53 @@
|
|||||||
|
|
||||||
<head>
|
<head>
|
||||||
<th:block th:insert="~{fragments/common :: head(title=#{autoRedact.title}, header=#{autoRedact.header})}"></th:block>
|
<th:block th:insert="~{fragments/common :: head(title=#{autoRedact.title}, header=#{autoRedact.header})}"></th:block>
|
||||||
|
<style>
|
||||||
|
.redaction-options-group {
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.form-text.text-muted {
|
||||||
|
color: #6c757d !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
.btn-primary:focus {
|
||||||
|
box-shadow: 0 0 0 0.2rem rgba(13, 110, 253, 0.25);
|
||||||
|
outline: 2px solid #0d6efd;
|
||||||
|
outline-offset: 2px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.form-check-input:focus {
|
||||||
|
box-shadow: 0 0 0 0.2rem rgba(13, 110, 253, 0.25);
|
||||||
|
outline: 2px solid #0d6efd;
|
||||||
|
outline-offset: 2px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.form-control:focus, .form-select:focus {
|
||||||
|
border-color: #0d6efd;
|
||||||
|
box-shadow: 0 0 0 0.2rem rgba(13, 110, 253, 0.25);
|
||||||
|
outline: 2px solid #0d6efd;
|
||||||
|
outline-offset: 2px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.form-check-input:checked {
|
||||||
|
background-color: #0d6efd;
|
||||||
|
border-color: #0d6efd;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
.sr-only {
|
||||||
|
position: absolute;
|
||||||
|
width: 1px;
|
||||||
|
height: 1px;
|
||||||
|
padding: 0;
|
||||||
|
margin: -1px;
|
||||||
|
overflow: hidden;
|
||||||
|
clip: rect(0, 0, 0, 0);
|
||||||
|
white-space: nowrap;
|
||||||
|
border: 0;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
<body>
|
<body>
|
||||||
@ -18,35 +65,112 @@
|
|||||||
<svg class="material-symbols-rounded tool-header-icon security">
|
<svg class="material-symbols-rounded tool-header-icon security">
|
||||||
<use xlink:href="/images/redact-auto.svg#icon-redact-auto"></use>
|
<use xlink:href="/images/redact-auto.svg#icon-redact-auto"></use>
|
||||||
</svg>
|
</svg>
|
||||||
<span class="tool-header-text" th:text="#{autoRedact.header}"></span>
|
<span class="tool-header-text" id="form-title" th:text="#{autoRedact.header}"></span>
|
||||||
</div>
|
</div>
|
||||||
<form enctype="multipart/form-data" id="autoRedactForm" method="post" th:action="@{'api/v1/security/auto-redact'}">
|
<form aria-labelledby="form-title" enctype="multipart/form-data" id="autoRedactForm"
|
||||||
|
method="post" th:action="@{'api/v1/security/auto-redact'}">
|
||||||
<div th:replace="~{fragments/common :: fileSelector(name='fileInput', multipleInputsForSingleRequest=false, disableMultipleFiles=true, accept='application/pdf')}"></div>
|
<div th:replace="~{fragments/common :: fileSelector(name='fileInput', multipleInputsForSingleRequest=false, disableMultipleFiles=true, accept='application/pdf')}"></div>
|
||||||
|
|
||||||
<div class="mb-3">
|
<div class="mb-3">
|
||||||
<label for="listOfText" class="form-label" th:text="#{autoRedact.textsToRedactLabel}"></label>
|
<label class="form-label" for="listOfText" th:text="#{autoRedact.textsToRedactLabel}"></label>
|
||||||
<textarea class="form-control" id="listOfText" name="listOfText" rows="4" required
|
<textarea class="form-control" id="listOfText" name="listOfText" required rows="4"
|
||||||
th:placeholder="#{autoRedact.textsToRedactPlaceholder}"></textarea>
|
th:placeholder="#{autoRedact.textsToRedactPlaceholder}"></textarea>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="mb-3">
|
<div class="mb-3">
|
||||||
<label for="defaultColor" class="form-label" th:text="#{autoRedact.colorLabel}">Color</label>
|
<div class="form-check">
|
||||||
<select class="form-control" id="defaultColor" name="defaultColor"
|
<input class="form-check-input" id="useRegex" name="useRegex" type="checkbox">
|
||||||
onchange="handleColorChange(this.value)">
|
<label class="form-check-label" for="useRegex" th:text="#{autoRedact.useRegexLabel}"></label>
|
||||||
<option value="#000000" th:text="#{black}">Black</option>
|
</div>
|
||||||
<option value="#FFFFFF" th:text="#{white}">White</option>
|
<div class="form-check">
|
||||||
<option value="#FF0000" th:text="#{red}">Red</option>
|
<input class="form-check-input" id="wholeWordSearch" name="wholeWordSearch" type="checkbox">
|
||||||
<option value="#00FF00" th:text="#{green}">Green</option>
|
<label class="form-check-label" for="wholeWordSearch" th:text="#{autoRedact.wholeWordSearchLabel}"></label>
|
||||||
<option value="#0000FF" th:text="#{blue}">Blue</option>
|
</div>
|
||||||
<option value="custom" th:text="#{custom}">Custom...</option>
|
</div>
|
||||||
|
|
||||||
|
<div class="redaction-options-group">
|
||||||
|
<label class="form-label fw-bold mb-3">Redaction style</label>
|
||||||
|
<div class="form-check mb-2">
|
||||||
|
<input aria-describedby="visual-desc" class="form-check-input" id="visualImage" name="redactionMode" type="radio" value="visual">
|
||||||
|
<label class="form-check-label" for="visualImage">Visual</label>
|
||||||
|
<small class="form-text text-muted d-block mt-1" id="visual-desc">Converts to image with visual redactions for maximum security.</small>
|
||||||
|
</div>
|
||||||
|
<div class="form-check mb-2">
|
||||||
|
<input aria-describedby="delete-desc" class="form-check-input" id="deleteText" name="redactionMode" type="radio" value="aggressive">
|
||||||
|
<label class="form-check-label" for="deleteText">Delete Text</label>
|
||||||
|
<small class="form-text text-muted d-block mt-1" id="delete-desc">Removes the text completely. This may alter the original layout or leave a gap.</small>
|
||||||
|
</div>
|
||||||
|
<div class="form-check mb-3">
|
||||||
|
<input aria-describedby="keep-desc" checked class="form-check-input" id="keepLayout" name="redactionMode" type="radio" value="moderate">
|
||||||
|
<label class="form-check-label" for="keepLayout">Keep Layout</label>
|
||||||
|
<small class="form-text text-muted d-block mt-1" id="keep-desc">Covers text with a redaction box, preserving the page's original design.</small>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="form-check">
|
||||||
|
<input aria-describedby="guarantee-desc" class="form-check-input" id="guaranteeRedaction" name="convertPDFToImage" type="checkbox">
|
||||||
|
<label class="form-check-label" for="guaranteeRedaction">PDF image</label>
|
||||||
|
<small class="form-text text-muted d-block mt-1" id="guarantee-desc">For maximum security, uses an image-based method to ensure text is unrecoverable. May slightly affect document quality.</small>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<br>
|
||||||
|
|
||||||
|
<div class="mb-3">
|
||||||
|
<label class="form-label" for="defaultColor" th:text="#{autoRedact.colorLabel}"></label>
|
||||||
|
<select class="form-select" id="defaultColor" name="defaultColor" onchange="handleColorChange(this.value)">
|
||||||
|
<option th:text="#{black}" value="#000000">Black</option>
|
||||||
|
<option th:text="#{white}" value="#FFFFFF">White</option>
|
||||||
|
<option th:text="#{red}" value="#FF0000">Red</option>
|
||||||
|
<option th:text="#{green}" value="#00FF00">Green</option>
|
||||||
|
<option th:text="#{blue}" value="#0000FF">Blue</option>
|
||||||
|
<option th:text="#{custom}" value="custom">Custom...</option>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Custom Color Input -->
|
|
||||||
<div class="mb-3" id="customColorContainer" style="display: none;">
|
<div class="mb-3" id="customColorContainer" style="display: none;">
|
||||||
<label for="customColor" class="form-label" th:text="#{autoRedact.colorLabel}">Custom Color</label>
|
<label class="form-label" for="customColor">Custom Color (Hex)</label>
|
||||||
<input type="text" class="form-control" id="customColor" name="redactColor" placeholder="#FF00FF">
|
<input class="form-control" id="customColor" name="redactColor" placeholder="#FF00FF" type="text">
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class="mb-3">
|
||||||
|
<label class="form-label" for="customPadding" th:text="#{autoRedact.customPaddingLabel}"></label>
|
||||||
|
<input class="form-control" id="customPadding" max="1" min="0" name="customPadding"
|
||||||
|
step="0.1" type="number" value="0.1">
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<br>
|
||||||
|
|
||||||
|
<div class="mb-3">
|
||||||
|
<label class="form-label" for="ocrLanguage">OCR Language</label>
|
||||||
|
<select aria-describedby="ocr-desc" class="form-select" id="ocrLanguage" name="ocrLanguage">
|
||||||
|
<option value="eng">English</option>
|
||||||
|
<option value="spa">Spanish</option>
|
||||||
|
<option value="fra">French</option>
|
||||||
|
<option value="deu">German</option>
|
||||||
|
<option value="ita">Italian</option>
|
||||||
|
<option value="por">Portuguese</option>
|
||||||
|
<option value="rus">Russian</option>
|
||||||
|
<option value="ara">Arabic</option>
|
||||||
|
<option value="chi_sim">Chinese (Simplified)</option>
|
||||||
|
<option value="jpn">Japanese</option>
|
||||||
|
<option value="kor">Korean</option>
|
||||||
|
<option value="hin">Hindi</option>
|
||||||
|
</select>
|
||||||
|
<small class="form-text text-muted" id="ocr-desc">Used when OCR restoration is needed</small>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<input id="aggressiveMode" name="aggressiveMode" type="hidden" value="false">
|
||||||
|
|
||||||
|
<div class="mb-3 text-center">
|
||||||
|
<button class="btn btn-primary" id="submitBtn" th:text="#{autoRedact.submitButton}" type="submit"></button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<th:block th:insert="~{fragments/footer.html :: footer}"></th:block>
|
||||||
|
</div>
|
||||||
<script>
|
<script>
|
||||||
function handleColorChange(selectedValue) {
|
function handleColorChange(selectedValue) {
|
||||||
const container = document.getElementById('customColorContainer');
|
const container = document.getElementById('customColorContainer');
|
||||||
@ -63,74 +187,43 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
document.addEventListener('DOMContentLoaded', function () {
|
document.addEventListener('DOMContentLoaded', function () {
|
||||||
const redactionModeSelect = document.getElementById('redactionMode');
|
const redactionModeRadios = document.querySelectorAll('input[name="redactionMode"]');
|
||||||
const aggressiveModeHidden = document.getElementById('aggressiveMode');
|
const aggressiveModeHidden = document.getElementById('aggressiveMode');
|
||||||
const convertToImageCheckbox = document.getElementById('convertPDFToImage');
|
const guaranteeRedactionCheckbox = document.getElementById('guaranteeRedaction');
|
||||||
const defaultColor = document.getElementById('defaultColor');
|
const defaultColor = document.getElementById('defaultColor');
|
||||||
|
|
||||||
redactionModeSelect.addEventListener('change', function () {
|
function updateMode() {
|
||||||
const mode = redactionModeSelect.value;
|
const selectedMode = document.querySelector('input[name="redactionMode"]:checked');
|
||||||
aggressiveModeHidden.value = (mode === 'aggressive') ? 'true' : 'false';
|
if (selectedMode) {
|
||||||
if (mode === 'visual') {
|
// Set aggressive mode for delete text option
|
||||||
convertToImageCheckbox.checked = true;
|
aggressiveModeHidden.value = selectedMode.value === 'aggressive' ? 'true' : 'false';
|
||||||
|
|
||||||
|
// Handle PDF image checkbox based on selection
|
||||||
|
if (selectedMode.value === 'visual') {
|
||||||
|
// Visual mode automatically enables PDF image for maximum security
|
||||||
|
guaranteeRedactionCheckbox.checked = true;
|
||||||
|
} else {
|
||||||
|
// Delete Text and Keep Layout modes disable PDF image
|
||||||
|
guaranteeRedactionCheckbox.checked = false;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
redactionModeRadios.forEach(radio => {
|
||||||
|
radio.addEventListener('change', updateMode);
|
||||||
});
|
});
|
||||||
|
|
||||||
if (defaultColor) {
|
if (defaultColor) {
|
||||||
handleColorChange(defaultColor.value);
|
handleColorChange(defaultColor.value);
|
||||||
|
// Set initial value for customColor input when a pre-defined color is selected
|
||||||
|
const customColorInput = document.getElementById('customColor');
|
||||||
|
if (defaultColor.value !== 'custom') {
|
||||||
|
customColorInput.value = defaultColor.value;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
aggressiveModeHidden.value = (redactionModeSelect.value === 'aggressive') ? 'true' : 'false';
|
updateMode();
|
||||||
if (redactionModeSelect.value === 'visual') {
|
|
||||||
convertToImageCheckbox.checked = true;
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
<div class="mb-3 form-check">
|
|
||||||
<input type="checkbox" id="useRegex" name="useRegex">
|
|
||||||
<label for="useRegex" th:text="#{autoRedact.useRegexLabel}"></label>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="mb-3 form-check">
|
|
||||||
<input type="checkbox" id="wholeWordSearch" name="wholeWordSearch">
|
|
||||||
<label for="wholeWordSearch" th:text="#{autoRedact.wholeWordSearchLabel}"></label>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="mb-3">
|
|
||||||
<label for="customPadding" class="form-label" th:text="#{autoRedact.customPaddingLabel}"></label>
|
|
||||||
<input type="number" step="0.1" class="form-control" id="customPadding" name="customPadding"
|
|
||||||
value="0.1">
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="mb-3 form-check">
|
|
||||||
<input id="convertPDFToImage" name="convertPDFToImage" type="checkbox">
|
|
||||||
<label for="convertPDFToImage" th:text="#{autoRedact.convertPDFToImageLabel}"></label>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="mb-3">
|
|
||||||
<label class="form-label" for="redactionMode" th:text="#{autoRedact.redactionModeLabel}">Redaction Mode</label>
|
|
||||||
<select class="form-control" id="redactionMode" name="redactionMode">
|
|
||||||
<option th:text="#{autoRedact.redactionMode.moderate}" value="moderate">Moderate - Smart text removal with
|
|
||||||
fallback
|
|
||||||
</option>
|
|
||||||
<option th:text="#{autoRedact.redactionMode.visual}" value="visual">Visual - Black boxes only</option>
|
|
||||||
<option th:text="#{autoRedact.redactionMode.aggressive}" value="aggressive">Aggressive - Force text removal
|
|
||||||
</option>
|
|
||||||
</select>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<!-- Keep for backward compatibility -->
|
|
||||||
<input id="aggressiveMode" name="aggressiveMode" type="hidden" value="false">
|
|
||||||
|
|
||||||
<button type="submit" id="submitBtn" class="btn btn-primary"
|
|
||||||
th:text="#{autoRedact.submitButton}"></button>
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<th:block th:insert="~{fragments/footer.html :: footer}"></th:block>
|
|
||||||
</div>
|
|
||||||
</body>
|
</body>
|
||||||
|
|
||||||
</html>
|
</html>
|
Loading…
Reference in New Issue
Block a user