mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-08 17:51:20 +02:00
Add multiple redaction strategies for PDF processing
Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
parent
d23c2eaa30
commit
39a2b5054e
File diff suppressed because it is too large
Load Diff
@ -46,4 +46,11 @@ public class RedactPdfRequest extends PDFFile {
|
||||
defaultValue = "false",
|
||||
requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
private Boolean convertPDFToImage;
|
||||
|
||||
@Schema(
|
||||
description = "Redaction mode: moderate, visual, or aggressive",
|
||||
defaultValue = "moderate",
|
||||
allowableValues = {"moderate", "visual", "aggressive"},
|
||||
requiredMode = Schema.RequiredMode.NOT_REQUIRED)
|
||||
private String redactionMode;
|
||||
}
|
||||
|
@ -0,0 +1,85 @@
|
||||
package stirling.software.SPDF.service;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
|
||||
import stirling.software.SPDF.model.PDFText;
|
||||
import stirling.software.SPDF.model.api.security.RedactPdfRequest;
|
||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
|
||||
class AggressiveRedactionService implements RedactionModeStrategy {
|
||||
|
||||
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||
private final RedactionService helper;
|
||||
|
||||
AggressiveRedactionService(
|
||||
CustomPDFDocumentFactory pdfDocumentFactory, RedactionService helper) {
|
||||
this.pdfDocumentFactory = pdfDocumentFactory;
|
||||
this.helper = helper;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] redact(RedactPdfRequest request) throws IOException {
|
||||
String[] listOfText = request.getListOfText().split("\n");
|
||||
boolean useRegex = Boolean.TRUE.equals(request.getUseRegex());
|
||||
boolean wholeWord = Boolean.TRUE.equals(request.getWholeWordSearch());
|
||||
|
||||
PDDocument doc = null;
|
||||
PDDocument fb = null;
|
||||
try {
|
||||
doc = pdfDocumentFactory.load(request.getFileInput());
|
||||
Map<Integer, List<PDFText>> allFound =
|
||||
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||
if (allFound.isEmpty()) {
|
||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||
doc.save(baos);
|
||||
return baos.toByteArray();
|
||||
}
|
||||
}
|
||||
helper.performTextReplacementAggressive(doc, allFound, listOfText, useRegex, wholeWord);
|
||||
Map<Integer, List<PDFText>> residual =
|
||||
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||
boolean residualExists = residual.values().stream().mapToInt(List::size).sum() > 0;
|
||||
String effectiveColor =
|
||||
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
||||
? "#000000"
|
||||
: request.getRedactColor();
|
||||
if (residualExists) {
|
||||
fb = pdfDocumentFactory.load(request.getFileInput());
|
||||
Map<Integer, List<PDFText>> fbFound =
|
||||
RedactionService.findTextToRedact(fb, listOfText, useRegex, wholeWord);
|
||||
return RedactionService.finalizeRedaction(
|
||||
fb,
|
||||
fbFound,
|
||||
effectiveColor,
|
||||
request.getCustomPadding(), /*force*/
|
||||
true,
|
||||
false);
|
||||
}
|
||||
return RedactionService.finalizeRedaction(
|
||||
doc,
|
||||
allFound,
|
||||
request.getRedactColor(),
|
||||
request.getCustomPadding(),
|
||||
request.getConvertPDFToImage(), /*text removal*/
|
||||
true);
|
||||
} catch (Exception e) {
|
||||
throw new IOException("Aggressive redaction failed: " + e.getMessage(), e);
|
||||
} finally {
|
||||
if (doc != null)
|
||||
try {
|
||||
doc.close();
|
||||
} catch (IOException ignore) {
|
||||
}
|
||||
if (fb != null)
|
||||
try {
|
||||
fb.close();
|
||||
} catch (IOException ignore) {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,83 @@
|
||||
package stirling.software.SPDF.service;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
|
||||
import stirling.software.SPDF.model.PDFText;
|
||||
import stirling.software.SPDF.model.api.security.RedactPdfRequest;
|
||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
|
||||
class ModerateRedactionService implements RedactionModeStrategy {
|
||||
|
||||
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||
private final RedactionService helper;
|
||||
|
||||
ModerateRedactionService(CustomPDFDocumentFactory pdfDocumentFactory, RedactionService helper) {
|
||||
this.pdfDocumentFactory = pdfDocumentFactory;
|
||||
this.helper = helper;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] redact(RedactPdfRequest request) throws IOException {
|
||||
String[] listOfText = request.getListOfText().split("\n");
|
||||
boolean useRegex = Boolean.TRUE.equals(request.getUseRegex());
|
||||
boolean wholeWord = Boolean.TRUE.equals(request.getWholeWordSearch());
|
||||
|
||||
PDDocument doc = null;
|
||||
PDDocument fallback = null;
|
||||
try {
|
||||
doc = pdfDocumentFactory.load(request.getFileInput());
|
||||
Map<Integer, List<PDFText>> allFound =
|
||||
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||
if (allFound.isEmpty()) {
|
||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||
doc.save(baos);
|
||||
return baos.toByteArray();
|
||||
}
|
||||
}
|
||||
boolean fallbackToBoxOnly =
|
||||
helper.performTextReplacement(doc, allFound, listOfText, useRegex, wholeWord);
|
||||
String effectiveColor =
|
||||
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
||||
? "#000000"
|
||||
: request.getRedactColor();
|
||||
if (fallbackToBoxOnly) {
|
||||
fallback = pdfDocumentFactory.load(request.getFileInput());
|
||||
allFound =
|
||||
RedactionService.findTextToRedact(
|
||||
fallback, listOfText, useRegex, wholeWord);
|
||||
return RedactionService.finalizeRedaction(
|
||||
fallback,
|
||||
allFound,
|
||||
effectiveColor,
|
||||
request.getCustomPadding(),
|
||||
request.getConvertPDFToImage(),
|
||||
false);
|
||||
}
|
||||
return RedactionService.finalizeRedaction(
|
||||
doc,
|
||||
allFound,
|
||||
effectiveColor,
|
||||
request.getCustomPadding(),
|
||||
request.getConvertPDFToImage(),
|
||||
false);
|
||||
} catch (Exception e) {
|
||||
throw new IOException("Moderate redaction failed: " + e.getMessage(), e);
|
||||
} finally {
|
||||
if (doc != null)
|
||||
try {
|
||||
doc.close();
|
||||
} catch (IOException ignore) {
|
||||
}
|
||||
if (fallback != null)
|
||||
try {
|
||||
fallback.close();
|
||||
} catch (IOException ignore) {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,9 @@
|
||||
package stirling.software.SPDF.service;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import stirling.software.SPDF.model.api.security.RedactPdfRequest;
|
||||
|
||||
public interface RedactionModeStrategy {
|
||||
byte[] redact(RedactPdfRequest request) throws IOException;
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,50 @@
|
||||
package stirling.software.SPDF.service;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
|
||||
import stirling.software.SPDF.model.PDFText;
|
||||
import stirling.software.SPDF.model.api.security.RedactPdfRequest;
|
||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
|
||||
class VisualRedactionService implements RedactionModeStrategy {
|
||||
|
||||
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||
|
||||
VisualRedactionService(CustomPDFDocumentFactory pdfDocumentFactory, RedactionService helper) {
|
||||
this.pdfDocumentFactory = pdfDocumentFactory;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] redact(RedactPdfRequest request) throws IOException {
|
||||
String[] listOfText = request.getListOfText().split("\n");
|
||||
boolean useRegex = Boolean.TRUE.equals(request.getUseRegex());
|
||||
boolean wholeWord = Boolean.TRUE.equals(request.getWholeWordSearch());
|
||||
|
||||
try (PDDocument document = pdfDocumentFactory.load(request.getFileInput())) {
|
||||
Map<Integer, List<PDFText>> allFound =
|
||||
RedactionService.findTextToRedact(document, listOfText, useRegex, wholeWord);
|
||||
if (allFound.isEmpty()) {
|
||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||
document.save(baos);
|
||||
return baos.toByteArray();
|
||||
}
|
||||
}
|
||||
String effectiveColor =
|
||||
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
||||
? "#000000"
|
||||
: request.getRedactColor();
|
||||
return RedactionService.finalizeRedaction(
|
||||
document,
|
||||
allFound,
|
||||
effectiveColor,
|
||||
request.getCustomPadding(),
|
||||
request.getConvertPDFToImage(),
|
||||
false);
|
||||
}
|
||||
}
|
||||
}
|
@ -20,7 +20,7 @@
|
||||
</svg>
|
||||
<span class="tool-header-text" th:text="#{autoRedact.header}"></span>
|
||||
</div>
|
||||
<form enctype="multipart/form-data" id="autoRedactForm" method="post" th:action="@{'api/v1/security/auto-redact'}">
|
||||
<div class="mb-3">
|
||||
<input type="file" class="form-control" id="fileInput" name="fileInput" required
|
||||
accept="application/pdf">
|
||||
@ -53,13 +53,42 @@
|
||||
|
||||
<script>
|
||||
/**
 * Reacts to a change of the colour selection.
 *
 * When "custom" is chosen, the custom colour container is shown and the
 * custom colour input is seeded with '#000000' if it is still empty. For any
 * other value the container is hidden and the input takes the selected value.
 *
 * @param {string} selectedValue value of the chosen colour option
 */
function handleColorChange(selectedValue) {
  // Look each element up once and reuse the reference in both branches.
  const container = document.getElementById('customColorContainer');
  const input = document.getElementById('customColor');
  if (selectedValue === "custom") {
    container.style.display = 'block';
    if (!input.value) {
      input.value = '#000000';
    }
  } else {
    container.style.display = 'none';
    input.value = selectedValue;
  }
}
|
||||
|
||||
// Once the DOM is ready, keep the hidden aggressiveMode field and the
// convert-to-image checkbox in step with the redaction mode dropdown, both on
// every change and once for the initial selection.
document.addEventListener('DOMContentLoaded', function () {
  const modeSelect = document.getElementById('redactionMode');
  const aggressiveField = document.getElementById('aggressiveMode');
  const imageCheckbox = document.getElementById('convertPDFToImage');
  const colorSelect = document.getElementById('defaultColor');

  // Mirrors the current mode into the hidden field; visual mode also ticks
  // the convert-to-image checkbox (it is never unticked here).
  function syncWithMode() {
    const currentMode = modeSelect.value;
    if (currentMode === 'aggressive') {
      aggressiveField.value = 'true';
    } else {
      aggressiveField.value = 'false';
    }
    if (currentMode === 'visual') {
      imageCheckbox.checked = true;
    }
  }

  modeSelect.addEventListener('change', syncWithMode);

  // Apply the colour handling for the current selection, if the select exists.
  if (colorSelect) {
    handleColorChange(colorSelect.value);
  }

  syncWithMode();
});
|
||||
</script>
|
||||
<div class="mb-3 form-check">
|
||||
<input type="checkbox" id="useRegex" name="useRegex">
|
||||
@ -82,6 +111,21 @@
|
||||
<label for="convertPDFToImage" th:text="#{autoRedact.convertPDFToImageLabel}"></label>
|
||||
</div>
|
||||
|
||||
<div class="mb-3">
|
||||
<label class="form-label" for="redactionMode" th:text="#{autoRedact.redactionModeLabel}">Redaction Mode</label>
|
||||
<select class="form-control" id="redactionMode" name="redactionMode">
|
||||
<option th:text="#{autoRedact.redactionMode.moderate}" value="moderate">Moderate - Smart text removal with
|
||||
fallback
|
||||
</option>
|
||||
<option th:text="#{autoRedact.redactionMode.visual}" value="visual">Visual - Black boxes only</option>
|
||||
<option th:text="#{autoRedact.redactionMode.aggressive}" value="aggressive">Aggressive - Force text removal
|
||||
</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<!-- Keep for backward compatibility -->
|
||||
<input id="aggressiveMode" name="aggressiveMode" type="hidden" value="false">
|
||||
|
||||
<button type="submit" id="submitBtn" class="btn btn-primary"
|
||||
th:text="#{autoRedact.submitButton}"></button>
|
||||
</form>
|
||||
|
Loading…
Reference in New Issue
Block a user