mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-08 17:51:20 +02:00
Add multiple redaction strategies for PDF processing
Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
parent
d23c2eaa30
commit
39a2b5054e
File diff suppressed because it is too large
Load Diff
@ -46,4 +46,11 @@ public class RedactPdfRequest extends PDFFile {
|
|||||||
defaultValue = "false",
|
defaultValue = "false",
|
||||||
requiredMode = Schema.RequiredMode.REQUIRED)
|
requiredMode = Schema.RequiredMode.REQUIRED)
|
||||||
private Boolean convertPDFToImage;
|
private Boolean convertPDFToImage;
|
||||||
|
|
||||||
|
@Schema(
|
||||||
|
description = "Redaction mode: moderate, visual, or aggressive",
|
||||||
|
defaultValue = "moderate",
|
||||||
|
allowableValues = {"moderate", "visual", "aggressive"},
|
||||||
|
requiredMode = Schema.RequiredMode.NOT_REQUIRED)
|
||||||
|
private String redactionMode;
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,85 @@
|
|||||||
|
package stirling.software.SPDF.service;
|
||||||
|
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
|
|
||||||
|
import stirling.software.SPDF.model.PDFText;
|
||||||
|
import stirling.software.SPDF.model.api.security.RedactPdfRequest;
|
||||||
|
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||||
|
|
||||||
|
class AggressiveRedactionService implements RedactionModeStrategy {
|
||||||
|
|
||||||
|
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||||
|
private final RedactionService helper;
|
||||||
|
|
||||||
|
AggressiveRedactionService(
|
||||||
|
CustomPDFDocumentFactory pdfDocumentFactory, RedactionService helper) {
|
||||||
|
this.pdfDocumentFactory = pdfDocumentFactory;
|
||||||
|
this.helper = helper;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public byte[] redact(RedactPdfRequest request) throws IOException {
|
||||||
|
String[] listOfText = request.getListOfText().split("\n");
|
||||||
|
boolean useRegex = Boolean.TRUE.equals(request.getUseRegex());
|
||||||
|
boolean wholeWord = Boolean.TRUE.equals(request.getWholeWordSearch());
|
||||||
|
|
||||||
|
PDDocument doc = null;
|
||||||
|
PDDocument fb = null;
|
||||||
|
try {
|
||||||
|
doc = pdfDocumentFactory.load(request.getFileInput());
|
||||||
|
Map<Integer, List<PDFText>> allFound =
|
||||||
|
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||||
|
if (allFound.isEmpty()) {
|
||||||
|
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||||
|
doc.save(baos);
|
||||||
|
return baos.toByteArray();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
helper.performTextReplacementAggressive(doc, allFound, listOfText, useRegex, wholeWord);
|
||||||
|
Map<Integer, List<PDFText>> residual =
|
||||||
|
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||||
|
boolean residualExists = residual.values().stream().mapToInt(List::size).sum() > 0;
|
||||||
|
String effectiveColor =
|
||||||
|
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
||||||
|
? "#000000"
|
||||||
|
: request.getRedactColor();
|
||||||
|
if (residualExists) {
|
||||||
|
fb = pdfDocumentFactory.load(request.getFileInput());
|
||||||
|
Map<Integer, List<PDFText>> fbFound =
|
||||||
|
RedactionService.findTextToRedact(fb, listOfText, useRegex, wholeWord);
|
||||||
|
return RedactionService.finalizeRedaction(
|
||||||
|
fb,
|
||||||
|
fbFound,
|
||||||
|
effectiveColor,
|
||||||
|
request.getCustomPadding(), /*force*/
|
||||||
|
true,
|
||||||
|
false);
|
||||||
|
}
|
||||||
|
return RedactionService.finalizeRedaction(
|
||||||
|
doc,
|
||||||
|
allFound,
|
||||||
|
request.getRedactColor(),
|
||||||
|
request.getCustomPadding(),
|
||||||
|
request.getConvertPDFToImage(), /*text removal*/
|
||||||
|
true);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new IOException("Aggressive redaction failed: " + e.getMessage(), e);
|
||||||
|
} finally {
|
||||||
|
if (doc != null)
|
||||||
|
try {
|
||||||
|
doc.close();
|
||||||
|
} catch (IOException ignore) {
|
||||||
|
}
|
||||||
|
if (fb != null)
|
||||||
|
try {
|
||||||
|
fb.close();
|
||||||
|
} catch (IOException ignore) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,83 @@
|
|||||||
|
package stirling.software.SPDF.service;
|
||||||
|
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
|
|
||||||
|
import stirling.software.SPDF.model.PDFText;
|
||||||
|
import stirling.software.SPDF.model.api.security.RedactPdfRequest;
|
||||||
|
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||||
|
|
||||||
|
class ModerateRedactionService implements RedactionModeStrategy {
|
||||||
|
|
||||||
|
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||||
|
private final RedactionService helper;
|
||||||
|
|
||||||
|
ModerateRedactionService(CustomPDFDocumentFactory pdfDocumentFactory, RedactionService helper) {
|
||||||
|
this.pdfDocumentFactory = pdfDocumentFactory;
|
||||||
|
this.helper = helper;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public byte[] redact(RedactPdfRequest request) throws IOException {
|
||||||
|
String[] listOfText = request.getListOfText().split("\n");
|
||||||
|
boolean useRegex = Boolean.TRUE.equals(request.getUseRegex());
|
||||||
|
boolean wholeWord = Boolean.TRUE.equals(request.getWholeWordSearch());
|
||||||
|
|
||||||
|
PDDocument doc = null;
|
||||||
|
PDDocument fallback = null;
|
||||||
|
try {
|
||||||
|
doc = pdfDocumentFactory.load(request.getFileInput());
|
||||||
|
Map<Integer, List<PDFText>> allFound =
|
||||||
|
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||||
|
if (allFound.isEmpty()) {
|
||||||
|
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||||
|
doc.save(baos);
|
||||||
|
return baos.toByteArray();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
boolean fallbackToBoxOnly =
|
||||||
|
helper.performTextReplacement(doc, allFound, listOfText, useRegex, wholeWord);
|
||||||
|
String effectiveColor =
|
||||||
|
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
||||||
|
? "#000000"
|
||||||
|
: request.getRedactColor();
|
||||||
|
if (fallbackToBoxOnly) {
|
||||||
|
fallback = pdfDocumentFactory.load(request.getFileInput());
|
||||||
|
allFound =
|
||||||
|
RedactionService.findTextToRedact(
|
||||||
|
fallback, listOfText, useRegex, wholeWord);
|
||||||
|
return RedactionService.finalizeRedaction(
|
||||||
|
fallback,
|
||||||
|
allFound,
|
||||||
|
effectiveColor,
|
||||||
|
request.getCustomPadding(),
|
||||||
|
request.getConvertPDFToImage(),
|
||||||
|
false);
|
||||||
|
}
|
||||||
|
return RedactionService.finalizeRedaction(
|
||||||
|
doc,
|
||||||
|
allFound,
|
||||||
|
effectiveColor,
|
||||||
|
request.getCustomPadding(),
|
||||||
|
request.getConvertPDFToImage(),
|
||||||
|
false);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new IOException("Moderate redaction failed: " + e.getMessage(), e);
|
||||||
|
} finally {
|
||||||
|
if (doc != null)
|
||||||
|
try {
|
||||||
|
doc.close();
|
||||||
|
} catch (IOException ignore) {
|
||||||
|
}
|
||||||
|
if (fallback != null)
|
||||||
|
try {
|
||||||
|
fallback.close();
|
||||||
|
} catch (IOException ignore) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,9 @@
|
|||||||
|
package stirling.software.SPDF.service;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import stirling.software.SPDF.model.api.security.RedactPdfRequest;
|
||||||
|
|
||||||
|
public interface RedactionModeStrategy {
|
||||||
|
byte[] redact(RedactPdfRequest request) throws IOException;
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,50 @@
|
|||||||
|
package stirling.software.SPDF.service;
|
||||||
|
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
|
|
||||||
|
import stirling.software.SPDF.model.PDFText;
|
||||||
|
import stirling.software.SPDF.model.api.security.RedactPdfRequest;
|
||||||
|
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||||
|
|
||||||
|
class VisualRedactionService implements RedactionModeStrategy {
|
||||||
|
|
||||||
|
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||||
|
|
||||||
|
VisualRedactionService(CustomPDFDocumentFactory pdfDocumentFactory, RedactionService helper) {
|
||||||
|
this.pdfDocumentFactory = pdfDocumentFactory;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public byte[] redact(RedactPdfRequest request) throws IOException {
|
||||||
|
String[] listOfText = request.getListOfText().split("\n");
|
||||||
|
boolean useRegex = Boolean.TRUE.equals(request.getUseRegex());
|
||||||
|
boolean wholeWord = Boolean.TRUE.equals(request.getWholeWordSearch());
|
||||||
|
|
||||||
|
try (PDDocument document = pdfDocumentFactory.load(request.getFileInput())) {
|
||||||
|
Map<Integer, List<PDFText>> allFound =
|
||||||
|
RedactionService.findTextToRedact(document, listOfText, useRegex, wholeWord);
|
||||||
|
if (allFound.isEmpty()) {
|
||||||
|
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||||
|
document.save(baos);
|
||||||
|
return baos.toByteArray();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
String effectiveColor =
|
||||||
|
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
||||||
|
? "#000000"
|
||||||
|
: request.getRedactColor();
|
||||||
|
return RedactionService.finalizeRedaction(
|
||||||
|
document,
|
||||||
|
allFound,
|
||||||
|
effectiveColor,
|
||||||
|
request.getCustomPadding(),
|
||||||
|
request.getConvertPDFToImage(),
|
||||||
|
false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -20,7 +20,7 @@
|
|||||||
</svg>
|
</svg>
|
||||||
<span class="tool-header-text" th:text="#{autoRedact.header}"></span>
|
<span class="tool-header-text" th:text="#{autoRedact.header}"></span>
|
||||||
</div>
|
</div>
|
||||||
<form th:action="@{'api/v1/security/auto-redact'}" method="post" enctype="multipart/form-data">
|
<form enctype="multipart/form-data" id="autoRedactForm" method="post" th:action="@{'api/v1/security/auto-redact'}">
|
||||||
<div class="mb-3">
|
<div class="mb-3">
|
||||||
<input type="file" class="form-control" id="fileInput" name="fileInput" required
|
<input type="file" class="form-control" id="fileInput" name="fileInput" required
|
||||||
accept="application/pdf">
|
accept="application/pdf">
|
||||||
@ -53,13 +53,42 @@
|
|||||||
|
|
||||||
<script>
|
<script>
|
||||||
/**
 * Updates the colour controls after the colour selection changes.
 *
 * When "custom" is selected the custom colour container is shown and the colour
 * input is seeded with black if it is empty; for any other selection the container
 * is hidden and the input takes the selected value.
 *
 * @param {string} selectedValue value of the chosen colour option
 */
function handleColorChange(selectedValue) {
  const container = document.getElementById('customColorContainer');
  const input = document.getElementById('customColor');
  if (selectedValue === "custom") {
    container.style.display = 'block';
    if (!input.value) {
      // Make sure a usable colour is submitted even if the picker is never touched.
      input.value = '#000000';
    }
  } else {
    container.style.display = 'none';
    input.value = selectedValue;
  }
}
|
||||||
|
|
||||||
|
// Once the page is parsed, keep the hidden aggressiveMode field and the
// convert-to-image checkbox in step with the selected redaction mode.
document.addEventListener('DOMContentLoaded', function () {
  const modeSelect = document.getElementById('redactionMode');
  const aggressiveField = document.getElementById('aggressiveMode');
  const imageCheckbox = document.getElementById('convertPDFToImage');
  const colorSelect = document.getElementById('defaultColor');

  // Mirrors the current mode into the dependent controls.
  function syncModeControls() {
    const mode = modeSelect.value;
    aggressiveField.value = (mode === 'aggressive') ? 'true' : 'false';
    if (mode === 'visual') {
      // Selecting the visual mode ticks the convert-to-image option.
      imageCheckbox.checked = true;
    }
  }

  modeSelect.addEventListener('change', function () {
    syncModeControls();
  });

  if (colorSelect) {
    handleColorChange(colorSelect.value);
  }

  // Apply the initial state for whatever mode is preselected.
  syncModeControls();
});
|
||||||
</script>
|
</script>
|
||||||
<div class="mb-3 form-check">
|
<div class="mb-3 form-check">
|
||||||
<input type="checkbox" id="useRegex" name="useRegex">
|
<input type="checkbox" id="useRegex" name="useRegex">
|
||||||
@ -82,6 +111,21 @@
|
|||||||
<label for="convertPDFToImage" th:text="#{autoRedact.convertPDFToImageLabel}"></label>
|
<label for="convertPDFToImage" th:text="#{autoRedact.convertPDFToImageLabel}"></label>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class="mb-3">
|
||||||
|
<label class="form-label" for="redactionMode" th:text="#{autoRedact.redactionModeLabel}">Redaction Mode</label>
|
||||||
|
<select class="form-control" id="redactionMode" name="redactionMode">
|
||||||
|
<option th:text="#{autoRedact.redactionMode.moderate}" value="moderate">Moderate - Smart text removal with
|
||||||
|
fallback
|
||||||
|
</option>
|
||||||
|
<option th:text="#{autoRedact.redactionMode.visual}" value="visual">Visual - Black boxes only</option>
|
||||||
|
<option th:text="#{autoRedact.redactionMode.aggressive}" value="aggressive">Aggressive - Force text removal
|
||||||
|
</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Keep for backward compatibility -->
|
||||||
|
<input id="aggressiveMode" name="aggressiveMode" type="hidden" value="false">
|
||||||
|
|
||||||
<button type="submit" id="submitBtn" class="btn btn-primary"
|
<button type="submit" id="submitBtn" class="btn btn-primary"
|
||||||
th:text="#{autoRedact.submitButton}"></button>
|
th:text="#{autoRedact.submitButton}"></button>
|
||||||
</form>
|
</form>
|
||||||
|
Loading…
Reference in New Issue
Block a user