mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-08 17:51:20 +02:00
Refactor redaction services and utilities for improved readability and maintainability
- Adjusted indentation and formatting across multiple files for consistency.
- Improved import ordering in utility classes for better organization.
- Enhanced the `performTextReplacementAggressive` method with multi-sweep logic to handle residual text more effectively.
- Added helper methods for verifying document text targets to streamline aggressive redaction.
- Simplified logic and formatting in `RedactionService` and related classes.

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
parent
8f19369c58
commit
1fac74a3ca
@ -53,12 +53,12 @@ public class RedactController {
|
||||
throws IOException {
|
||||
byte[] pdfContent = redactionService.redactPDF(request);
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
pdfContent,
|
||||
removeFileExtension(
|
||||
Objects.requireNonNull(
|
||||
Filenames.toSimpleFileName(
|
||||
request.getFileInput().getOriginalFilename())))
|
||||
+ "_redacted.pdf");
|
||||
pdfContent,
|
||||
removeFileExtension(
|
||||
Objects.requireNonNull(
|
||||
Filenames.toSimpleFileName(
|
||||
request.getFileInput().getOriginalFilename())))
|
||||
+ "_redacted.pdf");
|
||||
}
|
||||
|
||||
@PostMapping(value = "/auto-redact", consumes = "multipart/form-data")
|
||||
@ -69,14 +69,14 @@ public class RedactController {
|
||||
+ "Users can provide text patterns to redact, with options for regex and whole word matching. "
|
||||
+ "Input:PDF Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> redactPdf(@ModelAttribute RedactPdfRequest request)
|
||||
throws IOException {
|
||||
throws IOException {
|
||||
byte[] pdfContent = redactionService.redactPdf(request);
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
pdfContent,
|
||||
removeFileExtension(
|
||||
Objects.requireNonNull(
|
||||
Filenames.toSimpleFileName(
|
||||
request.getFileInput().getOriginalFilename())))
|
||||
+ "_redacted.pdf");
|
||||
pdfContent,
|
||||
removeFileExtension(
|
||||
Objects.requireNonNull(
|
||||
Filenames.toSimpleFileName(
|
||||
request.getFileInput().getOriginalFilename())))
|
||||
+ "_redacted.pdf");
|
||||
}
|
||||
}
|
||||
|
@ -48,9 +48,9 @@ public class RedactPdfRequest extends PDFFile {
|
||||
private Boolean convertPDFToImage;
|
||||
|
||||
@Schema(
|
||||
description = "Redaction mode: moderate, visual, or aggressive",
|
||||
defaultValue = "moderate",
|
||||
allowableValues = {"moderate", "visual", "aggressive"},
|
||||
requiredMode = Schema.RequiredMode.NOT_REQUIRED)
|
||||
description = "Redaction mode: moderate, visual, or aggressive",
|
||||
defaultValue = "moderate",
|
||||
allowableValues = {"moderate", "visual", "aggressive"},
|
||||
requiredMode = Schema.RequiredMode.NOT_REQUIRED)
|
||||
private String redactionMode;
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ class AggressiveRedactionService implements RedactionModeStrategy {
|
||||
private final RedactionService helper;
|
||||
|
||||
AggressiveRedactionService(
|
||||
CustomPDFDocumentFactory pdfDocumentFactory, RedactionService helper) {
|
||||
CustomPDFDocumentFactory pdfDocumentFactory, RedactionService helper) {
|
||||
this.pdfDocumentFactory = pdfDocumentFactory;
|
||||
this.helper = helper;
|
||||
}
|
||||
@ -33,7 +33,7 @@ class AggressiveRedactionService implements RedactionModeStrategy {
|
||||
try {
|
||||
doc = pdfDocumentFactory.load(request.getFileInput());
|
||||
Map<Integer, List<PDFText>> allFound =
|
||||
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||
if (allFound.isEmpty()) {
|
||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||
doc.save(baos);
|
||||
@ -42,31 +42,31 @@ class AggressiveRedactionService implements RedactionModeStrategy {
|
||||
}
|
||||
helper.performTextReplacementAggressive(doc, allFound, listOfText, useRegex, wholeWord);
|
||||
Map<Integer, List<PDFText>> residual =
|
||||
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||
boolean residualExists = residual.values().stream().mapToInt(List::size).sum() > 0;
|
||||
String effectiveColor =
|
||||
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
||||
? "#000000"
|
||||
: request.getRedactColor();
|
||||
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
||||
? "#000000"
|
||||
: request.getRedactColor();
|
||||
if (residualExists) {
|
||||
fb = pdfDocumentFactory.load(request.getFileInput());
|
||||
Map<Integer, List<PDFText>> fbFound =
|
||||
RedactionService.findTextToRedact(fb, listOfText, useRegex, wholeWord);
|
||||
RedactionService.findTextToRedact(fb, listOfText, useRegex, wholeWord);
|
||||
return RedactionService.finalizeRedaction(
|
||||
fb,
|
||||
fbFound,
|
||||
effectiveColor,
|
||||
request.getCustomPadding(), /*force*/
|
||||
true,
|
||||
false);
|
||||
fb,
|
||||
fbFound,
|
||||
effectiveColor,
|
||||
request.getCustomPadding(), /*force*/
|
||||
true,
|
||||
false);
|
||||
}
|
||||
return RedactionService.finalizeRedaction(
|
||||
doc,
|
||||
allFound,
|
||||
request.getRedactColor(),
|
||||
request.getCustomPadding(),
|
||||
request.getConvertPDFToImage(), /*text removal*/
|
||||
true);
|
||||
doc,
|
||||
allFound,
|
||||
request.getRedactColor(),
|
||||
request.getCustomPadding(),
|
||||
request.getConvertPDFToImage(), /*text removal*/
|
||||
true);
|
||||
} catch (Exception e) {
|
||||
throw new IOException("Aggressive redaction failed: " + e.getMessage(), e);
|
||||
} finally {
|
||||
|
@ -32,7 +32,7 @@ class ModerateRedactionService implements RedactionModeStrategy {
|
||||
try {
|
||||
doc = pdfDocumentFactory.load(request.getFileInput());
|
||||
Map<Integer, List<PDFText>> allFound =
|
||||
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||
if (allFound.isEmpty()) {
|
||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||
doc.save(baos);
|
||||
@ -40,31 +40,31 @@ class ModerateRedactionService implements RedactionModeStrategy {
|
||||
}
|
||||
}
|
||||
boolean fallbackToBoxOnly =
|
||||
helper.performTextReplacement(doc, allFound, listOfText, useRegex, wholeWord);
|
||||
helper.performTextReplacement(doc, allFound, listOfText, useRegex, wholeWord);
|
||||
String effectiveColor =
|
||||
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
||||
? "#000000"
|
||||
: request.getRedactColor();
|
||||
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
||||
? "#000000"
|
||||
: request.getRedactColor();
|
||||
if (fallbackToBoxOnly) {
|
||||
fallback = pdfDocumentFactory.load(request.getFileInput());
|
||||
allFound =
|
||||
RedactionService.findTextToRedact(
|
||||
fallback, listOfText, useRegex, wholeWord);
|
||||
RedactionService.findTextToRedact(
|
||||
fallback, listOfText, useRegex, wholeWord);
|
||||
return RedactionService.finalizeRedaction(
|
||||
fallback,
|
||||
fallback,
|
||||
allFound,
|
||||
effectiveColor,
|
||||
request.getCustomPadding(),
|
||||
request.getConvertPDFToImage(),
|
||||
false);
|
||||
}
|
||||
return RedactionService.finalizeRedaction(
|
||||
doc,
|
||||
allFound,
|
||||
effectiveColor,
|
||||
request.getCustomPadding(),
|
||||
request.getConvertPDFToImage(),
|
||||
false);
|
||||
}
|
||||
return RedactionService.finalizeRedaction(
|
||||
doc,
|
||||
allFound,
|
||||
effectiveColor,
|
||||
request.getCustomPadding(),
|
||||
request.getConvertPDFToImage(),
|
||||
false);
|
||||
} catch (Exception e) {
|
||||
throw new IOException("Moderate redaction failed: " + e.getMessage(), e);
|
||||
} finally {
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -27,7 +27,7 @@ class VisualRedactionService implements RedactionModeStrategy {
|
||||
|
||||
try (PDDocument document = pdfDocumentFactory.load(request.getFileInput())) {
|
||||
Map<Integer, List<PDFText>> allFound =
|
||||
RedactionService.findTextToRedact(document, listOfText, useRegex, wholeWord);
|
||||
RedactionService.findTextToRedact(document, listOfText, useRegex, wholeWord);
|
||||
if (allFound.isEmpty()) {
|
||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||
document.save(baos);
|
||||
@ -35,16 +35,16 @@ class VisualRedactionService implements RedactionModeStrategy {
|
||||
}
|
||||
}
|
||||
String effectiveColor =
|
||||
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
||||
? "#000000"
|
||||
: request.getRedactColor();
|
||||
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
||||
? "#000000"
|
||||
: request.getRedactColor();
|
||||
return RedactionService.finalizeRedaction(
|
||||
document,
|
||||
allFound,
|
||||
effectiveColor,
|
||||
request.getCustomPadding(),
|
||||
request.getConvertPDFToImage(),
|
||||
false);
|
||||
document,
|
||||
allFound,
|
||||
effectiveColor,
|
||||
request.getCustomPadding(),
|
||||
request.getConvertPDFToImage(),
|
||||
false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,17 +1,17 @@
|
||||
package stirling.software.SPDF.utils.text;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
import org.apache.pdfbox.cos.COSString;
|
||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType0Font;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.pdfbox.cos.COSString;
|
||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType0Font;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@UtilityClass
|
||||
public class TextDecodingHelper {
|
||||
@ -34,8 +34,8 @@ public class TextDecodingHelper {
|
||||
|
||||
String basicDecoded = tryDecodeWithFont(font, cosString);
|
||||
if (basicDecoded != null
|
||||
&& !basicDecoded.contains("?")
|
||||
&& !basicDecoded.trim().isEmpty()) {
|
||||
&& !basicDecoded.contains("?")
|
||||
&& !basicDecoded.trim().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -89,8 +89,7 @@ public class TextDecodingHelper {
|
||||
} catch (Exception ignored) {
|
||||
}
|
||||
|
||||
if (charStr == null
|
||||
&& font instanceof PDType0Font type0Font) {
|
||||
if (charStr == null && font instanceof PDType0Font type0Font) {
|
||||
try {
|
||||
int cid = (bytes.length > 1) ? ((bytes[0] & 0xFF) << 8) | (bytes[1] & 0xFF) : code;
|
||||
charStr = type0Font.toUnicode(cid);
|
||||
@ -129,12 +128,12 @@ public class TextDecodingHelper {
|
||||
if (fontName != null) {
|
||||
String lowerName = fontName.toLowerCase();
|
||||
if (lowerName.contains("cjk")
|
||||
|| lowerName.contains("gb")
|
||||
|| lowerName.contains("jp")) {
|
||||
|| lowerName.contains("gb")
|
||||
|| lowerName.contains("jp")) {
|
||||
// Basic CJK fallback (expand with a lookup table if needed)
|
||||
if (code >= 0x4E00 && code <= 0x9FFF) {
|
||||
return String.valueOf(
|
||||
(char) code); // Unicode Basic Multilingual Plane for CJK
|
||||
(char) code); // Unicode Basic Multilingual Plane for CJK
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -143,8 +142,7 @@ public class TextDecodingHelper {
|
||||
try {
|
||||
if (bytes.length >= 2) {
|
||||
ByteBuffer buffer = ByteBuffer.wrap(bytes);
|
||||
CharsetDecoder decoder =
|
||||
StandardCharsets.UTF_16BE.newDecoder();
|
||||
CharsetDecoder decoder = StandardCharsets.UTF_16BE.newDecoder();
|
||||
CharBuffer charBuffer = decoder.decode(buffer);
|
||||
return charBuffer.toString();
|
||||
}
|
||||
|
@ -2,12 +2,12 @@ package stirling.software.SPDF.utils.text;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||
import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
|
||||
import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding;
|
||||
import org.apache.pdfbox.pdmodel.font.encoding.Encoding;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
|
@ -5,13 +5,13 @@ import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDResources;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@UtilityClass
|
||||
public class TextFinderUtils {
|
||||
@ -57,7 +57,7 @@ public class TextFinderUtils {
|
||||
}
|
||||
|
||||
public List<Pattern> createOptimizedSearchPatterns(
|
||||
Set<String> searchTerms, boolean useRegex, boolean wholeWordSearch) {
|
||||
Set<String> searchTerms, boolean useRegex, boolean wholeWordSearch) {
|
||||
List<Pattern> patterns = new ArrayList<>();
|
||||
|
||||
for (String term : searchTerms) {
|
||||
|
@ -1,9 +1,9 @@
|
||||
package stirling.software.SPDF.utils.text;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@ -44,8 +44,7 @@ public class WidthCalculator {
|
||||
}
|
||||
}
|
||||
|
||||
private float calculateWidthWithCharacterIteration(
|
||||
PDFont font, String text, float fontSize) {
|
||||
private float calculateWidthWithCharacterIteration(PDFont font, String text, float fontSize) {
|
||||
try {
|
||||
float totalWidth = 0;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user