Refactor redaction services and utilities for improved readability and maintainability

- Adjusted indentation and formatting across multiple files for consistency.
- Improved import ordering in utility classes for better organization.
- Enhanced `performTextReplacementAggressive` method with multi-sweep logic to handle residual text more effectively.
- Added helper methods for verifying document text targets to streamline aggressive redaction.
- Simplified logic and formatting in `RedactionService` and related classes.

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
Balázs Szücs 2025-08-20 22:45:08 +02:00
parent 8f19369c58
commit 1fac74a3ca
10 changed files with 441 additions and 396 deletions

View File

@ -53,12 +53,12 @@ public class RedactController {
throws IOException {
byte[] pdfContent = redactionService.redactPDF(request);
return WebResponseUtils.bytesToWebResponse(
pdfContent,
removeFileExtension(
Objects.requireNonNull(
Filenames.toSimpleFileName(
request.getFileInput().getOriginalFilename())))
+ "_redacted.pdf");
pdfContent,
removeFileExtension(
Objects.requireNonNull(
Filenames.toSimpleFileName(
request.getFileInput().getOriginalFilename())))
+ "_redacted.pdf");
}
@PostMapping(value = "/auto-redact", consumes = "multipart/form-data")
@ -69,14 +69,14 @@ public class RedactController {
+ "Users can provide text patterns to redact, with options for regex and whole word matching. "
+ "Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> redactPdf(@ModelAttribute RedactPdfRequest request)
throws IOException {
throws IOException {
byte[] pdfContent = redactionService.redactPdf(request);
return WebResponseUtils.bytesToWebResponse(
pdfContent,
removeFileExtension(
Objects.requireNonNull(
Filenames.toSimpleFileName(
request.getFileInput().getOriginalFilename())))
+ "_redacted.pdf");
pdfContent,
removeFileExtension(
Objects.requireNonNull(
Filenames.toSimpleFileName(
request.getFileInput().getOriginalFilename())))
+ "_redacted.pdf");
}
}

View File

@ -48,9 +48,9 @@ public class RedactPdfRequest extends PDFFile {
private Boolean convertPDFToImage;
@Schema(
description = "Redaction mode: moderate, visual, or aggressive",
defaultValue = "moderate",
allowableValues = {"moderate", "visual", "aggressive"},
requiredMode = Schema.RequiredMode.NOT_REQUIRED)
description = "Redaction mode: moderate, visual, or aggressive",
defaultValue = "moderate",
allowableValues = {"moderate", "visual", "aggressive"},
requiredMode = Schema.RequiredMode.NOT_REQUIRED)
private String redactionMode;
}

View File

@ -17,7 +17,7 @@ class AggressiveRedactionService implements RedactionModeStrategy {
private final RedactionService helper;
AggressiveRedactionService(
CustomPDFDocumentFactory pdfDocumentFactory, RedactionService helper) {
CustomPDFDocumentFactory pdfDocumentFactory, RedactionService helper) {
this.pdfDocumentFactory = pdfDocumentFactory;
this.helper = helper;
}
@ -33,7 +33,7 @@ class AggressiveRedactionService implements RedactionModeStrategy {
try {
doc = pdfDocumentFactory.load(request.getFileInput());
Map<Integer, List<PDFText>> allFound =
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
if (allFound.isEmpty()) {
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
doc.save(baos);
@ -42,31 +42,31 @@ class AggressiveRedactionService implements RedactionModeStrategy {
}
helper.performTextReplacementAggressive(doc, allFound, listOfText, useRegex, wholeWord);
Map<Integer, List<PDFText>> residual =
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
boolean residualExists = residual.values().stream().mapToInt(List::size).sum() > 0;
String effectiveColor =
(request.getRedactColor() == null || request.getRedactColor().isBlank())
? "#000000"
: request.getRedactColor();
(request.getRedactColor() == null || request.getRedactColor().isBlank())
? "#000000"
: request.getRedactColor();
if (residualExists) {
fb = pdfDocumentFactory.load(request.getFileInput());
Map<Integer, List<PDFText>> fbFound =
RedactionService.findTextToRedact(fb, listOfText, useRegex, wholeWord);
RedactionService.findTextToRedact(fb, listOfText, useRegex, wholeWord);
return RedactionService.finalizeRedaction(
fb,
fbFound,
effectiveColor,
request.getCustomPadding(), /*force*/
true,
false);
fb,
fbFound,
effectiveColor,
request.getCustomPadding(), /*force*/
true,
false);
}
return RedactionService.finalizeRedaction(
doc,
allFound,
request.getRedactColor(),
request.getCustomPadding(),
request.getConvertPDFToImage(), /*text removal*/
true);
doc,
allFound,
request.getRedactColor(),
request.getCustomPadding(),
request.getConvertPDFToImage(), /*text removal*/
true);
} catch (Exception e) {
throw new IOException("Aggressive redaction failed: " + e.getMessage(), e);
} finally {

View File

@ -32,7 +32,7 @@ class ModerateRedactionService implements RedactionModeStrategy {
try {
doc = pdfDocumentFactory.load(request.getFileInput());
Map<Integer, List<PDFText>> allFound =
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
if (allFound.isEmpty()) {
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
doc.save(baos);
@ -40,31 +40,31 @@ class ModerateRedactionService implements RedactionModeStrategy {
}
}
boolean fallbackToBoxOnly =
helper.performTextReplacement(doc, allFound, listOfText, useRegex, wholeWord);
helper.performTextReplacement(doc, allFound, listOfText, useRegex, wholeWord);
String effectiveColor =
(request.getRedactColor() == null || request.getRedactColor().isBlank())
? "#000000"
: request.getRedactColor();
(request.getRedactColor() == null || request.getRedactColor().isBlank())
? "#000000"
: request.getRedactColor();
if (fallbackToBoxOnly) {
fallback = pdfDocumentFactory.load(request.getFileInput());
allFound =
RedactionService.findTextToRedact(
fallback, listOfText, useRegex, wholeWord);
RedactionService.findTextToRedact(
fallback, listOfText, useRegex, wholeWord);
return RedactionService.finalizeRedaction(
fallback,
fallback,
allFound,
effectiveColor,
request.getCustomPadding(),
request.getConvertPDFToImage(),
false);
}
return RedactionService.finalizeRedaction(
doc,
allFound,
effectiveColor,
request.getCustomPadding(),
request.getConvertPDFToImage(),
false);
}
return RedactionService.finalizeRedaction(
doc,
allFound,
effectiveColor,
request.getCustomPadding(),
request.getConvertPDFToImage(),
false);
} catch (Exception e) {
throw new IOException("Moderate redaction failed: " + e.getMessage(), e);
} finally {

View File

@ -27,7 +27,7 @@ class VisualRedactionService implements RedactionModeStrategy {
try (PDDocument document = pdfDocumentFactory.load(request.getFileInput())) {
Map<Integer, List<PDFText>> allFound =
RedactionService.findTextToRedact(document, listOfText, useRegex, wholeWord);
RedactionService.findTextToRedact(document, listOfText, useRegex, wholeWord);
if (allFound.isEmpty()) {
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
document.save(baos);
@ -35,16 +35,16 @@ class VisualRedactionService implements RedactionModeStrategy {
}
}
String effectiveColor =
(request.getRedactColor() == null || request.getRedactColor().isBlank())
? "#000000"
: request.getRedactColor();
(request.getRedactColor() == null || request.getRedactColor().isBlank())
? "#000000"
: request.getRedactColor();
return RedactionService.finalizeRedaction(
document,
allFound,
effectiveColor,
request.getCustomPadding(),
request.getConvertPDFToImage(),
false);
document,
allFound,
effectiveColor,
request.getCustomPadding(),
request.getConvertPDFToImage(),
false);
}
}
}

View File

@ -1,17 +1,17 @@
package stirling.software.SPDF.utils.text;
import lombok.experimental.UtilityClass;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import lombok.extern.slf4j.Slf4j;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.StandardCharsets;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@UtilityClass
public class TextDecodingHelper {
@ -34,8 +34,8 @@ public class TextDecodingHelper {
String basicDecoded = tryDecodeWithFont(font, cosString);
if (basicDecoded != null
&& !basicDecoded.contains("?")
&& !basicDecoded.trim().isEmpty()) {
&& !basicDecoded.contains("?")
&& !basicDecoded.trim().isEmpty()) {
return;
}
@ -89,8 +89,7 @@ public class TextDecodingHelper {
} catch (Exception ignored) {
}
if (charStr == null
&& font instanceof PDType0Font type0Font) {
if (charStr == null && font instanceof PDType0Font type0Font) {
try {
int cid = (bytes.length > 1) ? ((bytes[0] & 0xFF) << 8) | (bytes[1] & 0xFF) : code;
charStr = type0Font.toUnicode(cid);
@ -129,12 +128,12 @@ public class TextDecodingHelper {
if (fontName != null) {
String lowerName = fontName.toLowerCase();
if (lowerName.contains("cjk")
|| lowerName.contains("gb")
|| lowerName.contains("jp")) {
|| lowerName.contains("gb")
|| lowerName.contains("jp")) {
// Basic CJK fallback (expand with a lookup table if needed)
if (code >= 0x4E00 && code <= 0x9FFF) {
return String.valueOf(
(char) code); // Unicode Basic Multilingual Plane for CJK
(char) code); // Unicode Basic Multilingual Plane for CJK
}
}
}
@ -143,8 +142,7 @@ public class TextDecodingHelper {
try {
if (bytes.length >= 2) {
ByteBuffer buffer = ByteBuffer.wrap(bytes);
CharsetDecoder decoder =
StandardCharsets.UTF_16BE.newDecoder();
CharsetDecoder decoder = StandardCharsets.UTF_16BE.newDecoder();
CharBuffer charBuffer = decoder.decode(buffer);
return charBuffer.toString();
}

View File

@ -2,12 +2,12 @@ package stirling.software.SPDF.utils.text;
import java.io.IOException;
import lombok.experimental.UtilityClass;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding;
import org.apache.pdfbox.pdmodel.font.encoding.Encoding;
import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
@Slf4j

View File

@ -5,13 +5,13 @@ import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import lombok.experimental.UtilityClass;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.pdmodel.font.PDFont;
import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@UtilityClass
public class TextFinderUtils {
@ -57,7 +57,7 @@ public class TextFinderUtils {
}
public List<Pattern> createOptimizedSearchPatterns(
Set<String> searchTerms, boolean useRegex, boolean wholeWordSearch) {
Set<String> searchTerms, boolean useRegex, boolean wholeWordSearch) {
List<Pattern> patterns = new ArrayList<>();
for (String term : searchTerms) {

View File

@ -1,9 +1,9 @@
package stirling.software.SPDF.utils.text;
import lombok.experimental.UtilityClass;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont;
import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@ -44,8 +44,7 @@ public class WidthCalculator {
}
}
private float calculateWidthWithCharacterIteration(
PDFont font, String text, float fontSize) {
private float calculateWidthWithCharacterIteration(PDFont font, String text, float fontSize) {
try {
float totalWidth = 0;