mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-08 17:51:20 +02:00
Refactor redaction services and utilities for improved readability and maintainability
- Adjusted indentation and formatting across multiple files for consistency.
- Improved import ordering in utility classes for better organization.
- Enhanced the `performTextReplacementAggressive` method with multi-sweep logic to handle residual text more effectively.
- Added helper methods for verifying document text targets to streamline aggressive redaction.
- Simplified logic and formatting in `RedactionService` and related classes.

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
parent
8f19369c58
commit
1fac74a3ca
@ -53,12 +53,12 @@ public class RedactController {
|
|||||||
throws IOException {
|
throws IOException {
|
||||||
byte[] pdfContent = redactionService.redactPDF(request);
|
byte[] pdfContent = redactionService.redactPDF(request);
|
||||||
return WebResponseUtils.bytesToWebResponse(
|
return WebResponseUtils.bytesToWebResponse(
|
||||||
pdfContent,
|
pdfContent,
|
||||||
removeFileExtension(
|
removeFileExtension(
|
||||||
Objects.requireNonNull(
|
Objects.requireNonNull(
|
||||||
Filenames.toSimpleFileName(
|
Filenames.toSimpleFileName(
|
||||||
request.getFileInput().getOriginalFilename())))
|
request.getFileInput().getOriginalFilename())))
|
||||||
+ "_redacted.pdf");
|
+ "_redacted.pdf");
|
||||||
}
|
}
|
||||||
|
|
||||||
@PostMapping(value = "/auto-redact", consumes = "multipart/form-data")
|
@PostMapping(value = "/auto-redact", consumes = "multipart/form-data")
|
||||||
@ -69,14 +69,14 @@ public class RedactController {
|
|||||||
+ "Users can provide text patterns to redact, with options for regex and whole word matching. "
|
+ "Users can provide text patterns to redact, with options for regex and whole word matching. "
|
||||||
+ "Input:PDF Output:PDF Type:SISO")
|
+ "Input:PDF Output:PDF Type:SISO")
|
||||||
public ResponseEntity<byte[]> redactPdf(@ModelAttribute RedactPdfRequest request)
|
public ResponseEntity<byte[]> redactPdf(@ModelAttribute RedactPdfRequest request)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
byte[] pdfContent = redactionService.redactPdf(request);
|
byte[] pdfContent = redactionService.redactPdf(request);
|
||||||
return WebResponseUtils.bytesToWebResponse(
|
return WebResponseUtils.bytesToWebResponse(
|
||||||
pdfContent,
|
pdfContent,
|
||||||
removeFileExtension(
|
removeFileExtension(
|
||||||
Objects.requireNonNull(
|
Objects.requireNonNull(
|
||||||
Filenames.toSimpleFileName(
|
Filenames.toSimpleFileName(
|
||||||
request.getFileInput().getOriginalFilename())))
|
request.getFileInput().getOriginalFilename())))
|
||||||
+ "_redacted.pdf");
|
+ "_redacted.pdf");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -48,9 +48,9 @@ public class RedactPdfRequest extends PDFFile {
|
|||||||
private Boolean convertPDFToImage;
|
private Boolean convertPDFToImage;
|
||||||
|
|
||||||
@Schema(
|
@Schema(
|
||||||
description = "Redaction mode: moderate, visual, or aggressive",
|
description = "Redaction mode: moderate, visual, or aggressive",
|
||||||
defaultValue = "moderate",
|
defaultValue = "moderate",
|
||||||
allowableValues = {"moderate", "visual", "aggressive"},
|
allowableValues = {"moderate", "visual", "aggressive"},
|
||||||
requiredMode = Schema.RequiredMode.NOT_REQUIRED)
|
requiredMode = Schema.RequiredMode.NOT_REQUIRED)
|
||||||
private String redactionMode;
|
private String redactionMode;
|
||||||
}
|
}
|
||||||
|
@ -17,7 +17,7 @@ class AggressiveRedactionService implements RedactionModeStrategy {
|
|||||||
private final RedactionService helper;
|
private final RedactionService helper;
|
||||||
|
|
||||||
AggressiveRedactionService(
|
AggressiveRedactionService(
|
||||||
CustomPDFDocumentFactory pdfDocumentFactory, RedactionService helper) {
|
CustomPDFDocumentFactory pdfDocumentFactory, RedactionService helper) {
|
||||||
this.pdfDocumentFactory = pdfDocumentFactory;
|
this.pdfDocumentFactory = pdfDocumentFactory;
|
||||||
this.helper = helper;
|
this.helper = helper;
|
||||||
}
|
}
|
||||||
@ -33,7 +33,7 @@ class AggressiveRedactionService implements RedactionModeStrategy {
|
|||||||
try {
|
try {
|
||||||
doc = pdfDocumentFactory.load(request.getFileInput());
|
doc = pdfDocumentFactory.load(request.getFileInput());
|
||||||
Map<Integer, List<PDFText>> allFound =
|
Map<Integer, List<PDFText>> allFound =
|
||||||
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||||
if (allFound.isEmpty()) {
|
if (allFound.isEmpty()) {
|
||||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||||
doc.save(baos);
|
doc.save(baos);
|
||||||
@ -42,31 +42,31 @@ class AggressiveRedactionService implements RedactionModeStrategy {
|
|||||||
}
|
}
|
||||||
helper.performTextReplacementAggressive(doc, allFound, listOfText, useRegex, wholeWord);
|
helper.performTextReplacementAggressive(doc, allFound, listOfText, useRegex, wholeWord);
|
||||||
Map<Integer, List<PDFText>> residual =
|
Map<Integer, List<PDFText>> residual =
|
||||||
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||||
boolean residualExists = residual.values().stream().mapToInt(List::size).sum() > 0;
|
boolean residualExists = residual.values().stream().mapToInt(List::size).sum() > 0;
|
||||||
String effectiveColor =
|
String effectiveColor =
|
||||||
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
||||||
? "#000000"
|
? "#000000"
|
||||||
: request.getRedactColor();
|
: request.getRedactColor();
|
||||||
if (residualExists) {
|
if (residualExists) {
|
||||||
fb = pdfDocumentFactory.load(request.getFileInput());
|
fb = pdfDocumentFactory.load(request.getFileInput());
|
||||||
Map<Integer, List<PDFText>> fbFound =
|
Map<Integer, List<PDFText>> fbFound =
|
||||||
RedactionService.findTextToRedact(fb, listOfText, useRegex, wholeWord);
|
RedactionService.findTextToRedact(fb, listOfText, useRegex, wholeWord);
|
||||||
return RedactionService.finalizeRedaction(
|
return RedactionService.finalizeRedaction(
|
||||||
fb,
|
fb,
|
||||||
fbFound,
|
fbFound,
|
||||||
effectiveColor,
|
effectiveColor,
|
||||||
request.getCustomPadding(), /*force*/
|
request.getCustomPadding(), /*force*/
|
||||||
true,
|
true,
|
||||||
false);
|
false);
|
||||||
}
|
}
|
||||||
return RedactionService.finalizeRedaction(
|
return RedactionService.finalizeRedaction(
|
||||||
doc,
|
doc,
|
||||||
allFound,
|
allFound,
|
||||||
request.getRedactColor(),
|
request.getRedactColor(),
|
||||||
request.getCustomPadding(),
|
request.getCustomPadding(),
|
||||||
request.getConvertPDFToImage(), /*text removal*/
|
request.getConvertPDFToImage(), /*text removal*/
|
||||||
true);
|
true);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new IOException("Aggressive redaction failed: " + e.getMessage(), e);
|
throw new IOException("Aggressive redaction failed: " + e.getMessage(), e);
|
||||||
} finally {
|
} finally {
|
||||||
|
@ -32,7 +32,7 @@ class ModerateRedactionService implements RedactionModeStrategy {
|
|||||||
try {
|
try {
|
||||||
doc = pdfDocumentFactory.load(request.getFileInput());
|
doc = pdfDocumentFactory.load(request.getFileInput());
|
||||||
Map<Integer, List<PDFText>> allFound =
|
Map<Integer, List<PDFText>> allFound =
|
||||||
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||||
if (allFound.isEmpty()) {
|
if (allFound.isEmpty()) {
|
||||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||||
doc.save(baos);
|
doc.save(baos);
|
||||||
@ -40,31 +40,31 @@ class ModerateRedactionService implements RedactionModeStrategy {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
boolean fallbackToBoxOnly =
|
boolean fallbackToBoxOnly =
|
||||||
helper.performTextReplacement(doc, allFound, listOfText, useRegex, wholeWord);
|
helper.performTextReplacement(doc, allFound, listOfText, useRegex, wholeWord);
|
||||||
String effectiveColor =
|
String effectiveColor =
|
||||||
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
||||||
? "#000000"
|
? "#000000"
|
||||||
: request.getRedactColor();
|
: request.getRedactColor();
|
||||||
if (fallbackToBoxOnly) {
|
if (fallbackToBoxOnly) {
|
||||||
fallback = pdfDocumentFactory.load(request.getFileInput());
|
fallback = pdfDocumentFactory.load(request.getFileInput());
|
||||||
allFound =
|
allFound =
|
||||||
RedactionService.findTextToRedact(
|
RedactionService.findTextToRedact(
|
||||||
fallback, listOfText, useRegex, wholeWord);
|
fallback, listOfText, useRegex, wholeWord);
|
||||||
return RedactionService.finalizeRedaction(
|
return RedactionService.finalizeRedaction(
|
||||||
fallback,
|
fallback,
|
||||||
|
allFound,
|
||||||
|
effectiveColor,
|
||||||
|
request.getCustomPadding(),
|
||||||
|
request.getConvertPDFToImage(),
|
||||||
|
false);
|
||||||
|
}
|
||||||
|
return RedactionService.finalizeRedaction(
|
||||||
|
doc,
|
||||||
allFound,
|
allFound,
|
||||||
effectiveColor,
|
effectiveColor,
|
||||||
request.getCustomPadding(),
|
request.getCustomPadding(),
|
||||||
request.getConvertPDFToImage(),
|
request.getConvertPDFToImage(),
|
||||||
false);
|
false);
|
||||||
}
|
|
||||||
return RedactionService.finalizeRedaction(
|
|
||||||
doc,
|
|
||||||
allFound,
|
|
||||||
effectiveColor,
|
|
||||||
request.getCustomPadding(),
|
|
||||||
request.getConvertPDFToImage(),
|
|
||||||
false);
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new IOException("Moderate redaction failed: " + e.getMessage(), e);
|
throw new IOException("Moderate redaction failed: " + e.getMessage(), e);
|
||||||
} finally {
|
} finally {
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -27,7 +27,7 @@ class VisualRedactionService implements RedactionModeStrategy {
|
|||||||
|
|
||||||
try (PDDocument document = pdfDocumentFactory.load(request.getFileInput())) {
|
try (PDDocument document = pdfDocumentFactory.load(request.getFileInput())) {
|
||||||
Map<Integer, List<PDFText>> allFound =
|
Map<Integer, List<PDFText>> allFound =
|
||||||
RedactionService.findTextToRedact(document, listOfText, useRegex, wholeWord);
|
RedactionService.findTextToRedact(document, listOfText, useRegex, wholeWord);
|
||||||
if (allFound.isEmpty()) {
|
if (allFound.isEmpty()) {
|
||||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||||
document.save(baos);
|
document.save(baos);
|
||||||
@ -35,16 +35,16 @@ class VisualRedactionService implements RedactionModeStrategy {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
String effectiveColor =
|
String effectiveColor =
|
||||||
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
||||||
? "#000000"
|
? "#000000"
|
||||||
: request.getRedactColor();
|
: request.getRedactColor();
|
||||||
return RedactionService.finalizeRedaction(
|
return RedactionService.finalizeRedaction(
|
||||||
document,
|
document,
|
||||||
allFound,
|
allFound,
|
||||||
effectiveColor,
|
effectiveColor,
|
||||||
request.getCustomPadding(),
|
request.getCustomPadding(),
|
||||||
request.getConvertPDFToImage(),
|
request.getConvertPDFToImage(),
|
||||||
false);
|
false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,17 +1,17 @@
|
|||||||
package stirling.software.SPDF.utils.text;
|
package stirling.software.SPDF.utils.text;
|
||||||
|
|
||||||
import lombok.experimental.UtilityClass;
|
|
||||||
import org.apache.pdfbox.cos.COSString;
|
|
||||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
|
||||||
import org.apache.pdfbox.pdmodel.font.PDType0Font;
|
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
|
||||||
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.CharBuffer;
|
import java.nio.CharBuffer;
|
||||||
import java.nio.charset.CharsetDecoder;
|
import java.nio.charset.CharsetDecoder;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
|
import org.apache.pdfbox.cos.COSString;
|
||||||
|
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||||
|
import org.apache.pdfbox.pdmodel.font.PDType0Font;
|
||||||
|
|
||||||
|
import lombok.experimental.UtilityClass;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@UtilityClass
|
@UtilityClass
|
||||||
public class TextDecodingHelper {
|
public class TextDecodingHelper {
|
||||||
@ -34,8 +34,8 @@ public class TextDecodingHelper {
|
|||||||
|
|
||||||
String basicDecoded = tryDecodeWithFont(font, cosString);
|
String basicDecoded = tryDecodeWithFont(font, cosString);
|
||||||
if (basicDecoded != null
|
if (basicDecoded != null
|
||||||
&& !basicDecoded.contains("?")
|
&& !basicDecoded.contains("?")
|
||||||
&& !basicDecoded.trim().isEmpty()) {
|
&& !basicDecoded.trim().isEmpty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -89,8 +89,7 @@ public class TextDecodingHelper {
|
|||||||
} catch (Exception ignored) {
|
} catch (Exception ignored) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (charStr == null
|
if (charStr == null && font instanceof PDType0Font type0Font) {
|
||||||
&& font instanceof PDType0Font type0Font) {
|
|
||||||
try {
|
try {
|
||||||
int cid = (bytes.length > 1) ? ((bytes[0] & 0xFF) << 8) | (bytes[1] & 0xFF) : code;
|
int cid = (bytes.length > 1) ? ((bytes[0] & 0xFF) << 8) | (bytes[1] & 0xFF) : code;
|
||||||
charStr = type0Font.toUnicode(cid);
|
charStr = type0Font.toUnicode(cid);
|
||||||
@ -129,12 +128,12 @@ public class TextDecodingHelper {
|
|||||||
if (fontName != null) {
|
if (fontName != null) {
|
||||||
String lowerName = fontName.toLowerCase();
|
String lowerName = fontName.toLowerCase();
|
||||||
if (lowerName.contains("cjk")
|
if (lowerName.contains("cjk")
|
||||||
|| lowerName.contains("gb")
|
|| lowerName.contains("gb")
|
||||||
|| lowerName.contains("jp")) {
|
|| lowerName.contains("jp")) {
|
||||||
// Basic CJK fallback (expand with a lookup table if needed)
|
// Basic CJK fallback (expand with a lookup table if needed)
|
||||||
if (code >= 0x4E00 && code <= 0x9FFF) {
|
if (code >= 0x4E00 && code <= 0x9FFF) {
|
||||||
return String.valueOf(
|
return String.valueOf(
|
||||||
(char) code); // Unicode Basic Multilingual Plane for CJK
|
(char) code); // Unicode Basic Multilingual Plane for CJK
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -143,8 +142,7 @@ public class TextDecodingHelper {
|
|||||||
try {
|
try {
|
||||||
if (bytes.length >= 2) {
|
if (bytes.length >= 2) {
|
||||||
ByteBuffer buffer = ByteBuffer.wrap(bytes);
|
ByteBuffer buffer = ByteBuffer.wrap(bytes);
|
||||||
CharsetDecoder decoder =
|
CharsetDecoder decoder = StandardCharsets.UTF_16BE.newDecoder();
|
||||||
StandardCharsets.UTF_16BE.newDecoder();
|
|
||||||
CharBuffer charBuffer = decoder.decode(buffer);
|
CharBuffer charBuffer = decoder.decode(buffer);
|
||||||
return charBuffer.toString();
|
return charBuffer.toString();
|
||||||
}
|
}
|
||||||
|
@ -2,12 +2,12 @@ package stirling.software.SPDF.utils.text;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import lombok.experimental.UtilityClass;
|
|
||||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||||
import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
|
import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
|
||||||
import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding;
|
import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding;
|
||||||
import org.apache.pdfbox.pdmodel.font.encoding.Encoding;
|
import org.apache.pdfbox.pdmodel.font.encoding.Encoding;
|
||||||
|
|
||||||
|
import lombok.experimental.UtilityClass;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
|
@ -5,13 +5,13 @@ import java.util.List;
|
|||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import lombok.experimental.UtilityClass;
|
|
||||||
import org.apache.pdfbox.pdmodel.PDPage;
|
import org.apache.pdfbox.pdmodel.PDPage;
|
||||||
import org.apache.pdfbox.pdmodel.PDResources;
|
import org.apache.pdfbox.pdmodel.PDResources;
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
|
||||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||||
|
|
||||||
|
import lombok.experimental.UtilityClass;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@UtilityClass
|
@UtilityClass
|
||||||
public class TextFinderUtils {
|
public class TextFinderUtils {
|
||||||
@ -57,7 +57,7 @@ public class TextFinderUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public List<Pattern> createOptimizedSearchPatterns(
|
public List<Pattern> createOptimizedSearchPatterns(
|
||||||
Set<String> searchTerms, boolean useRegex, boolean wholeWordSearch) {
|
Set<String> searchTerms, boolean useRegex, boolean wholeWordSearch) {
|
||||||
List<Pattern> patterns = new ArrayList<>();
|
List<Pattern> patterns = new ArrayList<>();
|
||||||
|
|
||||||
for (String term : searchTerms) {
|
for (String term : searchTerms) {
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
package stirling.software.SPDF.utils.text;
|
package stirling.software.SPDF.utils.text;
|
||||||
|
|
||||||
import lombok.experimental.UtilityClass;
|
|
||||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||||
|
|
||||||
|
import lombok.experimental.UtilityClass;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@ -44,8 +44,7 @@ public class WidthCalculator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private float calculateWidthWithCharacterIteration(
|
private float calculateWidthWithCharacterIteration(PDFont font, String text, float fontSize) {
|
||||||
PDFont font, String text, float fontSize) {
|
|
||||||
try {
|
try {
|
||||||
float totalWidth = 0;
|
float totalWidth = 0;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user