Mirror of https://github.com/Frooodle/Stirling-PDF.git (synced 2025-09-08 17:51:20 +02:00)

refactor(RedactController, RedactionService): enhance method visibility and ensure service initialization

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>

This commit is contained in:
parent 9fe3f1b7fa
commit 8c38ecf899
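In short: the controller swaps Lombok-style injection of a final RedactionService for an explicit constructor plus a lazy ensureService() fallback, and several RedactionService helpers become public so the controller and its tests can delegate to them. A condensed sketch of that initialization pattern, reduced from the diff below (class name and comments are ours, not a drop-in copy of the real class):

```java
import stirling.software.SPDF.service.RedactionService;
import stirling.software.common.service.CustomPDFDocumentFactory;

// Condensed from the diff below; a sketch, not the actual controller.
public class RedactControllerSketch {

    private RedactionService redactionService; // no longer final, so it can be set lazily
    private CustomPDFDocumentFactory pdfDocumentFactory;

    public RedactControllerSketch(
            RedactionService redactionService, CustomPDFDocumentFactory pdfDocumentFactory) {
        this.redactionService = redactionService;
        this.pdfDocumentFactory = pdfDocumentFactory;
    }

    // Both redaction endpoints call this guard, so a controller built by hand
    // (outside Spring DI, e.g. in a plain unit test) still gets a working service.
    private RedactionService ensureService() {
        if (redactionService == null) {
            if (pdfDocumentFactory == null) {
                throw new IllegalStateException(
                        "RedactionService not available and pdfDocumentFactory is null");
            }
            // The second constructor argument is null exactly as in the diff;
            // what that parameter means is not shown in this commit.
            redactionService = new RedactionService(pdfDocumentFactory, null);
        }
        return redactionService;
    }
}
```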
RedactController.java

@@ -1,9 +1,12 @@
package stirling.software.SPDF.controller.api.security;

import java.awt.*;
import java.io.IOException;
import java.util.List;
import java.util.Objects;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.WebDataBinder;
import org.springframework.web.bind.annotation.InitBinder;
@@ -16,21 +19,29 @@ import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;

import lombok.RequiredArgsConstructor;

import stirling.software.SPDF.model.api.security.ManualRedactPdfRequest;
import stirling.software.SPDF.model.api.security.RedactPdfRequest;
import stirling.software.SPDF.service.RedactionService;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.WebResponseUtils;
import stirling.software.common.util.propertyeditor.StringToArrayListPropertyEditor;

@RestController
@RequestMapping("/api/v1/security")
@Tag(name = "Security", description = "Security APIs")
@RequiredArgsConstructor
public class RedactController {
    private RedactionService redactionService;
    private CustomPDFDocumentFactory pdfDocumentFactory;

    private final RedactionService redactionService;
    public RedactController(
            RedactionService redactionService, CustomPDFDocumentFactory pdfDocumentFactory) {
        this.redactionService = redactionService;
        this.pdfDocumentFactory = pdfDocumentFactory;
    }

    public static Color decodeOrDefault(String hex) {
        return RedactionService.decodeOrDefault(hex);
    }

    private String removeFileExtension(String filename) {
        return filename.replaceFirst("[.][^.]+$", "");

@@ -42,6 +53,27 @@ public class RedactController {
                List.class, "redactions", new StringToArrayListPropertyEditor());
    }

    public static String createPlaceholderWithFont(
            String originalWord, org.apache.pdfbox.pdmodel.font.PDFont font) {
        return RedactionService.createPlaceholderWithFont(originalWord, font);
    }

    public static void writeFilteredContentStream(
            PDDocument document, PDPage page, java.util.List<Object> tokens) throws IOException {
        RedactionService.writeFilteredContentStream(document, page, tokens);
    }

    private RedactionService ensureService() {
        if (redactionService == null) {
            if (pdfDocumentFactory == null) {
                throw new IllegalStateException(
                        "RedactionService not available and pdfDocumentFactory is null");
            }
            redactionService = new RedactionService(pdfDocumentFactory, null);
        }
        return redactionService;
    }

    @PostMapping(value = "/redact", consumes = "multipart/form-data")
    @Operation(
            summary = "Redact PDF manually",

@@ -51,7 +83,7 @@ public class RedactController {
                    + "Input:PDF Output:PDF Type:SISO")
    public ResponseEntity<byte[]> redactPDF(@ModelAttribute ManualRedactPdfRequest request)
            throws IOException {
        byte[] pdfContent = redactionService.redactPDF(request);
        byte[] pdfContent = ensureService().redactPDF(request);
        return WebResponseUtils.bytesToWebResponse(
                pdfContent,
                removeFileExtension(

@@ -70,7 +102,7 @@ public class RedactController {
                    + "Input:PDF Output:PDF Type:SISO")
    public ResponseEntity<byte[]> redactPdf(@ModelAttribute RedactPdfRequest request)
            throws IOException {
        byte[] pdfContent = redactionService.redactPdf(request);
        byte[] pdfContent = ensureService().redactPdf(request);
        return WebResponseUtils.bytesToWebResponse(
                pdfContent,
                removeFileExtension(
@@ -79,4 +111,20 @@ public class RedactController {
                                request.getFileInput().getOriginalFilename())))
                        + "_redacted.pdf");
    }

    public boolean isTextShowingOperator(String opName) {
        return RedactionService.isTextShowingOperator(opName);
    }

    public java.util.List<Object> createTokensWithoutTargetText(
            PDDocument document,
            PDPage page,
            java.util.Set<String> targetWords,
            boolean useRegex,
            boolean wholeWordSearch)
            throws IOException {
        return ensureService()
                .createTokensWithoutTargetText(
                        document, page, targetWords, useRegex, wholeWordSearch);
    }
}
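The @@ -42,6 +53,27 @@ hunk above truncates the binder method and shows only its registerCustomEditor(...) line. Going by the WebDataBinder and @InitBinder imports, the enclosing method presumably has roughly the following shape; the method name initBinder and the wrapper class are assumptions, not code from this commit:

```java
import java.util.List;

import org.springframework.web.bind.WebDataBinder;
import org.springframework.web.bind.annotation.InitBinder;

import stirling.software.common.util.propertyeditor.StringToArrayListPropertyEditor;

class BinderSketch {

    // Assumed shape of the truncated method: it registers a field-specific editor so
    // Spring can bind the multipart form field "redactions" into a java.util.List.
    @InitBinder
    public void initBinder(WebDataBinder binder) {
        binder.registerCustomEditor(
                List.class, "redactions", new StringToArrayListPropertyEditor());
    }
}
```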
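For orientation, a hedged sketch of driving the manual-redaction endpoint with a standalone MockMvc setup. The multipart field name fileInput follows from request.getFileInput(), the redactions field from the binder sketch above; the "[]" payload and the class and method names here are placeholders:

```java
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.multipart;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;

import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.web.servlet.MockMvc;
import org.springframework.test.web.servlet.setup.MockMvcBuilders;

import stirling.software.SPDF.controller.api.security.RedactController;

class RedactEndpointSketch {

    // Assumes a controller instance whose collaborators are already set up (or mocked).
    static void callManualRedact(RedactController controller, byte[] pdfBytes) throws Exception {
        MockMvc mvc = MockMvcBuilders.standaloneSetup(controller).build();

        MockMultipartFile file =
                new MockMultipartFile("fileInput", "input.pdf", "application/pdf", pdfBytes);

        mvc.perform(
                        multipart("/api/v1/security/redact")
                                .file(file)
                                // Placeholder value: the real format of "redactions" is defined by
                                // ManualRedactPdfRequest and the custom property editor.
                                .param("redactions", "[]"))
                .andExpect(status().isOk());
    }
}
```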
RedactionService.java

@@ -350,7 +350,7 @@ public class RedactionService {
        return result;
    }

    private static Color decodeOrDefault(String hex) {
    public static Color decodeOrDefault(String hex) {
        if (hex == null || hex.trim().isEmpty()) {
            return Color.BLACK;
        }
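This hunk shows only the null/blank guard of decodeOrDefault; the rest of the body lies outside the diff context. One common way such a decoder is finished, consistent with the tests near the end of this page (values accepted with or without a leading '#', anything unparsable falling back to black), is sketched below as an assumption, not the project's actual implementation:

```java
import java.awt.Color;

final class ColorDecodeSketch {

    // Assumed completion of decodeOrDefault; the real body is not shown in the diff.
    static Color decodeOrDefault(String hex) {
        if (hex == null || hex.trim().isEmpty()) {
            return Color.BLACK;
        }
        String normalized = hex.trim().startsWith("#") ? hex.trim() : "#" + hex.trim();
        try {
            // Color.decode understands #RGB / #RRGGBB style integer notation.
            return Color.decode(normalized);
        } catch (NumberFormatException e) {
            // Anything Integer.decode cannot parse falls back to black.
            return Color.BLACK;
        }
    }
}
```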
@@ -424,8 +424,8 @@ public class RedactionService {
        }
    }

    static void writeFilteredContentStream(PDDocument document, PDPage page, List<Object> tokens)
            throws IOException {
    public static void writeFilteredContentStream(
            PDDocument document, PDPage page, List<Object> tokens) throws IOException {
        if (document == null || page == null || tokens == null) {
            throw new IllegalArgumentException("Document, page, and tokens cannot be null");
        }
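The body of writeFilteredContentStream between the null check above and the page.setContents(newStream) line in the next hunk is elided. In PDFBox, writing a token list back to a page is normally done with a ContentStreamWriter over a fresh PDStream, roughly as in this sketch (the method and class names here are ours):

```java
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;

import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDStream;

final class ContentStreamWriteSketch {

    // Sketch of the standard PDFBox pattern; the real method body is elided in the diff.
    static void writeTokensToPage(PDDocument document, PDPage page, List<Object> tokens)
            throws IOException {
        PDStream newStream = new PDStream(document);
        try (OutputStream out = newStream.createOutputStream(COSName.FLATE_DECODE)) {
            ContentStreamWriter writer = new ContentStreamWriter(out);
            writer.writeTokens(tokens); // serializes operands and operators back to bytes
        }
        page.setContents(newStream); // matches the page.setContents(newStream) line below
    }
}
```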
@@ -437,7 +437,7 @@ public class RedactionService {
        page.setContents(newStream);
    }

    static boolean isTextShowingOperator(String opName) {
    public static boolean isTextShowingOperator(String opName) {
        return TEXT_SHOWING_OPERATORS.contains(opName);
    }
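TEXT_SHOWING_OPERATORS itself is never defined in this diff. In the PDF content-stream model the text-showing operators are Tj, ', " and TJ, so the constant presumably looks something like the sketch below; treat the definition as an assumption:

```java
import java.util.Set;

// Assumption: the diff only uses the constant, it never shows its definition.
// Tj  show a string
// '   move to the next line and show a string
// "   set word/character spacing, move to the next line, show a string
// TJ  show strings with individual glyph position adjustments
final class TextOperatorsSketch {
    static final Set<String> TEXT_SHOWING_OPERATORS = Set.of("Tj", "'", "\"", "TJ");
}
```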
@@ -1133,120 +1133,23 @@ public class RedactionService {
        }
    }

    List<Object> createTokensWithoutTargetText(
            PDDocument document,
            PDPage page,
            Set<String> targetWords,
            boolean useRegex,
            boolean wholeWordSearch)
            throws IOException {
        log.debug("Processing page with {} target words: {}", targetWords.size(), targetWords);

        PDFStreamParser parser = new PDFStreamParser(page);
        List<Object> tokens = parseAllTokens(parser);
        int tokenCount = tokens.size();

        log.debug("Parsed {} tokens from page content stream", tokenCount);

        if (tokenCount == 0 && !targetWords.isEmpty()) {
            log.warn(
                    "No tokens parsed from page content stream - this might indicate encoding issues");
            log.warn("Attempting alternative verification for target words: {}", targetWords);

            try {
                TextFinder directFinder = new TextFinder("", false, false);
                directFinder.setStartPage(document.getPages().indexOf(page) + 1);
                directFinder.setEndPage(document.getPages().indexOf(page) + 1);
                directFinder.getText(document);

                StringBuilder pageText = new StringBuilder();
                for (PDFText pdfText : directFinder.getFoundTexts()) {
                    if (pdfText.getText() != null) {
                        pageText.append(pdfText.getText()).append(" ");
                    }
                }

                String extractedText = pageText.toString().trim();
                log.debug("Alternative text extraction found: '{}'", extractedText);

                for (String word : targetWords) {
                    if (extractedText.toLowerCase().contains(word.toLowerCase())) {
                        log.warn("Found target word '{}' via alternative extraction method", word);
                    }
                }

            } catch (Exception e) {
                log.error("Alternative text extraction failed: {}", e.getMessage());
            }
        }

        PDResources resources = page.getResources();
        if (resources != null) {
            log.debug("Processing XObjects for page");
            processPageXObjects(
                    document,
                    resources,
                    targetWords,
                    useRegex,
                    wholeWordSearch,
                    this.aggressiveMode);
        }

        List<TextSegment> textSegments =
                extractTextSegmentsFromTokens(page.getResources(), tokens, this.aggressiveMode);
        log.debug("Extracted {} text segments from tokens", textSegments.size());

        if (!textSegments.isEmpty()) {
            StringBuilder allText = new StringBuilder();
            boolean hasProblematicChars = false;

            for (TextSegment seg : textSegments) {
                if (seg.getText() != null && !seg.getText().trim().isEmpty()) {
                    String segmentText = seg.getText();
                    if (!isTextSafeForRedaction(segmentText)) {
                        hasProblematicChars = true;
                        segmentText = normalizeTextForRedaction(segmentText);
                        log.debug(
                                "Normalized problematic text in segment: original contained encoding issues");
                    }
                    allText.append(segmentText).append(" ");
                }
            }

            String completeText = allText.toString().trim();
            if (!completeText.isEmpty()) {
                log.debug("Complete extracted text: '{}'", completeText);
                if (hasProblematicChars) {
                    log.info("Applied character normalization to handle encoding issues");
                }
            }
        }

        List<MatchRange> matches;
        if (this.aggressiveMode) {
            log.debug("Using aggressive mode for matching");
            matches =
                    findAllMatchesAggressive(
                            textSegments, tokens, targetWords, useRegex, wholeWordSearch);
        } else {
            log.debug("Using moderate mode for matching");
            matches = findMatchesInSegments(textSegments, targetWords, useRegex, wholeWordSearch);
        }

        log.info("Found {} matches to redact", matches.size());
        if (!matches.isEmpty()) {
            log.debug("Match ranges: {}", matches);
        }

        List<Object> resultTokens = applyRedactionsToTokens(tokens, textSegments, matches);
        int modifications = tokens.size() - resultTokens.size();
        log.debug(
                "Applied redactions - original tokens: {}, result tokens: {}, modifications: {}",
                tokens.size(),
                resultTokens.size(),
                modifications);

        return resultTokens;
    }

    public static String createPlaceholderWithFont(String originalWord, PDFont font) {
        if (originalWord == null || originalWord.isEmpty()) return " ";

        final String repeat = " ".repeat(Math.max(1, originalWord.length()));
        if (font != null && TextEncodingHelper.isFontSubset(font.getName())) {
            try {
                float originalWidth =
                        WidthCalculator.calculateAccurateWidth(font, originalWord, 1.0f);
                String result =
                        createAlternativePlaceholder(originalWord, originalWidth, font, 1.0f);
                return result != null ? result : repeat;
            } catch (Exception e) {
                return repeat;
            }
        }

        return repeat;
    }

    private static COSArray buildKerningAdjustedTJArray(
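WidthCalculator and createAlternativePlaceholder are project helpers whose bodies are not part of this commit. The idea behind them, replacing a word with a run of spaces whose rendered width matches the original so the remaining glyph positions do not shift, can be sketched with plain PDFBox calls (the names and rounding strategy below are ours):

```java
import java.io.IOException;

import org.apache.pdfbox.pdmodel.font.PDFont;

final class PlaceholderWidthSketch {

    // Sketch only: the real code uses WidthCalculator/createAlternativePlaceholder,
    // which may handle subset fonts and missing glyphs more carefully than this.
    static String spacesMatchingWidth(PDFont font, String originalWord, float fontSize)
            throws IOException {
        // PDFont.getStringWidth returns widths in 1/1000ths of text-space units.
        float targetWidth = font.getStringWidth(originalWord) / 1000f * fontSize;
        float spaceWidth = font.getStringWidth(" ") / 1000f * fontSize;

        int count = Math.max(1, Math.round(targetWidth / Math.max(spaceWidth, 0.0001f)));
        return " ".repeat(count);
    }
}
```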
@@ -1810,23 +1713,120 @@ public class RedactionService {
        }
    }

    static String createPlaceholderWithFont(String originalWord, PDFont font) {
        if (originalWord == null || originalWord.isEmpty()) return " ";

        final String repeat = " ".repeat(Math.max(1, originalWord.length()));
        if (font != null && TextEncodingHelper.isFontSubset(font.getName())) {
            try {
                float originalWidth =
                        WidthCalculator.calculateAccurateWidth(font, originalWord, 1.0f);
                String result =
                        createAlternativePlaceholder(originalWord, originalWidth, font, 1.0f);
                return result != null ? result : repeat;
            } catch (Exception e) {
                return repeat;
            }
        }

        return repeat;
    }

    public List<Object> createTokensWithoutTargetText(
            PDDocument document,
            PDPage page,
            Set<String> targetWords,
            boolean useRegex,
            boolean wholeWordSearch)
            throws IOException {
        log.debug("Processing page with {} target words: {}", targetWords.size(), targetWords);

        PDFStreamParser parser = new PDFStreamParser(page);
        List<Object> tokens = parseAllTokens(parser);
        int tokenCount = tokens.size();

        log.debug("Parsed {} tokens from page content stream", tokenCount);

        if (tokenCount == 0 && !targetWords.isEmpty()) {
            log.warn(
                    "No tokens parsed from page content stream - this might indicate encoding issues");
            log.warn("Attempting alternative verification for target words: {}", targetWords);

            try {
                TextFinder directFinder = new TextFinder("", false, false);
                directFinder.setStartPage(document.getPages().indexOf(page) + 1);
                directFinder.setEndPage(document.getPages().indexOf(page) + 1);
                directFinder.getText(document);

                StringBuilder pageText = new StringBuilder();
                for (PDFText pdfText : directFinder.getFoundTexts()) {
                    if (pdfText.getText() != null) {
                        pageText.append(pdfText.getText()).append(" ");
                    }
                }

                String extractedText = pageText.toString().trim();
                log.debug("Alternative text extraction found: '{}'", extractedText);

                for (String word : targetWords) {
                    if (extractedText.toLowerCase().contains(word.toLowerCase())) {
                        log.warn("Found target word '{}' via alternative extraction method", word);
                    }
                }

            } catch (Exception e) {
                log.error("Alternative text extraction failed: {}", e.getMessage());
            }
        }

        PDResources resources = page.getResources();
        if (resources != null) {
            log.debug("Processing XObjects for page");
            processPageXObjects(
                    document,
                    resources,
                    targetWords,
                    useRegex,
                    wholeWordSearch,
                    this.aggressiveMode);
        }

        List<TextSegment> textSegments =
                extractTextSegmentsFromTokens(page.getResources(), tokens, this.aggressiveMode);
        log.debug("Extracted {} text segments from tokens", textSegments.size());

        if (!textSegments.isEmpty()) {
            StringBuilder allText = new StringBuilder();
            boolean hasProblematicChars = false;

            for (TextSegment seg : textSegments) {
                if (seg.getText() != null && !seg.getText().trim().isEmpty()) {
                    String segmentText = seg.getText();
                    if (!isTextSafeForRedaction(segmentText)) {
                        hasProblematicChars = true;
                        segmentText = normalizeTextForRedaction(segmentText);
                        log.debug(
                                "Normalized problematic text in segment: original contained encoding issues");
                    }
                    allText.append(segmentText).append(" ");
                }
            }

            String completeText = allText.toString().trim();
            if (!completeText.isEmpty()) {
                log.debug("Complete extracted text: '{}'", completeText);
                if (hasProblematicChars) {
                    log.info("Applied character normalization to handle encoding issues");
                }
            }
        }

        List<MatchRange> matches;
        if (this.aggressiveMode) {
            log.debug("Using aggressive mode for matching");
            matches =
                    findAllMatchesAggressive(
                            textSegments, tokens, targetWords, useRegex, wholeWordSearch);
        } else {
            log.debug("Using moderate mode for matching");
            matches = findMatchesInSegments(textSegments, targetWords, useRegex, wholeWordSearch);
        }

        log.info("Found {} matches to redact", matches.size());
        if (!matches.isEmpty()) {
            log.debug("Match ranges: {}", matches);
        }

        List<Object> resultTokens = applyRedactionsToTokens(tokens, textSegments, matches);
        int modifications = tokens.size() - resultTokens.size();
        log.debug(
                "Applied redactions - original tokens: {}, result tokens: {}, modifications: {}",
                tokens.size(),
                resultTokens.size(),
                modifications);

        return resultTokens;
    }

    private static TokenModificationResult convertToTJWithAdjustment(
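Taken together, the helpers made public in this commit form a small per-page pipeline: parse the content stream, rebuild the token list without the target text, then write the filtered tokens back. A hedged caller-side sketch, with placeholder target words and flags:

```java
import java.io.IOException;
import java.util.List;
import java.util.Set;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;

import stirling.software.SPDF.service.RedactionService;

final class RedactionPipelineSketch {

    // Sketch of how a caller could drive the two public methods from this commit;
    // the RedactionService instance is assumed to be constructed elsewhere,
    // mirroring ensureService() in the controller.
    static void redactPages(RedactionService service, PDDocument document) throws IOException {
        Set<String> targetWords = Set.of("CONFIDENTIAL"); // placeholder target
        for (PDPage page : document.getPages()) {
            List<Object> filtered =
                    service.createTokensWithoutTargetText(
                            document, page, targetWords, /* useRegex */ false, /* wholeWord */ true);
            RedactionService.writeFilteredContentStream(document, page, filtered);
        }
    }
}
```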
RedactControllerTest.java

@@ -726,28 +726,28 @@ class RedactControllerTest {
    @Test
    @DisplayName("Should decode valid hex color with hash")
    void decodeValidHexColorWithHash() throws Exception {
        Color result = redactController.decodeOrDefault("#FF0000");
        Color result = RedactController.decodeOrDefault("#FF0000");
        assertEquals(Color.RED, result);
    }

    @Test
    @DisplayName("Should decode valid hex color without hash")
    void decodeValidHexColorWithoutHash() throws Exception {
        Color result = redactController.decodeOrDefault("FF0000");
        Color result = RedactController.decodeOrDefault("FF0000");
        assertEquals(Color.RED, result);
    }

    @Test
    @DisplayName("Should default to black for null color")
    void defaultToBlackForNullColor() throws Exception {
        Color result = redactController.decodeOrDefault(null);
        Color result = RedactController.decodeOrDefault(null);
        assertEquals(Color.BLACK, result);
    }

    @Test
    @DisplayName("Should default to black for invalid color")
    void defaultToBlackForInvalidColor() throws Exception {
        Color result = redactController.decodeOrDefault("invalid-color");
        Color result = RedactController.decodeOrDefault("invalid-color");
        assertEquals(Color.BLACK, result);
    }

@@ -759,7 +759,7 @@ class RedactControllerTest {
    })
    @DisplayName("Should handle various valid color formats")
    void handleVariousValidColorFormats(String colorInput) throws Exception {
        Color result = redactController.decodeOrDefault(colorInput);
        Color result = RedactController.decodeOrDefault(colorInput);
        assertNotNull(result);
        assertTrue(
                result.getRed() >= 0 && result.getRed() <= 255,

@@ -775,8 +775,8 @@ class RedactControllerTest {
    @Test
    @DisplayName("Should handle short hex codes appropriately")
    void handleShortHexCodes() throws Exception {
        Color result1 = redactController.decodeOrDefault("123");
        Color result2 = redactController.decodeOrDefault("#12");
        Color result1 = RedactController.decodeOrDefault("123");
        Color result2 = RedactController.decodeOrDefault("#12");

        assertNotNull(result1);
        assertNotNull(result2);