refactor(RedactController, RedactionService): enhance method visibility and ensure service initialization

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
Balázs Szücs 2025-08-26 00:03:51 +02:00
parent 9fe3f1b7fa
commit 8c38ecf899
3 changed files with 184 additions and 136 deletions

View File

@ -1,9 +1,12 @@
package stirling.software.SPDF.controller.api.security;
import java.awt.*;
import java.io.IOException;
import java.util.List;
import java.util.Objects;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.WebDataBinder;
import org.springframework.web.bind.annotation.InitBinder;
@ -16,21 +19,29 @@ import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import stirling.software.SPDF.model.api.security.ManualRedactPdfRequest;
import stirling.software.SPDF.model.api.security.RedactPdfRequest;
import stirling.software.SPDF.service.RedactionService;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.WebResponseUtils;
import stirling.software.common.util.propertyeditor.StringToArrayListPropertyEditor;
@RestController
@RequestMapping("/api/v1/security")
@Tag(name = "Security", description = "Security APIs")
@RequiredArgsConstructor
public class RedactController {
private RedactionService redactionService;
private CustomPDFDocumentFactory pdfDocumentFactory;
private final RedactionService redactionService;
/**
 * Creates the controller from its two collaborators.
 *
 * <p>Both arguments are stored as given; no validation is performed here.
 *
 * @param redactionService service that the redaction endpoints delegate to
 * @param pdfDocumentFactory document factory kept so that {@code ensureService()} can build a
 *     {@link RedactionService} when none was supplied
 */
public RedactController(
        RedactionService redactionService, CustomPDFDocumentFactory pdfDocumentFactory) {
    this.pdfDocumentFactory = pdfDocumentFactory;
    this.redactionService = redactionService;
}
/**
 * Decodes a colour string by delegating to {@link RedactionService#decodeOrDefault(String)}.
 *
 * <p>Kept on the controller as a static pass-through so callers can use either class.
 *
 * @param hex colour text to decode; the service returns {@link Color#BLACK} for {@code null}
 *     or blank input
 * @return the colour produced by the service
 */
public static Color decodeOrDefault(String hex) {
    final Color decoded = RedactionService.decodeOrDefault(hex);
    return decoded;
}
private String removeFileExtension(String filename) {
return filename.replaceFirst("[.][^.]+$", "");
@ -42,6 +53,27 @@ public class RedactController {
List.class, "redactions", new StringToArrayListPropertyEditor());
}
/**
 * Builds a placeholder string for a redacted word by delegating to
 * {@link RedactionService#createPlaceholderWithFont}.
 *
 * @param originalWord the word being replaced
 * @param font font passed through to the service; may be {@code null} (the service checks
 *     for that itself)
 * @return the placeholder text produced by the service
 */
public static String createPlaceholderWithFont(
        String originalWord, org.apache.pdfbox.pdmodel.font.PDFont font) {
    final String placeholder = RedactionService.createPlaceholderWithFont(originalWord, font);
    return placeholder;
}
/**
 * Writes the given token list back as the page's content stream by delegating to
 * {@link RedactionService#writeFilteredContentStream}.
 *
 * @param document document that owns the page
 * @param page page whose contents are replaced
 * @param tokens content-stream tokens to write
 * @throws IOException if the service fails while writing the stream
 * @throws IllegalArgumentException thrown by the service when any argument is {@code null}
 */
public static void writeFilteredContentStream(
        PDDocument document,
        PDPage page,
        java.util.List<Object> tokens)
        throws IOException {
    RedactionService.writeFilteredContentStream(document, page, tokens);
}
/**
 * Returns the redaction service, creating one on first use when none was supplied.
 *
 * <p>When the field is unset, a new {@link RedactionService} is built from
 * {@code pdfDocumentFactory} with {@code null} as its second constructor argument and cached
 * in the field for subsequent calls.
 *
 * <p>NOTE(review): the lazy initialisation is not synchronized; confirm whether concurrent
 * first calls matter for this controller.
 *
 * @return the existing or newly created service, never {@code null}
 * @throws IllegalStateException if both the service and the document factory are missing
 */
private RedactionService ensureService() {
    // Fast path: a service was injected or has already been created.
    if (redactionService != null) {
        return redactionService;
    }
    // Without a factory there is nothing to build the fallback service from.
    if (pdfDocumentFactory == null) {
        throw new IllegalStateException(
                "RedactionService not available and pdfDocumentFactory is null");
    }
    redactionService = new RedactionService(pdfDocumentFactory, null);
    return redactionService;
}
@PostMapping(value = "/redact", consumes = "multipart/form-data")
@Operation(
summary = "Redact PDF manually",
@ -51,7 +83,7 @@ public class RedactController {
+ "Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> redactPDF(@ModelAttribute ManualRedactPdfRequest request)
throws IOException {
byte[] pdfContent = redactionService.redactPDF(request);
byte[] pdfContent = ensureService().redactPDF(request);
return WebResponseUtils.bytesToWebResponse(
pdfContent,
removeFileExtension(
@ -70,7 +102,7 @@ public class RedactController {
+ "Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> redactPdf(@ModelAttribute RedactPdfRequest request)
throws IOException {
byte[] pdfContent = redactionService.redactPdf(request);
byte[] pdfContent = ensureService().redactPdf(request);
return WebResponseUtils.bytesToWebResponse(
pdfContent,
removeFileExtension(
@ -79,4 +111,20 @@ public class RedactController {
request.getFileInput().getOriginalFilename())))
+ "_redacted.pdf");
}
/**
 * Reports whether the operator name is a text-showing operator, as decided by
 * {@link RedactionService#isTextShowingOperator(String)}.
 *
 * @param opName content-stream operator name to check
 * @return the service's answer for {@code opName}
 */
public boolean isTextShowingOperator(String opName) {
    final boolean showsText = RedactionService.isTextShowingOperator(opName);
    return showsText;
}
/**
 * Produces the page's content-stream tokens with the target words redacted, delegating to the
 * service obtained from {@code ensureService()}.
 *
 * @param document document that owns the page
 * @param page page whose content stream is processed
 * @param targetWords words (or patterns, when {@code useRegex} is set) to remove
 * @param useRegex passed through to the service's matching logic
 * @param wholeWordSearch passed through to the service's matching logic
 * @return the token list returned by the service
 * @throws IOException if the service fails while reading the page
 * @throws IllegalStateException if no service is available and none can be created
 */
public java.util.List<Object> createTokensWithoutTargetText(
        PDDocument document,
        PDPage page,
        java.util.Set<String> targetWords,
        boolean useRegex,
        boolean wholeWordSearch)
        throws IOException {
    final RedactionService service = ensureService();
    final java.util.List<Object> filteredTokens =
            service.createTokensWithoutTargetText(
                    document, page, targetWords, useRegex, wholeWordSearch);
    return filteredTokens;
}
}

View File

@ -350,7 +350,7 @@ public class RedactionService {
return result;
}
private static Color decodeOrDefault(String hex) {
public static Color decodeOrDefault(String hex) {
if (hex == null || hex.trim().isEmpty()) {
return Color.BLACK;
}
@ -424,8 +424,8 @@ public class RedactionService {
}
}
static void writeFilteredContentStream(PDDocument document, PDPage page, List<Object> tokens)
throws IOException {
public static void writeFilteredContentStream(
PDDocument document, PDPage page, List<Object> tokens) throws IOException {
if (document == null || page == null || tokens == null) {
throw new IllegalArgumentException("Document, page, and tokens cannot be null");
}
@ -437,7 +437,7 @@ public class RedactionService {
page.setContents(newStream);
}
static boolean isTextShowingOperator(String opName) {
/**
 * Checks whether an operator name is one of the entries in {@code TEXT_SHOWING_OPERATORS}.
 *
 * @param opName content-stream operator name to look up
 * @return {@code true} when the collection contains {@code opName}
 */
public static boolean isTextShowingOperator(String opName) {
    final boolean known = TEXT_SHOWING_OPERATORS.contains(opName);
    return known;
}
@ -1133,120 +1133,23 @@ public class RedactionService {
}
}
List<Object> createTokensWithoutTargetText(
PDDocument document,
PDPage page,
Set<String> targetWords,
boolean useRegex,
boolean wholeWordSearch)
throws IOException {
log.debug("Processing page with {} target words: {}", targetWords.size(), targetWords);
PDFStreamParser parser = new PDFStreamParser(page);
List<Object> tokens = parseAllTokens(parser);
int tokenCount = tokens.size();
log.debug("Parsed {} tokens from page content stream", tokenCount);
if (tokenCount == 0 && !targetWords.isEmpty()) {
log.warn(
"No tokens parsed from page content stream - this might indicate encoding issues");
log.warn("Attempting alternative verification for target words: {}", targetWords);
public static String createPlaceholderWithFont(String originalWord, PDFont font) {
if (originalWord == null || originalWord.isEmpty()) return " ";
final String repeat = " ".repeat(Math.max(1, originalWord.length()));
if (font != null && TextEncodingHelper.isFontSubset(font.getName())) {
try {
TextFinder directFinder = new TextFinder("", false, false);
directFinder.setStartPage(document.getPages().indexOf(page) + 1);
directFinder.setEndPage(document.getPages().indexOf(page) + 1);
directFinder.getText(document);
StringBuilder pageText = new StringBuilder();
for (PDFText pdfText : directFinder.getFoundTexts()) {
if (pdfText.getText() != null) {
pageText.append(pdfText.getText()).append(" ");
}
}
String extractedText = pageText.toString().trim();
log.debug("Alternative text extraction found: '{}'", extractedText);
for (String word : targetWords) {
if (extractedText.toLowerCase().contains(word.toLowerCase())) {
log.warn("Found target word '{}' via alternative extraction method", word);
}
}
float originalWidth =
WidthCalculator.calculateAccurateWidth(font, originalWord, 1.0f);
String result =
createAlternativePlaceholder(originalWord, originalWidth, font, 1.0f);
return result != null ? result : repeat;
} catch (Exception e) {
log.error("Alternative text extraction failed: {}", e.getMessage());
return repeat;
}
}
PDResources resources = page.getResources();
if (resources != null) {
log.debug("Processing XObjects for page");
processPageXObjects(
document,
resources,
targetWords,
useRegex,
wholeWordSearch,
this.aggressiveMode);
}
List<TextSegment> textSegments =
extractTextSegmentsFromTokens(page.getResources(), tokens, this.aggressiveMode);
log.debug("Extracted {} text segments from tokens", textSegments.size());
if (!textSegments.isEmpty()) {
StringBuilder allText = new StringBuilder();
boolean hasProblematicChars = false;
for (TextSegment seg : textSegments) {
if (seg.getText() != null && !seg.getText().trim().isEmpty()) {
String segmentText = seg.getText();
if (!isTextSafeForRedaction(segmentText)) {
hasProblematicChars = true;
segmentText = normalizeTextForRedaction(segmentText);
log.debug(
"Normalized problematic text in segment: original contained encoding issues");
}
allText.append(segmentText).append(" ");
}
}
String completeText = allText.toString().trim();
if (!completeText.isEmpty()) {
log.debug("Complete extracted text: '{}'", completeText);
if (hasProblematicChars) {
log.info("Applied character normalization to handle encoding issues");
}
}
}
List<MatchRange> matches;
if (this.aggressiveMode) {
log.debug("Using aggressive mode for matching");
matches =
findAllMatchesAggressive(
textSegments, tokens, targetWords, useRegex, wholeWordSearch);
} else {
log.debug("Using moderate mode for matching");
matches = findMatchesInSegments(textSegments, targetWords, useRegex, wholeWordSearch);
}
log.info("Found {} matches to redact", matches.size());
if (!matches.isEmpty()) {
log.debug("Match ranges: {}", matches);
}
List<Object> resultTokens = applyRedactionsToTokens(tokens, textSegments, matches);
int modifications = tokens.size() - resultTokens.size();
log.debug(
"Applied redactions - original tokens: {}, result tokens: {}, modifications: {}",
tokens.size(),
resultTokens.size(),
modifications);
return resultTokens;
return repeat;
}
private static COSArray buildKerningAdjustedTJArray(
@ -1810,23 +1713,120 @@ public class RedactionService {
}
}
static String createPlaceholderWithFont(String originalWord, PDFont font) {
if (originalWord == null || originalWord.isEmpty()) return " ";
public List<Object> createTokensWithoutTargetText(
PDDocument document,
PDPage page,
Set<String> targetWords,
boolean useRegex,
boolean wholeWordSearch)
throws IOException {
log.debug("Processing page with {} target words: {}", targetWords.size(), targetWords);
PDFStreamParser parser = new PDFStreamParser(page);
List<Object> tokens = parseAllTokens(parser);
int tokenCount = tokens.size();
log.debug("Parsed {} tokens from page content stream", tokenCount);
if (tokenCount == 0 && !targetWords.isEmpty()) {
log.warn(
"No tokens parsed from page content stream - this might indicate encoding issues");
log.warn("Attempting alternative verification for target words: {}", targetWords);
final String repeat = " ".repeat(Math.max(1, originalWord.length()));
if (font != null && TextEncodingHelper.isFontSubset(font.getName())) {
try {
float originalWidth =
WidthCalculator.calculateAccurateWidth(font, originalWord, 1.0f);
String result =
createAlternativePlaceholder(originalWord, originalWidth, font, 1.0f);
return result != null ? result : repeat;
TextFinder directFinder = new TextFinder("", false, false);
directFinder.setStartPage(document.getPages().indexOf(page) + 1);
directFinder.setEndPage(document.getPages().indexOf(page) + 1);
directFinder.getText(document);
StringBuilder pageText = new StringBuilder();
for (PDFText pdfText : directFinder.getFoundTexts()) {
if (pdfText.getText() != null) {
pageText.append(pdfText.getText()).append(" ");
}
}
String extractedText = pageText.toString().trim();
log.debug("Alternative text extraction found: '{}'", extractedText);
for (String word : targetWords) {
if (extractedText.toLowerCase().contains(word.toLowerCase())) {
log.warn("Found target word '{}' via alternative extraction method", word);
}
}
} catch (Exception e) {
return repeat;
log.error("Alternative text extraction failed: {}", e.getMessage());
}
}
return repeat;
PDResources resources = page.getResources();
if (resources != null) {
log.debug("Processing XObjects for page");
processPageXObjects(
document,
resources,
targetWords,
useRegex,
wholeWordSearch,
this.aggressiveMode);
}
List<TextSegment> textSegments =
extractTextSegmentsFromTokens(page.getResources(), tokens, this.aggressiveMode);
log.debug("Extracted {} text segments from tokens", textSegments.size());
if (!textSegments.isEmpty()) {
StringBuilder allText = new StringBuilder();
boolean hasProblematicChars = false;
for (TextSegment seg : textSegments) {
if (seg.getText() != null && !seg.getText().trim().isEmpty()) {
String segmentText = seg.getText();
if (!isTextSafeForRedaction(segmentText)) {
hasProblematicChars = true;
segmentText = normalizeTextForRedaction(segmentText);
log.debug(
"Normalized problematic text in segment: original contained encoding issues");
}
allText.append(segmentText).append(" ");
}
}
String completeText = allText.toString().trim();
if (!completeText.isEmpty()) {
log.debug("Complete extracted text: '{}'", completeText);
if (hasProblematicChars) {
log.info("Applied character normalization to handle encoding issues");
}
}
}
List<MatchRange> matches;
if (this.aggressiveMode) {
log.debug("Using aggressive mode for matching");
matches =
findAllMatchesAggressive(
textSegments, tokens, targetWords, useRegex, wholeWordSearch);
} else {
log.debug("Using moderate mode for matching");
matches = findMatchesInSegments(textSegments, targetWords, useRegex, wholeWordSearch);
}
log.info("Found {} matches to redact", matches.size());
if (!matches.isEmpty()) {
log.debug("Match ranges: {}", matches);
}
List<Object> resultTokens = applyRedactionsToTokens(tokens, textSegments, matches);
int modifications = tokens.size() - resultTokens.size();
log.debug(
"Applied redactions - original tokens: {}, result tokens: {}, modifications: {}",
tokens.size(),
resultTokens.size(),
modifications);
return resultTokens;
}
private static TokenModificationResult convertToTJWithAdjustment(

View File

@ -726,28 +726,28 @@ class RedactControllerTest {
@Test
@DisplayName("Should decode valid hex color with hash")
void decodeValidHexColorWithHash() throws Exception {
Color result = redactController.decodeOrDefault("#FF0000");
Color result = RedactController.decodeOrDefault("#FF0000");
assertEquals(Color.RED, result);
}
@Test
@DisplayName("Should decode valid hex color without hash")
void decodeValidHexColorWithoutHash() throws Exception {
Color result = redactController.decodeOrDefault("FF0000");
Color result = RedactController.decodeOrDefault("FF0000");
assertEquals(Color.RED, result);
}
@Test
@DisplayName("Should default to black for null color")
void defaultToBlackForNullColor() throws Exception {
Color result = redactController.decodeOrDefault(null);
Color result = RedactController.decodeOrDefault(null);
assertEquals(Color.BLACK, result);
}
@Test
@DisplayName("Should default to black for invalid color")
void defaultToBlackForInvalidColor() throws Exception {
Color result = redactController.decodeOrDefault("invalid-color");
Color result = RedactController.decodeOrDefault("invalid-color");
assertEquals(Color.BLACK, result);
}
@ -759,7 +759,7 @@ class RedactControllerTest {
})
@DisplayName("Should handle various valid color formats")
void handleVariousValidColorFormats(String colorInput) throws Exception {
Color result = redactController.decodeOrDefault(colorInput);
Color result = RedactController.decodeOrDefault(colorInput);
assertNotNull(result);
assertTrue(
result.getRed() >= 0 && result.getRed() <= 255,
@ -775,8 +775,8 @@ class RedactControllerTest {
@Test
@DisplayName("Should handle short hex codes appropriately")
void handleShortHexCodes() throws Exception {
Color result1 = redactController.decodeOrDefault("123");
Color result2 = redactController.decodeOrDefault("#12");
Color result1 = RedactController.decodeOrDefault("123");
Color result2 = RedactController.decodeOrDefault("#12");
assertNotNull(result1);
assertNotNull(result2);