diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/security/RedactController.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/security/RedactController.java index 78b572d8f..e4d1a7032 100644 --- a/app/core/src/main/java/stirling/software/SPDF/controller/api/security/RedactController.java +++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/security/RedactController.java @@ -1,9 +1,12 @@ package stirling.software.SPDF.controller.api.security; +import java.awt.*; import java.io.IOException; import java.util.List; import java.util.Objects; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.WebDataBinder; import org.springframework.web.bind.annotation.InitBinder; @@ -16,21 +19,29 @@ import io.github.pixee.security.Filenames; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; -import lombok.RequiredArgsConstructor; - import stirling.software.SPDF.model.api.security.ManualRedactPdfRequest; import stirling.software.SPDF.model.api.security.RedactPdfRequest; import stirling.software.SPDF.service.RedactionService; +import stirling.software.common.service.CustomPDFDocumentFactory; import stirling.software.common.util.WebResponseUtils; import stirling.software.common.util.propertyeditor.StringToArrayListPropertyEditor; @RestController @RequestMapping("/api/v1/security") @Tag(name = "Security", description = "Security APIs") -@RequiredArgsConstructor public class RedactController { + private RedactionService redactionService; + private CustomPDFDocumentFactory pdfDocumentFactory; - private final RedactionService redactionService; + public RedactController( + RedactionService redactionService, CustomPDFDocumentFactory pdfDocumentFactory) { + this.redactionService = redactionService; + this.pdfDocumentFactory = pdfDocumentFactory; + } + + public static Color decodeOrDefault(String hex) { + return RedactionService.decodeOrDefault(hex); + } private String removeFileExtension(String filename) { return filename.replaceFirst("[.][^.]+$", ""); @@ -42,6 +53,27 @@ public class RedactController { List.class, "redactions", new StringToArrayListPropertyEditor()); } + public static String createPlaceholderWithFont( + String originalWord, org.apache.pdfbox.pdmodel.font.PDFont font) { + return RedactionService.createPlaceholderWithFont(originalWord, font); + } + + public static void writeFilteredContentStream( + PDDocument document, PDPage page, java.util.List tokens) throws IOException { + RedactionService.writeFilteredContentStream(document, page, tokens); + } + + private RedactionService ensureService() { + if (redactionService == null) { + if (pdfDocumentFactory == null) { + throw new IllegalStateException( + "RedactionService not available and pdfDocumentFactory is null"); + } + redactionService = new RedactionService(pdfDocumentFactory, null); + } + return redactionService; + } + @PostMapping(value = "/redact", consumes = "multipart/form-data") @Operation( summary = "Redact PDF manually", @@ -51,7 +83,7 @@ public class RedactController { + "Input:PDF Output:PDF Type:SISO") public ResponseEntity redactPDF(@ModelAttribute ManualRedactPdfRequest request) throws IOException { - byte[] pdfContent = redactionService.redactPDF(request); + byte[] pdfContent = ensureService().redactPDF(request); return WebResponseUtils.bytesToWebResponse( pdfContent, removeFileExtension( @@ -70,7 +102,7 @@ public class RedactController { + "Input:PDF Output:PDF Type:SISO") public ResponseEntity redactPdf(@ModelAttribute RedactPdfRequest request) throws IOException { - byte[] pdfContent = redactionService.redactPdf(request); + byte[] pdfContent = ensureService().redactPdf(request); return WebResponseUtils.bytesToWebResponse( pdfContent, removeFileExtension( @@ -79,4 +111,20 @@ public class RedactController { request.getFileInput().getOriginalFilename()))) + "_redacted.pdf"); } + + public boolean isTextShowingOperator(String opName) { + return RedactionService.isTextShowingOperator(opName); + } + + public java.util.List createTokensWithoutTargetText( + PDDocument document, + PDPage page, + java.util.Set targetWords, + boolean useRegex, + boolean wholeWordSearch) + throws IOException { + return ensureService() + .createTokensWithoutTargetText( + document, page, targetWords, useRegex, wholeWordSearch); + } } diff --git a/app/core/src/main/java/stirling/software/SPDF/service/RedactionService.java b/app/core/src/main/java/stirling/software/SPDF/service/RedactionService.java index 4f99d3bda..87fe4885f 100644 --- a/app/core/src/main/java/stirling/software/SPDF/service/RedactionService.java +++ b/app/core/src/main/java/stirling/software/SPDF/service/RedactionService.java @@ -350,7 +350,7 @@ public class RedactionService { return result; } - private static Color decodeOrDefault(String hex) { + public static Color decodeOrDefault(String hex) { if (hex == null || hex.trim().isEmpty()) { return Color.BLACK; } @@ -424,8 +424,8 @@ public class RedactionService { } } - static void writeFilteredContentStream(PDDocument document, PDPage page, List tokens) - throws IOException { + public static void writeFilteredContentStream( + PDDocument document, PDPage page, List tokens) throws IOException { if (document == null || page == null || tokens == null) { throw new IllegalArgumentException("Document, page, and tokens cannot be null"); } @@ -437,7 +437,7 @@ public class RedactionService { page.setContents(newStream); } - static boolean isTextShowingOperator(String opName) { + public static boolean isTextShowingOperator(String opName) { return TEXT_SHOWING_OPERATORS.contains(opName); } @@ -1133,120 +1133,23 @@ public class RedactionService { } } - List createTokensWithoutTargetText( - PDDocument document, - PDPage page, - Set targetWords, - boolean useRegex, - boolean wholeWordSearch) - throws IOException { - log.debug("Processing page with {} target words: {}", targetWords.size(), targetWords); - - PDFStreamParser parser = new PDFStreamParser(page); - List tokens = parseAllTokens(parser); - int tokenCount = tokens.size(); - - log.debug("Parsed {} tokens from page content stream", tokenCount); - - if (tokenCount == 0 && !targetWords.isEmpty()) { - log.warn( - "No tokens parsed from page content stream - this might indicate encoding issues"); - log.warn("Attempting alternative verification for target words: {}", targetWords); + public static String createPlaceholderWithFont(String originalWord, PDFont font) { + if (originalWord == null || originalWord.isEmpty()) return " "; + final String repeat = " ".repeat(Math.max(1, originalWord.length())); + if (font != null && TextEncodingHelper.isFontSubset(font.getName())) { try { - TextFinder directFinder = new TextFinder("", false, false); - directFinder.setStartPage(document.getPages().indexOf(page) + 1); - directFinder.setEndPage(document.getPages().indexOf(page) + 1); - directFinder.getText(document); - - StringBuilder pageText = new StringBuilder(); - for (PDFText pdfText : directFinder.getFoundTexts()) { - if (pdfText.getText() != null) { - pageText.append(pdfText.getText()).append(" "); - } - } - - String extractedText = pageText.toString().trim(); - log.debug("Alternative text extraction found: '{}'", extractedText); - - for (String word : targetWords) { - if (extractedText.toLowerCase().contains(word.toLowerCase())) { - log.warn("Found target word '{}' via alternative extraction method", word); - } - } - + float originalWidth = + WidthCalculator.calculateAccurateWidth(font, originalWord, 1.0f); + String result = + createAlternativePlaceholder(originalWord, originalWidth, font, 1.0f); + return result != null ? result : repeat; } catch (Exception e) { - log.error("Alternative text extraction failed: {}", e.getMessage()); + return repeat; } } - PDResources resources = page.getResources(); - if (resources != null) { - log.debug("Processing XObjects for page"); - processPageXObjects( - document, - resources, - targetWords, - useRegex, - wholeWordSearch, - this.aggressiveMode); - } - - List textSegments = - extractTextSegmentsFromTokens(page.getResources(), tokens, this.aggressiveMode); - log.debug("Extracted {} text segments from tokens", textSegments.size()); - - if (!textSegments.isEmpty()) { - StringBuilder allText = new StringBuilder(); - boolean hasProblematicChars = false; - - for (TextSegment seg : textSegments) { - if (seg.getText() != null && !seg.getText().trim().isEmpty()) { - String segmentText = seg.getText(); - if (!isTextSafeForRedaction(segmentText)) { - hasProblematicChars = true; - segmentText = normalizeTextForRedaction(segmentText); - log.debug( - "Normalized problematic text in segment: original contained encoding issues"); - } - allText.append(segmentText).append(" "); - } - } - - String completeText = allText.toString().trim(); - if (!completeText.isEmpty()) { - log.debug("Complete extracted text: '{}'", completeText); - if (hasProblematicChars) { - log.info("Applied character normalization to handle encoding issues"); - } - } - } - - List matches; - if (this.aggressiveMode) { - log.debug("Using aggressive mode for matching"); - matches = - findAllMatchesAggressive( - textSegments, tokens, targetWords, useRegex, wholeWordSearch); - } else { - log.debug("Using moderate mode for matching"); - matches = findMatchesInSegments(textSegments, targetWords, useRegex, wholeWordSearch); - } - - log.info("Found {} matches to redact", matches.size()); - if (!matches.isEmpty()) { - log.debug("Match ranges: {}", matches); - } - - List resultTokens = applyRedactionsToTokens(tokens, textSegments, matches); - int modifications = tokens.size() - resultTokens.size(); - log.debug( - "Applied redactions - original tokens: {}, result tokens: {}, modifications: {}", - tokens.size(), - resultTokens.size(), - modifications); - - return resultTokens; + return repeat; } private static COSArray buildKerningAdjustedTJArray( @@ -1810,23 +1713,120 @@ public class RedactionService { } } - static String createPlaceholderWithFont(String originalWord, PDFont font) { - if (originalWord == null || originalWord.isEmpty()) return " "; + public List createTokensWithoutTargetText( + PDDocument document, + PDPage page, + Set targetWords, + boolean useRegex, + boolean wholeWordSearch) + throws IOException { + log.debug("Processing page with {} target words: {}", targetWords.size(), targetWords); + + PDFStreamParser parser = new PDFStreamParser(page); + List tokens = parseAllTokens(parser); + int tokenCount = tokens.size(); + + log.debug("Parsed {} tokens from page content stream", tokenCount); + + if (tokenCount == 0 && !targetWords.isEmpty()) { + log.warn( + "No tokens parsed from page content stream - this might indicate encoding issues"); + log.warn("Attempting alternative verification for target words: {}", targetWords); - final String repeat = " ".repeat(Math.max(1, originalWord.length())); - if (font != null && TextEncodingHelper.isFontSubset(font.getName())) { try { - float originalWidth = - WidthCalculator.calculateAccurateWidth(font, originalWord, 1.0f); - String result = - createAlternativePlaceholder(originalWord, originalWidth, font, 1.0f); - return result != null ? result : repeat; + TextFinder directFinder = new TextFinder("", false, false); + directFinder.setStartPage(document.getPages().indexOf(page) + 1); + directFinder.setEndPage(document.getPages().indexOf(page) + 1); + directFinder.getText(document); + + StringBuilder pageText = new StringBuilder(); + for (PDFText pdfText : directFinder.getFoundTexts()) { + if (pdfText.getText() != null) { + pageText.append(pdfText.getText()).append(" "); + } + } + + String extractedText = pageText.toString().trim(); + log.debug("Alternative text extraction found: '{}'", extractedText); + + for (String word : targetWords) { + if (extractedText.toLowerCase().contains(word.toLowerCase())) { + log.warn("Found target word '{}' via alternative extraction method", word); + } + } + } catch (Exception e) { - return repeat; + log.error("Alternative text extraction failed: {}", e.getMessage()); } } - return repeat; + PDResources resources = page.getResources(); + if (resources != null) { + log.debug("Processing XObjects for page"); + processPageXObjects( + document, + resources, + targetWords, + useRegex, + wholeWordSearch, + this.aggressiveMode); + } + + List textSegments = + extractTextSegmentsFromTokens(page.getResources(), tokens, this.aggressiveMode); + log.debug("Extracted {} text segments from tokens", textSegments.size()); + + if (!textSegments.isEmpty()) { + StringBuilder allText = new StringBuilder(); + boolean hasProblematicChars = false; + + for (TextSegment seg : textSegments) { + if (seg.getText() != null && !seg.getText().trim().isEmpty()) { + String segmentText = seg.getText(); + if (!isTextSafeForRedaction(segmentText)) { + hasProblematicChars = true; + segmentText = normalizeTextForRedaction(segmentText); + log.debug( + "Normalized problematic text in segment: original contained encoding issues"); + } + allText.append(segmentText).append(" "); + } + } + + String completeText = allText.toString().trim(); + if (!completeText.isEmpty()) { + log.debug("Complete extracted text: '{}'", completeText); + if (hasProblematicChars) { + log.info("Applied character normalization to handle encoding issues"); + } + } + } + + List matches; + if (this.aggressiveMode) { + log.debug("Using aggressive mode for matching"); + matches = + findAllMatchesAggressive( + textSegments, tokens, targetWords, useRegex, wholeWordSearch); + } else { + log.debug("Using moderate mode for matching"); + matches = findMatchesInSegments(textSegments, targetWords, useRegex, wholeWordSearch); + } + + log.info("Found {} matches to redact", matches.size()); + if (!matches.isEmpty()) { + log.debug("Match ranges: {}", matches); + } + + List resultTokens = applyRedactionsToTokens(tokens, textSegments, matches); + int modifications = tokens.size() - resultTokens.size(); + log.debug( + "Applied redactions - original tokens: {}, result tokens: {}, modifications: {}", + tokens.size(), + resultTokens.size(), + modifications); + + return resultTokens; } private static TokenModificationResult convertToTJWithAdjustment( diff --git a/app/core/src/test/java/stirling/software/SPDF/controller/api/security/RedactControllerTest.java b/app/core/src/test/java/stirling/software/SPDF/controller/api/security/RedactControllerTest.java index 9d835042a..067d75b68 100644 --- a/app/core/src/test/java/stirling/software/SPDF/controller/api/security/RedactControllerTest.java +++ b/app/core/src/test/java/stirling/software/SPDF/controller/api/security/RedactControllerTest.java @@ -726,28 +726,28 @@ class RedactControllerTest { @Test @DisplayName("Should decode valid hex color with hash") void decodeValidHexColorWithHash() throws Exception { - Color result = redactController.decodeOrDefault("#FF0000"); + Color result = RedactController.decodeOrDefault("#FF0000"); assertEquals(Color.RED, result); } @Test @DisplayName("Should decode valid hex color without hash") void decodeValidHexColorWithoutHash() throws Exception { - Color result = redactController.decodeOrDefault("FF0000"); + Color result = RedactController.decodeOrDefault("FF0000"); assertEquals(Color.RED, result); } @Test @DisplayName("Should default to black for null color") void defaultToBlackForNullColor() throws Exception { - Color result = redactController.decodeOrDefault(null); + Color result = RedactController.decodeOrDefault(null); assertEquals(Color.BLACK, result); } @Test @DisplayName("Should default to black for invalid color") void defaultToBlackForInvalidColor() throws Exception { - Color result = redactController.decodeOrDefault("invalid-color"); + Color result = RedactController.decodeOrDefault("invalid-color"); assertEquals(Color.BLACK, result); } @@ -759,7 +759,7 @@ class RedactControllerTest { }) @DisplayName("Should handle various valid color formats") void handleVariousValidColorFormats(String colorInput) throws Exception { - Color result = redactController.decodeOrDefault(colorInput); + Color result = RedactController.decodeOrDefault(colorInput); assertNotNull(result); assertTrue( result.getRed() >= 0 && result.getRed() <= 255, @@ -775,8 +775,8 @@ class RedactControllerTest { @Test @DisplayName("Should handle short hex codes appropriately") void handleShortHexCodes() throws Exception { - Color result1 = redactController.decodeOrDefault("123"); - Color result2 = redactController.decodeOrDefault("#12"); + Color result1 = RedactController.decodeOrDefault("123"); + Color result2 = RedactController.decodeOrDefault("#12"); assertNotNull(result1); assertNotNull(result2);