From 06efab5cb2e1fec496fb7fcaccb15dc0327e0d84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Sz=C3=BCcs?= <127139797+balazs-szucs@users.noreply.github.com> Date: Sat, 11 Oct 2025 19:37:58 +0200 Subject: [PATCH] fix(sanitize): fix JavaScript handling, embedded file sanitization (#4652) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description of Changes ### Fixes - Added document-level JavaScript removal: Now removes the OpenAction and the catalog additional actions (WC, WS, DS, WP, DP) when they are JavaScript actions; these execute on document open, close, save, and print events - Added page-level JavaScript removal: Removes JavaScript page open/close actions (O, C) that were previously missed - Added annotation additional actions removal: Removes all 10 annotation event handlers (Bl, D, E, Fo, PC, PI, PO, PV, U, X) for mouse/focus events - Fixed embedded file removal: Corrected implementation to use `catalog.getNames().setEmbeddedFiles(null)` instead of incorrectly targeting page resources, and additionally removes file-attachment annotations (`PDAnnotationFileAttachment`) from every page ### Verification: Before (after embedded file "removal"): [screenshot not included] After: [screenshot not included] --- ## Checklist ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [x] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation 
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [x] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details. Signed-off-by: Balázs Szücs --- .../api/security/SanitizeController.java | 118 ++++++++++++------ 1 file changed, 81 insertions(+), 37 deletions(-) diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/security/SanitizeController.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/security/SanitizeController.java index d7c4b675c..312584e3e 100644 --- a/app/core/src/main/java/stirling/software/SPDF/controller/api/security/SanitizeController.java +++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/security/SanitizeController.java @@ -2,22 +2,25 @@ package stirling.software.SPDF.controller.api.security; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.util.List; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDDocumentInformation; +import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.pdmodel.PDPageTree; -import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.PDMetadata; import org.apache.pdfbox.pdmodel.interactive.action.PDAction; import org.apache.pdfbox.pdmodel.interactive.action.PDActionJavaScript; import org.apache.pdfbox.pdmodel.interactive.action.PDActionLaunch; import 
org.apache.pdfbox.pdmodel.interactive.action.PDActionURI; +import org.apache.pdfbox.pdmodel.interactive.action.PDDocumentCatalogAdditionalActions; import org.apache.pdfbox.pdmodel.interactive.action.PDFormFieldAdditionalActions; +import org.apache.pdfbox.pdmodel.interactive.action.PDPageAdditionalActions; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationFileAttachment; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; @@ -34,6 +37,7 @@ import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; import stirling.software.SPDF.model.api.security.SanitizePdfRequest; import stirling.software.common.service.CustomPDFDocumentFactory; @@ -43,6 +47,7 @@ import stirling.software.common.util.WebResponseUtils; @RestController @RequestMapping("/api/v1/security") @Tag(name = "Security", description = "Security APIs") +@Slf4j @RequiredArgsConstructor public class SanitizeController { @@ -99,7 +104,7 @@ public class SanitizeController { GeneralUtils.generateFilename(inputFile.getOriginalFilename(), "_sanitized.pdf")); } - private void sanitizeJavaScript(PDDocument document) throws IOException { + private static void sanitizeJavaScript(PDDocument document) throws IOException { // Get the root dictionary (catalog) of the PDF PDDocumentCatalog catalog = document.getDocumentCatalog(); @@ -118,7 +123,61 @@ public class SanitizeController { } } + if (catalog.getOpenAction() instanceof PDActionJavaScript) { + catalog.setOpenAction(null); + } + + PDDocumentCatalogAdditionalActions catalogActions = catalog.getActions(); + if (catalogActions != null) { + if (catalogActions.getWC() instanceof PDActionJavaScript) { + 
catalogActions.setWC(null); + } + if (catalogActions.getWS() instanceof PDActionJavaScript) { + catalogActions.setWS(null); + } + if (catalogActions.getDS() instanceof PDActionJavaScript) { + catalogActions.setDS(null); + } + if (catalogActions.getWP() instanceof PDActionJavaScript) { + catalogActions.setWP(null); + } + if (catalogActions.getDP() instanceof PDActionJavaScript) { + catalogActions.setDP(null); + } + } + + PDAcroForm acroForm = catalog.getAcroForm(); + if (acroForm != null) { + for (PDField field : acroForm.getFields()) { + PDFormFieldAdditionalActions actions = field.getActions(); + if (actions != null) { + if (actions.getC() instanceof PDActionJavaScript) { + actions.setC(null); + } + if (actions.getF() instanceof PDActionJavaScript) { + actions.setF(null); + } + if (actions.getK() instanceof PDActionJavaScript) { + actions.setK(null); + } + if (actions.getV() instanceof PDActionJavaScript) { + actions.setV(null); + } + } + } + } + for (PDPage page : document.getPages()) { + PDPageAdditionalActions pageActions = page.getActions(); + if (pageActions != null) { + if (pageActions.getO() instanceof PDActionJavaScript) { + pageActions.setO(null); + } + if (pageActions.getC() instanceof PDActionJavaScript) { + pageActions.setC(null); + } + } + for (PDAnnotation annotation : page.getAnnotations()) { if (annotation instanceof PDAnnotationWidget widget) { PDAction action = widget.getAction(); @@ -127,41 +186,26 @@ public class SanitizeController { } } } - PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm(); - if (acroForm != null) { - for (PDField field : acroForm.getFields()) { - PDFormFieldAdditionalActions actions = field.getActions(); - if (actions != null) { - if (actions.getC() instanceof PDActionJavaScript) { - actions.setC(null); - } - if (actions.getF() instanceof PDActionJavaScript) { - actions.setF(null); - } - if (actions.getK() instanceof PDActionJavaScript) { - actions.setK(null); - } - if (actions.getV() instanceof 
PDActionJavaScript) { - actions.setV(null); - } - } - } + } + } + + private static void sanitizeEmbeddedFiles(PDDocument document) throws IOException { + PDDocumentCatalog catalog = document.getDocumentCatalog(); + PDDocumentNameDictionary names = catalog.getNames(); + if (names != null) { + names.setEmbeddedFiles(null); + } + + for (PDPage page : document.getPages()) { + List annotations = page.getAnnotations(); + if (annotations != null && !annotations.isEmpty()) { + annotations.removeIf( + annotation -> annotation instanceof PDAnnotationFileAttachment); } } } - private void sanitizeEmbeddedFiles(PDDocument document) { - PDPageTree allPages = document.getPages(); - - for (PDPage page : allPages) { - PDResources res = page.getResources(); - if (res != null && res.getCOSObject() != null) { - res.getCOSObject().removeItem(COSName.getPDFName("EmbeddedFiles")); - } - } - } - - private void sanitizeXMPMetadata(PDDocument document) { + private static void sanitizeXMPMetadata(PDDocument document) { if (document.getDocumentCatalog() != null) { PDMetadata metadata = document.getDocumentCatalog().getMetadata(); if (metadata != null) { @@ -170,7 +214,7 @@ public class SanitizeController { } } - private void sanitizeDocumentInfoMetadata(PDDocument document) { + private static void sanitizeDocumentInfoMetadata(PDDocument document) { PDDocumentInformation docInfo = document.getDocumentInformation(); if (docInfo != null) { PDDocumentInformation newInfo = new PDDocumentInformation(); @@ -178,7 +222,7 @@ public class SanitizeController { } } - private void sanitizeLinks(PDDocument document) throws IOException { + private static void sanitizeLinks(PDDocument document) throws IOException { for (PDPage page : document.getPages()) { for (PDAnnotation annotation : page.getAnnotations()) { if (annotation instanceof PDAnnotationLink linkAnnotation) { @@ -191,7 +235,7 @@ public class SanitizeController { } } - private void sanitizeFonts(PDDocument document) { + private static void 
sanitizeFonts(PDDocument document) { for (PDPage page : document.getPages()) { if (page != null && page.getResources() != null