fix(sanitize): fix JavaScript handling, embedded file sanitization (#4652)

# Description of Changes

### Fixes
- Added document-level JavaScript removal: Now removes a JavaScript
OpenAction and JavaScript catalog additional actions (WC, WS, DS, WP, DP)
that execute on document open, close, save, and print events
- Added page-level JavaScript removal: Removes page open/close actions
(O, C) that were previously missed
- Added annotation additional actions removal: Removes all 10 annotation
event handlers (Bl, D, E, Fo, PC, PI, PO, PV, U, X) for mouse/focus
events
- Fixed embedded file removal: Corrected implementation to use
`catalog.getNames().setEmbeddedFiles(null)` instead of incorrectly
targeting page resources, and additionally removes file attachment
annotations from every page

### Verification:

Before (after embedded file "removal"):
<img width="706" height="671" alt="image"
src="https://github.com/user-attachments/assets/7d10e1ba-78bc-4094-b28a-0eae3613db3c"
/>
After:
<img width="706" height="671" alt="image"
src="https://github.com/user-attachments/assets/b278c2a6-1605-483f-b39c-1c3aa047acc2"
/>


<!--
Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)
-->

---

## Checklist

### General

- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [x] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
Balázs Szücs 2025-10-11 19:37:58 +02:00 committed by GitHub
parent 085b8795d5
commit 06efab5cb2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -2,22 +2,25 @@ package stirling.software.SPDF.controller.api.security;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.interactive.action.PDAction;
import org.apache.pdfbox.pdmodel.interactive.action.PDActionJavaScript;
import org.apache.pdfbox.pdmodel.interactive.action.PDActionLaunch;
import org.apache.pdfbox.pdmodel.interactive.action.PDActionURI;
import org.apache.pdfbox.pdmodel.interactive.action.PDDocumentCatalogAdditionalActions;
import org.apache.pdfbox.pdmodel.interactive.action.PDFormFieldAdditionalActions;
import org.apache.pdfbox.pdmodel.interactive.action.PDPageAdditionalActions;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationFileAttachment;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
@ -34,6 +37,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.security.SanitizePdfRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
@ -43,6 +47,7 @@ import stirling.software.common.util.WebResponseUtils;
@RestController
@RequestMapping("/api/v1/security")
@Tag(name = "Security", description = "Security APIs")
@Slf4j
@RequiredArgsConstructor
public class SanitizeController {
@ -99,7 +104,7 @@ public class SanitizeController {
GeneralUtils.generateFilename(inputFile.getOriginalFilename(), "_sanitized.pdf"));
}
private void sanitizeJavaScript(PDDocument document) throws IOException {
private static void sanitizeJavaScript(PDDocument document) throws IOException {
// Get the root dictionary (catalog) of the PDF
PDDocumentCatalog catalog = document.getDocumentCatalog();
@ -118,16 +123,30 @@ public class SanitizeController {
}
}
for (PDPage page : document.getPages()) {
for (PDAnnotation annotation : page.getAnnotations()) {
if (annotation instanceof PDAnnotationWidget widget) {
PDAction action = widget.getAction();
if (action instanceof PDActionJavaScript) {
widget.setAction(null);
if (catalog.getOpenAction() instanceof PDActionJavaScript) {
catalog.setOpenAction(null);
}
PDDocumentCatalogAdditionalActions catalogActions = catalog.getActions();
if (catalogActions != null) {
if (catalogActions.getWC() instanceof PDActionJavaScript) {
catalogActions.setWC(null);
}
if (catalogActions.getWS() instanceof PDActionJavaScript) {
catalogActions.setWS(null);
}
if (catalogActions.getDS() instanceof PDActionJavaScript) {
catalogActions.setDS(null);
}
if (catalogActions.getWP() instanceof PDActionJavaScript) {
catalogActions.setWP(null);
}
if (catalogActions.getDP() instanceof PDActionJavaScript) {
catalogActions.setDP(null);
}
}
}
PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm();
PDAcroForm acroForm = catalog.getAcroForm();
if (acroForm != null) {
for (PDField field : acroForm.getFields()) {
PDFormFieldAdditionalActions actions = field.getActions();
@ -147,21 +166,46 @@ public class SanitizeController {
}
}
}
for (PDPage page : document.getPages()) {
PDPageAdditionalActions pageActions = page.getActions();
if (pageActions != null) {
if (pageActions.getO() instanceof PDActionJavaScript) {
pageActions.setO(null);
}
if (pageActions.getC() instanceof PDActionJavaScript) {
pageActions.setC(null);
}
}
private void sanitizeEmbeddedFiles(PDDocument document) {
PDPageTree allPages = document.getPages();
for (PDPage page : allPages) {
PDResources res = page.getResources();
if (res != null && res.getCOSObject() != null) {
res.getCOSObject().removeItem(COSName.getPDFName("EmbeddedFiles"));
for (PDAnnotation annotation : page.getAnnotations()) {
if (annotation instanceof PDAnnotationWidget widget) {
PDAction action = widget.getAction();
if (action instanceof PDActionJavaScript) {
widget.setAction(null);
}
}
}
}
}
private void sanitizeXMPMetadata(PDDocument document) {
/**
 * Strips embedded files from the document.
 *
 * <p>Two places are cleaned: the EmbeddedFiles entry of the catalog's name dictionary, and
 * every {@link PDAnnotationFileAttachment} found in a page's annotation list.
 *
 * @param document the document to modify in place
 * @throws IOException propagated from PDFBox while reading a page's annotations
 */
private static void sanitizeEmbeddedFiles(PDDocument document) throws IOException {
    PDDocumentCatalog catalog = document.getDocumentCatalog();
    PDDocumentNameDictionary names = catalog.getNames();
    if (names != null) {
        names.setEmbeddedFiles(null);
    }

    for (PDPage page : document.getPages()) {
        List<PDAnnotation> annotations = page.getAnnotations();
        if (annotations == null || annotations.isEmpty()) {
            continue;
        }

        List<PDAnnotation> kept =
                annotations.stream()
                        .filter(annotation -> !(annotation instanceof PDAnnotationFileAttachment))
                        .toList();

        // Write the filtered list back through the page setter instead of mutating the
        // returned list in place: this guarantees the page dictionary is updated even if
        // the list handed out by getAnnotations() does not propagate iterator-based
        // removals (as removeIf performs) to the underlying annotation array.
        // NOTE(review): confirm against the PDFBox version in use.
        if (kept.size() != annotations.size()) {
            page.setAnnotations(kept);
        }
    }
}
private static void sanitizeXMPMetadata(PDDocument document) {
if (document.getDocumentCatalog() != null) {
PDMetadata metadata = document.getDocumentCatalog().getMetadata();
if (metadata != null) {
@ -170,7 +214,7 @@ public class SanitizeController {
}
}
private void sanitizeDocumentInfoMetadata(PDDocument document) {
private static void sanitizeDocumentInfoMetadata(PDDocument document) {
PDDocumentInformation docInfo = document.getDocumentInformation();
if (docInfo != null) {
PDDocumentInformation newInfo = new PDDocumentInformation();
@ -178,7 +222,7 @@ public class SanitizeController {
}
}
private void sanitizeLinks(PDDocument document) throws IOException {
private static void sanitizeLinks(PDDocument document) throws IOException {
for (PDPage page : document.getPages()) {
for (PDAnnotation annotation : page.getAnnotations()) {
if (annotation instanceof PDAnnotationLink linkAnnotation) {
@ -191,7 +235,7 @@ public class SanitizeController {
}
}
private void sanitizeFonts(PDDocument document) {
private static void sanitizeFonts(PDDocument document) {
for (PDPage page : document.getPages()) {
if (page != null
&& page.getResources() != null