From 06af6be14b26a81bc33220aa75dbdb1610be0cb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Sz=C3=BCcs?= <127139797+balazs-szucs@users.noreply.github.com> Date: Thu, 20 Nov 2025 15:59:13 +0100 Subject: [PATCH] [V2] feat(pipeline): add pre-publish sanitization workflow (#4910) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description of Changes TL;DR: - Created `Pre-publish-sanitization.json` default pipeline configuration - Added sanitization operations removing metadata, JavaScript, embedded files, and annotations - Registered new pipeline in `GeneralUtils` - Included "Pre-publish Sanitization" in the suggested automations list This pull request introduces a new "Pre-publish Sanitization" workflow for PDF files, designed to help users remove sensitive metadata and content before publishing documents online. The changes include backend and frontend updates to support this workflow, as well as a minor bug fix in form data handling (`buildChangeMetadataFormData` now skips custom metadata when `customMetadata` is missing or not an array, as is the case for this automation). **New Pre-publish Sanitization Workflow:** * Added a new default configuration file `Pre-publish-sanitization.json` that defines a pipeline for sanitizing PDFs: it removes JavaScript, embedded files, metadata, and annotations, flattens forms, and compresses the document. * Registered the new `Pre-publish-sanitization.json` config in the set of default web UI configurations in `GeneralUtils.java`, making it available in the application. **Frontend Integration:** * Added a new suggested automation called "Pre-publish Sanitization" in the `useSuggestedAutomations` hook, including its name, description, operations, and a new privacy icon for better UI representation. 
--- ## Checklist ### General - [X] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [X] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [X] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### Translations (if applicable) - [ ] I ran [`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [X] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details. 
--------- Signed-off-by: Balázs Szücs --- .../software/common/util/GeneralUtils.java | 1 + .../Pre-publish-sanitization.json | 54 +++++++++++++++++ .../public/locales/en-GB/translation.json | 4 +- .../tools/automate/useSuggestedAutomations.ts | 58 +++++++++++++++++++ .../useChangeMetadataOperation.ts | 16 ++--- 5 files changed, 125 insertions(+), 8 deletions(-) create mode 100644 app/core/src/main/resources/static/pipeline/defaultWebUIConfigs/Pre-publish-sanitization.json diff --git a/app/common/src/main/java/stirling/software/common/util/GeneralUtils.java b/app/common/src/main/java/stirling/software/common/util/GeneralUtils.java index 10ac8b595..ecf0d75d4 100644 --- a/app/common/src/main/java/stirling/software/common/util/GeneralUtils.java +++ b/app/common/src/main/java/stirling/software/common/util/GeneralUtils.java @@ -38,6 +38,7 @@ public class GeneralUtils { Set.of( "OCR images.json", "Prepare-pdfs-for-email.json", + "Pre-publish-sanitization.json", "split-rotate-auto-rename.json"); private final String DEFAULT_WEBUI_CONFIGS_DIR = "defaultWebUIConfigs"; diff --git a/app/core/src/main/resources/static/pipeline/defaultWebUIConfigs/Pre-publish-sanitization.json b/app/core/src/main/resources/static/pipeline/defaultWebUIConfigs/Pre-publish-sanitization.json new file mode 100644 index 000000000..2024f3003 --- /dev/null +++ b/app/core/src/main/resources/static/pipeline/defaultWebUIConfigs/Pre-publish-sanitization.json @@ -0,0 +1,54 @@ +{ + "name": "Pre-publish-sanitization", + "pipeline": [ + { + "operation": "/api/v1/security/sanitize-pdf", + "parameters": { + "removeJavaScript": true, + "removeEmbeddedFiles": true, + "removeXMPMetadata": true, + "removeMetadata": true, + "removeLinks": true, + "removeFonts": false + } + }, + { + "operation": "/api/v1/misc/flatten", + "parameters": { + "flattenOnlyForms": true + } + }, + { + "operation": "/api/v1/general/remove-annotations", + "parameters": {} + }, + { + "operation": "/api/v1/misc/update-metadata", + "parameters": { 
+ "deleteAll": true, + "author": "", + "creationDate": "", + "creator": "", + "keywords": "", + "modificationDate": "", + "producer": "", + "subject": "", + "title": "", + "trapped": "" + } + }, + { + "operation": "/api/v1/misc/compress-pdf", + "parameters": { + "optimizeLevel": 3, + "expectedOutputSize": "" + } + } + ], + "_examples": { + "outputDir": "{outputFolder}/{folderName}", + "outputFileName": "{filename}-{pipelineName}-{date}-{time}" + }, + "outputDir": "{outputFolder}", + "outputFileName": "pre_publish_{filename}.PDF" +} diff --git a/frontend/public/locales/en-GB/translation.json b/frontend/public/locales/en-GB/translation.json index 321129b67..0a115f1c6 100644 --- a/frontend/public/locales/en-GB/translation.json +++ b/frontend/public/locales/en-GB/translation.json @@ -4840,7 +4840,9 @@ "secureWorkflow": "Security Workflow", "secureWorkflowDesc": "Secures PDF documents by removing potentially malicious content like JavaScript and embedded files, then adds password protection to prevent unauthorised access. Password is set to 'password' by default.", "processImages": "Process Images", - "processImagesDesc": "Converts multiple image files into a single PDF document, then applies OCR technology to extract searchable text from the images." + "processImagesDesc": "Converts multiple image files into a single PDF document, then applies OCR technology to extract searchable text from the images.", + "prePublishSanitization": "Pre-publish Sanitization", + "prePublishSanitizationDesc": "Sanitization workflow that removes all hidden metadata, JavaScript, embedded files, annotations, and flattens forms to prevent data leakage before publishing PDFs online." 
} }, "colorPicker": { diff --git a/frontend/src/core/hooks/tools/automate/useSuggestedAutomations.ts b/frontend/src/core/hooks/tools/automate/useSuggestedAutomations.ts index 378380050..c13ab3211 100644 --- a/frontend/src/core/hooks/tools/automate/useSuggestedAutomations.ts +++ b/frontend/src/core/hooks/tools/automate/useSuggestedAutomations.ts @@ -9,6 +9,7 @@ import { SPLIT_METHODS } from '@app/constants/splitConstants'; const CompressIcon = () => React.createElement(LocalIcon, { icon: 'compress', width: '1.5rem', height: '1.5rem' }); const SecurityIcon = () => React.createElement(LocalIcon, { icon: 'security', width: '1.5rem', height: '1.5rem' }); const StarIcon = () => React.createElement(LocalIcon, { icon: 'star', width: '1.5rem', height: '1.5rem' }); +const PrivacyIcon = () => React.createElement(LocalIcon, { icon: 'shield-lock', width: '1.5rem', height: '1.5rem' }); export function useSuggestedAutomations(): SuggestedAutomation[] { const { t } = useTranslation(); @@ -67,6 +68,63 @@ export function useSuggestedAutomations(): SuggestedAutomation[] { updatedAt: now, icon: SecurityIcon, }, + { + id: "pre-publish-sanitization", + name: t("automation.suggested.prePublishSanitization", "Pre-publish Sanitization"), + description: t("automation.suggested.prePublishSanitizationDesc", "Sanitization workflow that removes all hidden metadata, JavaScript, embedded files, annotations, and flattens forms to prevent data leakage before publishing PDFs online."), + operations: [ + { + operation: "sanitize", + parameters: { + removeJavaScript: true, + removeEmbeddedFiles: true, + removeXMPMetadata: true, + removeMetadata: true, + removeLinks: true, + removeFonts: false, + } + }, + { + operation: "flatten", + parameters: { + flattenOnlyForms: true, + } + }, + { + operation: "removeAnnotations", + parameters: {} + }, + { + operation: "changeMetadata", + parameters: { + deleteAll: true, + author: '', + creationDate: '', + creator: '', + keywords: '', + modificationDate: '', + 
producer: '', + subject: '', + title: '', + trapped: '', + } + }, + { + operation: "compress", + parameters: { + compressionLevel: 3, + grayscale: false, + expectedSize: '', + compressionMethod: 'quality', + fileSizeValue: '', + fileSizeUnit: 'MB', + } + }, + ], + createdAt: now, + updatedAt: now, + icon: PrivacyIcon, + }, { id: "email-preparation", name: t("automation.suggested.emailPreparation", "Email Preparation"), diff --git a/frontend/src/core/hooks/tools/changeMetadata/useChangeMetadataOperation.ts b/frontend/src/core/hooks/tools/changeMetadata/useChangeMetadataOperation.ts index 828fa01ab..b0692e5d6 100644 --- a/frontend/src/core/hooks/tools/changeMetadata/useChangeMetadataOperation.ts +++ b/frontend/src/core/hooks/tools/changeMetadata/useChangeMetadataOperation.ts @@ -40,13 +40,15 @@ export const buildChangeMetadataFormData = (parameters: ChangeMetadataParameters // Custom metadata - backend expects them as values to 'allRequestParams[customKeyX/customValueX]' let keyNumber = 0; - parameters.customMetadata.forEach((entry) => { - if (entry.key.trim() && entry.value.trim()) { - keyNumber += 1; - formData.append(`allRequestParams[customKey${keyNumber}]`, entry.key.trim()); - formData.append(`allRequestParams[customValue${keyNumber}]`, entry.value.trim()); - } - }); + if (parameters.customMetadata && Array.isArray(parameters.customMetadata)) { + parameters.customMetadata.forEach((entry) => { + if (entry.key.trim() && entry.value.trim()) { + keyNumber += 1; + formData.append(`allRequestParams[customKey${keyNumber}]`, entry.key.trim()); + formData.append(`allRequestParams[customValue${keyNumber}]`, entry.value.trim()); + } + }); + } return formData; };