[V2] feat(pipeline): add pre-publish sanitization workflow (#4910)

# Description of Changes
TL;DR:
- Created `Pre-publish-sanitization.json` default pipeline configuration
- Added sanitization operations removing metadata, JavaScript, embedded
files, and annotations
- Registered new pipeline in `GeneralUtils`
- Included "Pre-publish Sanitization" in the suggested automations list

This pull request introduces a new "Pre-publish Sanitization" workflow
for PDF files, designed to help users remove sensitive metadata and
content before publishing documents online. The changes include backend
and frontend updates to support this workflow, as well as a minor bug
fix in form data handling: `buildChangeMetadataFormData` now checks that
`customMetadata` is present and is an array before iterating over it.

**New Pre-publish Sanitization Workflow:**

* Added a new default configuration file `Pre-publish-sanitization.json`
that defines a pipeline for sanitizing PDFs: it removes JavaScript,
embedded files, metadata, links, and annotations, flattens forms, and
compresses the document.
* Registered the new `Pre-publish-sanitization.json` config in the set
of default web UI configurations in `GeneralUtils.java`, making it
available in the application.

**Frontend Integration:**

* Added a new suggested automation called "Pre-publish Sanitization" in
the `useSuggestedAutomations` hook, including its name, description,
operations, and a new privacy icon for better UI representation.

<!--
Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)
-->

---

## Checklist

### General

- [X] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [X] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [X] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### Translations (if applicable)

- [ ] I ran
[`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [X] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.

---------

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
Balázs Szücs
2025-11-20 15:59:13 +01:00
committed by GitHub
parent c87da6d5cc
commit 06af6be14b
5 changed files with 125 additions and 8 deletions

View File

@@ -38,6 +38,7 @@ public class GeneralUtils {
Set.of(
"OCR images.json",
"Prepare-pdfs-for-email.json",
"Pre-publish-sanitization.json",
"split-rotate-auto-rename.json");
private final String DEFAULT_WEBUI_CONFIGS_DIR = "defaultWebUIConfigs";

View File

@@ -0,0 +1,54 @@
{
"name": "Pre-publish-sanitization",
"pipeline": [
{
"operation": "/api/v1/security/sanitize-pdf",
"parameters": {
"removeJavaScript": true,
"removeEmbeddedFiles": true,
"removeXMPMetadata": true,
"removeMetadata": true,
"removeLinks": true,
"removeFonts": false
}
},
{
"operation": "/api/v1/misc/flatten",
"parameters": {
"flattenOnlyForms": true
}
},
{
"operation": "/api/v1/general/remove-annotations",
"parameters": {}
},
{
"operation": "/api/v1/misc/update-metadata",
"parameters": {
"deleteAll": true,
"author": "",
"creationDate": "",
"creator": "",
"keywords": "",
"modificationDate": "",
"producer": "",
"subject": "",
"title": "",
"trapped": ""
}
},
{
"operation": "/api/v1/misc/compress-pdf",
"parameters": {
"optimizeLevel": 3,
"expectedOutputSize": ""
}
}
],
"_examples": {
"outputDir": "{outputFolder}/{folderName}",
"outputFileName": "{filename}-{pipelineName}-{date}-{time}"
},
"outputDir": "{outputFolder}",
"outputFileName": "pre_publish_{filename}.PDF"
}

View File

@@ -4840,7 +4840,9 @@
"secureWorkflow": "Security Workflow",
"secureWorkflowDesc": "Secures PDF documents by removing potentially malicious content like JavaScript and embedded files, then adds password protection to prevent unauthorised access. Password is set to 'password' by default.",
"processImages": "Process Images",
"processImagesDesc": "Converts multiple image files into a single PDF document, then applies OCR technology to extract searchable text from the images."
"processImagesDesc": "Converts multiple image files into a single PDF document, then applies OCR technology to extract searchable text from the images.",
"prePublishSanitization": "Pre-publish Sanitization",
"prePublishSanitizationDesc": "Sanitization workflow that removes all hidden metadata, JavaScript, embedded files, annotations, and flattens forms to prevent data leakage before publishing PDFs online."
}
},
"colorPicker": {

View File

@@ -9,6 +9,7 @@ import { SPLIT_METHODS } from '@app/constants/splitConstants';
const CompressIcon = () => React.createElement(LocalIcon, { icon: 'compress', width: '1.5rem', height: '1.5rem' });
const SecurityIcon = () => React.createElement(LocalIcon, { icon: 'security', width: '1.5rem', height: '1.5rem' });
const StarIcon = () => React.createElement(LocalIcon, { icon: 'star', width: '1.5rem', height: '1.5rem' });
const PrivacyIcon = () => React.createElement(LocalIcon, { icon: 'shield-lock', width: '1.5rem', height: '1.5rem' });
export function useSuggestedAutomations(): SuggestedAutomation[] {
const { t } = useTranslation();
@@ -67,6 +68,63 @@ export function useSuggestedAutomations(): SuggestedAutomation[] {
updatedAt: now,
icon: SecurityIcon,
},
{
id: "pre-publish-sanitization",
name: t("automation.suggested.prePublishSanitization", "Pre-publish Sanitization"),
description: t("automation.suggested.prePublishSanitizationDesc", "Sanitization workflow that removes all hidden metadata, JavaScript, embedded files, annotations, and flattens forms to prevent data leakage before publishing PDFs online."),
operations: [
{
operation: "sanitize",
parameters: {
removeJavaScript: true,
removeEmbeddedFiles: true,
removeXMPMetadata: true,
removeMetadata: true,
removeLinks: true,
removeFonts: false,
}
},
{
operation: "flatten",
parameters: {
flattenOnlyForms: true,
}
},
{
operation: "removeAnnotations",
parameters: {}
},
{
operation: "changeMetadata",
parameters: {
deleteAll: true,
author: '',
creationDate: '',
creator: '',
keywords: '',
modificationDate: '',
producer: '',
subject: '',
title: '',
trapped: '',
}
},
{
operation: "compress",
parameters: {
compressionLevel: 3,
grayscale: false,
expectedSize: '',
compressionMethod: 'quality',
fileSizeValue: '',
fileSizeUnit: 'MB',
}
},
],
createdAt: now,
updatedAt: now,
icon: PrivacyIcon,
},
{
id: "email-preparation",
name: t("automation.suggested.emailPreparation", "Email Preparation"),

View File

@@ -40,13 +40,15 @@ export const buildChangeMetadataFormData = (parameters: ChangeMetadataParameters
// Custom metadata - backend expects them as values to 'allRequestParams[customKeyX/customValueX]'
let keyNumber = 0;
parameters.customMetadata.forEach((entry) => {
if (entry.key.trim() && entry.value.trim()) {
keyNumber += 1;
formData.append(`allRequestParams[customKey${keyNumber}]`, entry.key.trim());
formData.append(`allRequestParams[customValue${keyNumber}]`, entry.value.trim());
}
});
if (parameters.customMetadata && Array.isArray(parameters.customMetadata)) {
parameters.customMetadata.forEach((entry) => {
if (entry.key.trim() && entry.value.trim()) {
keyNumber += 1;
formData.append(`allRequestParams[customKey${keyNumber}]`, entry.key.trim());
formData.append(`allRequestParams[customValue${keyNumber}]`, entry.value.trim());
}
});
}
return formData;
};