mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2026-02-17 13:52:14 +01:00
feat(pdfa-conversion): Implement Strict PDF/A Mode with Verification (#5663)
# Description of Changes This PR introduces a new "Strict Mode" for the PDF to PDF/A conversion tool. When enabled, the application will use VeraPDF to verify that the resulting file is perfectly compliant with the selected PDF/A standard. If validation fails, the system will return a descriptive error instead of a non-compliant file. <!-- Please provide a summary of the changes, including: - What was changed - Why the change was made - Any challenges encountered Closes #(issue_number) --> <img width="371" height="993" alt="image" src="https://github.com/user-attachments/assets/a22d50b0-ad7c-46b0-be79-b79c2bc80d92" /> --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### Translations (if applicable) - [ ] I ran [`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. 
Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details. --------- Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
parent
00136f9e20
commit
0f5a0e694a
@ -71,10 +71,12 @@ import org.apache.xmpbox.schema.PDFAIdentificationSchema;
|
||||
import org.apache.xmpbox.schema.XMPBasicSchema;
|
||||
import org.apache.xmpbox.xml.DomXmpParser;
|
||||
import org.apache.xmpbox.xml.XmpSerializer;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.ModelAttribute;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.web.server.ResponseStatusException;
|
||||
|
||||
import io.github.pixee.security.Filenames;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
@ -99,6 +101,7 @@ public class ConvertPDFToPDFA {
|
||||
|
||||
private static final Pattern NON_PRINTABLE_ASCII = Pattern.compile("[^\\x20-\\x7E]");
|
||||
private final RuntimePathConfig runtimePathConfig;
|
||||
private final stirling.software.SPDF.service.VeraPDFService veraPDFService;
|
||||
|
||||
private static final String ICC_RESOURCE_PATH = "/icc/sRGB2014.icc";
|
||||
private static final int PDFA_COMPATIBILITY_POLICY = 1;
|
||||
@ -587,7 +590,8 @@ public class ConvertPDFToPDFA {
|
||||
if (isPdfX) {
|
||||
return handlePdfXConversion(inputFile, outputFormat);
|
||||
} else {
|
||||
return handlePdfAConversion(inputFile, outputFormat);
|
||||
return handlePdfAConversion(
|
||||
inputFile, outputFormat, request.getStrict() != null && request.getStrict());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1793,7 +1797,7 @@ public class ConvertPDFToPDFA {
|
||||
}
|
||||
|
||||
private ResponseEntity<byte[]> handlePdfAConversion(
|
||||
MultipartFile inputFile, String outputFormat) throws Exception {
|
||||
MultipartFile inputFile, String outputFormat, boolean strict) throws Exception {
|
||||
PdfaProfile profile = PdfaProfile.fromRequest(outputFormat);
|
||||
|
||||
// Get the original filename without extension
|
||||
@ -1822,6 +1826,10 @@ public class ConvertPDFToPDFA {
|
||||
|
||||
validateAndWarnPdfA(converted, profile, "Ghostscript");
|
||||
|
||||
if (strict) {
|
||||
verifyStrictCompliance(converted);
|
||||
}
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
converted, outputFilename, MediaType.APPLICATION_PDF);
|
||||
} catch (IOException | InterruptedException e) {
|
||||
@ -1839,14 +1847,42 @@ public class ConvertPDFToPDFA {
|
||||
// Validate with PDFBox preflight and warn if issues found
|
||||
validateAndWarnPdfA(converted, profile, "PDFBox/LibreOffice");
|
||||
|
||||
if (strict) {
|
||||
verifyStrictCompliance(converted);
|
||||
}
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
converted, outputFilename, MediaType.APPLICATION_PDF);
|
||||
|
||||
} finally {
|
||||
deleteQuietly(workingDir);
|
||||
}
|
||||
}
|
||||
|
||||
private void verifyStrictCompliance(byte[] pdfBytes) throws IOException {
|
||||
try (InputStream is = new ByteArrayInputStream(pdfBytes)) {
|
||||
List<stirling.software.SPDF.model.api.security.PDFVerificationResult> results =
|
||||
veraPDFService.validatePDF(is);
|
||||
boolean isCompliant = results.stream().anyMatch(result -> result.isCompliant());
|
||||
if (!isCompliant) {
|
||||
String details =
|
||||
results.stream()
|
||||
.map(r -> r.getStandard() + ": " + r.getComplianceSummary())
|
||||
.collect(Collectors.joining("; "));
|
||||
throw new ResponseStatusException(
|
||||
HttpStatus.BAD_REQUEST,
|
||||
"Strict PDF/A mode enabled: Conversion is not perfectly compliant. Details: "
|
||||
+ details);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
if (e instanceof ResponseStatusException) {
|
||||
throw (ResponseStatusException) e;
|
||||
}
|
||||
log.error("Error during strict PDF/A verification", e);
|
||||
throw new ResponseStatusException(
|
||||
HttpStatus.INTERNAL_SERVER_ERROR, "Error during strict PDF/A verification");
|
||||
}
|
||||
}
|
||||
|
||||
private Path sanitizePdfWithPdfBox(Path inputPdf, boolean addWhiteBackground) {
|
||||
try {
|
||||
Path sanitizedPath =
|
||||
|
||||
@ -16,4 +16,9 @@ public class PdfToPdfARequest extends PDFFile {
|
||||
requiredMode = Schema.RequiredMode.REQUIRED,
|
||||
allowableValues = {"pdfa", "pdfa-1", "pdfa-2", "pdfa-2b", "pdfa-3", "pdfa-3b", "pdfx"})
|
||||
private String outputFormat;
|
||||
|
||||
@Schema(
|
||||
description =
|
||||
"If true, the conversion will fail if the output is not perfectly compliant")
|
||||
private Boolean strict = false;
|
||||
}
|
||||
|
||||
@ -1303,6 +1303,8 @@ outputFormat = "Output Format"
|
||||
pdfaNote = "PDF/A-1b is more compatible, PDF/A-2b supports more features, PDF/A-3b supports embedded files."
|
||||
pdfaFormat = "PDF/A Format"
|
||||
pdfaDigitalSignatureWarning = "The PDF contains a digital signature. This will be removed in the next step."
|
||||
strictMode = "Strict Mode"
|
||||
strictModeDesc = "Error if conversion is not perfect (uses VeraPDF verification)"
|
||||
pdfxDigitalSignatureWarning = "The PDF contains a digital signature. This will be removed in the next step."
|
||||
pdfxDescription = "PDF/X is an ISO standard PDF subset for reliable printing and graphics exchange."
|
||||
fileFormat = "File Format"
|
||||
|
||||
@ -146,7 +146,8 @@ const ConvertSettings = ({
|
||||
includeAllRecipients: false,
|
||||
});
|
||||
onParameterChange('pdfaOptions', {
|
||||
outputFormat: 'pdfa-1',
|
||||
outputFormat: 'pdfa-2b',
|
||||
strict: false,
|
||||
});
|
||||
onParameterChange('pdfxOptions', {
|
||||
outputFormat: 'pdfx',
|
||||
@ -234,7 +235,8 @@ const ConvertSettings = ({
|
||||
includeAllRecipients: false,
|
||||
});
|
||||
onParameterChange('pdfaOptions', {
|
||||
outputFormat: 'pdfa-1',
|
||||
outputFormat: 'pdfa-2b',
|
||||
strict: false,
|
||||
});
|
||||
onParameterChange('pdfxOptions', {
|
||||
outputFormat: 'pdfx',
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { Stack, Text, Select, Alert } from '@mantine/core';
|
||||
import { Stack, Text, Select, Alert, Checkbox } from '@mantine/core';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { ConvertParameters } from '@app/hooks/tools/convert/useConvertParameters';
|
||||
import { usePdfSignatureDetection } from '@app/hooks/usePdfSignatureDetection';
|
||||
@ -23,8 +23,8 @@ const ConvertToPdfaSettings = ({
|
||||
|
||||
const pdfaFormatOptions = [
|
||||
{ value: 'pdfa-1', label: 'PDF/A-1b' },
|
||||
{ value: 'pdfa', label: 'PDF/A-2b' },
|
||||
{ value: 'pdfa-3', label: 'PDF/A-3b' }
|
||||
{ value: 'pdfa-2b', label: 'PDF/A-2b' },
|
||||
{ value: 'pdfa-3b', label: 'PDF/A-3b' }
|
||||
];
|
||||
|
||||
return (
|
||||
@ -45,7 +45,7 @@ const ConvertToPdfaSettings = ({
|
||||
value={parameters.pdfaOptions.outputFormat}
|
||||
onChange={(value) => onParameterChange('pdfaOptions', {
|
||||
...parameters.pdfaOptions,
|
||||
outputFormat: value || 'pdfa-1'
|
||||
outputFormat: value || 'pdfa-2b'
|
||||
})}
|
||||
data={pdfaFormatOptions}
|
||||
disabled={disabled || isChecking}
|
||||
@ -56,6 +56,17 @@ const ConvertToPdfaSettings = ({
|
||||
{t("convert.pdfaNote", "PDF/A-1b is more compatible, PDF/A-2b supports more features, PDF/A-3b supports embedded files.")}
|
||||
</Text>
|
||||
</Stack>
|
||||
|
||||
<Checkbox
|
||||
label={t("convert.strictMode", "Strict Mode")}
|
||||
description={t("convert.strictModeDesc", "Error if conversion is not perfect (uses VeraPDF verification)")}
|
||||
checked={parameters.pdfaOptions.strict}
|
||||
onChange={(event) => onParameterChange('pdfaOptions', {
|
||||
...parameters.pdfaOptions,
|
||||
strict: event.currentTarget.checked
|
||||
})}
|
||||
disabled={disabled || isChecking}
|
||||
/>
|
||||
</Stack>
|
||||
);
|
||||
};
|
||||
|
||||
@ -48,7 +48,8 @@ export function useSuggestedAutomations(): SuggestedAutomation[] {
|
||||
fromExtension: 'pdf',
|
||||
toExtension: 'pdfa',
|
||||
pdfaOptions: {
|
||||
outputFormat: 'pdfa-1',
|
||||
outputFormat: 'pdfa-2b',
|
||||
strict: false,
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
@ -79,6 +79,7 @@ export const buildConvertFormData = (parameters: ConvertParameters, selectedFile
|
||||
formData.append("includeAllRecipients", emailOptions.includeAllRecipients.toString());
|
||||
} else if (fromExtension === 'pdf' && toExtension === 'pdfa') {
|
||||
formData.append("outputFormat", pdfaOptions.outputFormat);
|
||||
formData.append("strict", String(!!pdfaOptions.strict));
|
||||
} else if (fromExtension === 'pdf' && toExtension === 'pdfx') {
|
||||
// Use PDF/A endpoint with PDF/X format parameter
|
||||
formData.append("outputFormat", pdfxOptions?.outputFormat || 'pdfx');
|
||||
|
||||
@ -24,7 +24,7 @@ describe('useConvertParameters', () => {
|
||||
expect(result.current.parameters.emailOptions.maxAttachmentSizeMB).toBe(10);
|
||||
expect(result.current.parameters.emailOptions.downloadHtml).toBe(false);
|
||||
expect(result.current.parameters.emailOptions.includeAllRecipients).toBe(false);
|
||||
expect(result.current.parameters.pdfaOptions.outputFormat).toBe('pdfa-1');
|
||||
expect(result.current.parameters.pdfaOptions.outputFormat).toBe('pdfa-2b');
|
||||
});
|
||||
|
||||
test('should update individual parameters', () => {
|
||||
@ -95,7 +95,8 @@ describe('useConvertParameters', () => {
|
||||
|
||||
act(() => {
|
||||
result.current.updateParameter('pdfaOptions', {
|
||||
outputFormat: 'pdfa'
|
||||
outputFormat: 'pdfa',
|
||||
strict: false
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@ -35,6 +35,7 @@ export interface ConvertParameters extends BaseParameters {
|
||||
};
|
||||
pdfaOptions: {
|
||||
outputFormat: string;
|
||||
strict?: boolean;
|
||||
};
|
||||
pdfxOptions: {
|
||||
outputFormat: string;
|
||||
@ -93,7 +94,8 @@ export const defaultParameters: ConvertParameters = {
|
||||
includeAllRecipients: false,
|
||||
},
|
||||
pdfaOptions: {
|
||||
outputFormat: 'pdfa-1',
|
||||
outputFormat: 'pdfa-2b',
|
||||
strict: false,
|
||||
},
|
||||
pdfxOptions: {
|
||||
outputFormat: 'pdfx',
|
||||
|
||||
@ -105,7 +105,7 @@ describe('Convert Tool Integration Tests', () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
// Setup default apiClient mock
|
||||
mockedApiClient.post = vi.fn();
|
||||
mockedApiClient.post = vi.fn() as any;
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
@ -150,7 +150,8 @@ describe('Convert Tool Integration Tests', () => {
|
||||
includeAllRecipients: false
|
||||
},
|
||||
pdfaOptions: {
|
||||
outputFormat: ''
|
||||
outputFormat: '',
|
||||
strict: false
|
||||
},
|
||||
pdfxOptions: {
|
||||
outputFormat: 'pdfx'
|
||||
@ -232,7 +233,8 @@ describe('Convert Tool Integration Tests', () => {
|
||||
includeAllRecipients: false
|
||||
},
|
||||
pdfaOptions: {
|
||||
outputFormat: ''
|
||||
outputFormat: '',
|
||||
strict: false
|
||||
},
|
||||
pdfxOptions: {
|
||||
outputFormat: 'pdfx'
|
||||
@ -292,7 +294,8 @@ describe('Convert Tool Integration Tests', () => {
|
||||
includeAllRecipients: false
|
||||
},
|
||||
pdfaOptions: {
|
||||
outputFormat: ''
|
||||
outputFormat: '',
|
||||
strict: false
|
||||
},
|
||||
pdfxOptions: {
|
||||
outputFormat: 'pdfx'
|
||||
@ -361,7 +364,8 @@ describe('Convert Tool Integration Tests', () => {
|
||||
includeAllRecipients: false
|
||||
},
|
||||
pdfaOptions: {
|
||||
outputFormat: ''
|
||||
outputFormat: '',
|
||||
strict: false
|
||||
},
|
||||
pdfxOptions: {
|
||||
outputFormat: 'pdfx'
|
||||
@ -434,7 +438,8 @@ describe('Convert Tool Integration Tests', () => {
|
||||
includeAllRecipients: false
|
||||
},
|
||||
pdfaOptions: {
|
||||
outputFormat: ''
|
||||
outputFormat: '',
|
||||
strict: false
|
||||
},
|
||||
pdfxOptions: {
|
||||
outputFormat: 'pdfx'
|
||||
@ -505,7 +510,8 @@ describe('Convert Tool Integration Tests', () => {
|
||||
includeAllRecipients: false
|
||||
},
|
||||
pdfaOptions: {
|
||||
outputFormat: ''
|
||||
outputFormat: '',
|
||||
strict: false
|
||||
},
|
||||
pdfxOptions: {
|
||||
outputFormat: 'pdfx'
|
||||
@ -572,7 +578,8 @@ describe('Convert Tool Integration Tests', () => {
|
||||
includeAllRecipients: false
|
||||
},
|
||||
pdfaOptions: {
|
||||
outputFormat: ''
|
||||
outputFormat: '',
|
||||
strict: false
|
||||
},
|
||||
pdfxOptions: {
|
||||
outputFormat: 'pdfx'
|
||||
@ -636,7 +643,8 @@ describe('Convert Tool Integration Tests', () => {
|
||||
includeAllRecipients: false
|
||||
},
|
||||
pdfaOptions: {
|
||||
outputFormat: ''
|
||||
outputFormat: '',
|
||||
strict: false
|
||||
},
|
||||
pdfxOptions: {
|
||||
outputFormat: 'pdfx'
|
||||
@ -702,7 +710,8 @@ describe('Convert Tool Integration Tests', () => {
|
||||
includeAllRecipients: false
|
||||
},
|
||||
pdfaOptions: {
|
||||
outputFormat: ''
|
||||
outputFormat: '',
|
||||
strict: false
|
||||
},
|
||||
pdfxOptions: {
|
||||
outputFormat: 'pdfx'
|
||||
@ -765,7 +774,8 @@ describe('Convert Tool Integration Tests', () => {
|
||||
includeAllRecipients: false
|
||||
},
|
||||
pdfaOptions: {
|
||||
outputFormat: ''
|
||||
outputFormat: '',
|
||||
strict: false
|
||||
},
|
||||
pdfxOptions: {
|
||||
outputFormat: 'pdfx'
|
||||
@ -834,7 +844,8 @@ describe('Convert Tool Integration Tests', () => {
|
||||
includeAllRecipients: false
|
||||
},
|
||||
pdfaOptions: {
|
||||
outputFormat: ''
|
||||
outputFormat: '',
|
||||
strict: false
|
||||
},
|
||||
pdfxOptions: {
|
||||
outputFormat: 'pdfx'
|
||||
@ -902,7 +913,8 @@ describe('Convert Tool Integration Tests', () => {
|
||||
includeAllRecipients: false
|
||||
},
|
||||
pdfaOptions: {
|
||||
outputFormat: ''
|
||||
outputFormat: '',
|
||||
strict: false
|
||||
},
|
||||
pdfxOptions: {
|
||||
outputFormat: 'pdfx'
|
||||
|
||||
@ -396,7 +396,8 @@ describe('Convert Tool - Smart Detection Integration Tests', () => {
|
||||
paramsResult.current.updateParameter('fromExtension', 'pdf');
|
||||
paramsResult.current.updateParameter('toExtension', 'pdfa');
|
||||
paramsResult.current.updateParameter('pdfaOptions', {
|
||||
outputFormat: 'pdfa'
|
||||
outputFormat: 'pdfa',
|
||||
strict: false
|
||||
});
|
||||
});
|
||||
|
||||
@ -409,6 +410,7 @@ describe('Convert Tool - Smart Detection Integration Tests', () => {
|
||||
|
||||
const formData = (mockedApiClient.post as Mock).mock.calls[0][1] as FormData;
|
||||
expect(formData.get('outputFormat')).toBe('pdfa');
|
||||
expect(formData.get('strict')).toBe('false');
|
||||
expect(mockedApiClient.post).toHaveBeenCalledWith('/api/v1/convert/pdf/pdfa', expect.any(FormData), {
|
||||
responseType: 'blob'
|
||||
});
|
||||
|
||||
Loading…
Reference in New Issue
Block a user