mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2026-03-13 02:18:16 +01:00
feat:(pdfa-conversion) Implement Strict PDF/A Mode with Verification (#5663)
# Description of Changes This PR introduces a new "Strict Mode" for the PDF to PDF/A conversion tool. When enabled, the application will use VeraPDF to verify that the resulting file is perfectly compliant with the selected PDF/A standard. If validation fails, the system will return a descriptive error instead of a non-compliant file. <!-- Please provide a summary of the changes, including: - What was changed - Why the change was made - Any challenges encountered Closes #(issue_number) --> <img width="371" height="993" alt="image" src="https://github.com/user-attachments/assets/a22d50b0-ad7c-46b0-be79-b79c2bc80d92" /> --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### Translations (if applicable) - [ ] I ran [`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details. --------- Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
@@ -71,10 +71,12 @@ import org.apache.xmpbox.schema.PDFAIdentificationSchema;
|
||||
import org.apache.xmpbox.schema.XMPBasicSchema;
|
||||
import org.apache.xmpbox.xml.DomXmpParser;
|
||||
import org.apache.xmpbox.xml.XmpSerializer;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.ModelAttribute;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.web.server.ResponseStatusException;
|
||||
|
||||
import io.github.pixee.security.Filenames;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
@@ -99,6 +101,7 @@ public class ConvertPDFToPDFA {
|
||||
|
||||
private static final Pattern NON_PRINTABLE_ASCII = Pattern.compile("[^\\x20-\\x7E]");
|
||||
private final RuntimePathConfig runtimePathConfig;
|
||||
private final stirling.software.SPDF.service.VeraPDFService veraPDFService;
|
||||
|
||||
private static final String ICC_RESOURCE_PATH = "/icc/sRGB2014.icc";
|
||||
private static final int PDFA_COMPATIBILITY_POLICY = 1;
|
||||
@@ -587,7 +590,8 @@ public class ConvertPDFToPDFA {
|
||||
if (isPdfX) {
|
||||
return handlePdfXConversion(inputFile, outputFormat);
|
||||
} else {
|
||||
return handlePdfAConversion(inputFile, outputFormat);
|
||||
return handlePdfAConversion(
|
||||
inputFile, outputFormat, request.getStrict() != null && request.getStrict());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1793,7 +1797,7 @@ public class ConvertPDFToPDFA {
|
||||
}
|
||||
|
||||
private ResponseEntity<byte[]> handlePdfAConversion(
|
||||
MultipartFile inputFile, String outputFormat) throws Exception {
|
||||
MultipartFile inputFile, String outputFormat, boolean strict) throws Exception {
|
||||
PdfaProfile profile = PdfaProfile.fromRequest(outputFormat);
|
||||
|
||||
// Get the original filename without extension
|
||||
@@ -1822,6 +1826,10 @@ public class ConvertPDFToPDFA {
|
||||
|
||||
validateAndWarnPdfA(converted, profile, "Ghostscript");
|
||||
|
||||
if (strict) {
|
||||
verifyStrictCompliance(converted);
|
||||
}
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
converted, outputFilename, MediaType.APPLICATION_PDF);
|
||||
} catch (IOException | InterruptedException e) {
|
||||
@@ -1839,14 +1847,42 @@ public class ConvertPDFToPDFA {
|
||||
// Validate with PDFBox preflight and warn if issues found
|
||||
validateAndWarnPdfA(converted, profile, "PDFBox/LibreOffice");
|
||||
|
||||
if (strict) {
|
||||
verifyStrictCompliance(converted);
|
||||
}
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
converted, outputFilename, MediaType.APPLICATION_PDF);
|
||||
|
||||
} finally {
|
||||
deleteQuietly(workingDir);
|
||||
}
|
||||
}
|
||||
|
||||
private void verifyStrictCompliance(byte[] pdfBytes) throws IOException {
|
||||
try (InputStream is = new ByteArrayInputStream(pdfBytes)) {
|
||||
List<stirling.software.SPDF.model.api.security.PDFVerificationResult> results =
|
||||
veraPDFService.validatePDF(is);
|
||||
boolean isCompliant = results.stream().anyMatch(result -> result.isCompliant());
|
||||
if (!isCompliant) {
|
||||
String details =
|
||||
results.stream()
|
||||
.map(r -> r.getStandard() + ": " + r.getComplianceSummary())
|
||||
.collect(Collectors.joining("; "));
|
||||
throw new ResponseStatusException(
|
||||
HttpStatus.BAD_REQUEST,
|
||||
"Strict PDF/A mode enabled: Conversion is not perfectly compliant. Details: "
|
||||
+ details);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
if (e instanceof ResponseStatusException) {
|
||||
throw (ResponseStatusException) e;
|
||||
}
|
||||
log.error("Error during strict PDF/A verification", e);
|
||||
throw new ResponseStatusException(
|
||||
HttpStatus.INTERNAL_SERVER_ERROR, "Error during strict PDF/A verification");
|
||||
}
|
||||
}
|
||||
|
||||
private Path sanitizePdfWithPdfBox(Path inputPdf, boolean addWhiteBackground) {
|
||||
try {
|
||||
Path sanitizedPath =
|
||||
|
||||
@@ -16,4 +16,9 @@ public class PdfToPdfARequest extends PDFFile {
|
||||
requiredMode = Schema.RequiredMode.REQUIRED,
|
||||
allowableValues = {"pdfa", "pdfa-1", "pdfa-2", "pdfa-2b", "pdfa-3", "pdfa-3b", "pdfx"})
|
||||
private String outputFormat;
|
||||
|
||||
@Schema(
|
||||
description =
|
||||
"If true, the conversion will fail if the output is not perfectly compliant")
|
||||
private Boolean strict = false;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user