From c7b713ac804a6503e3db269b06db48861d518761 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Sz=C3=BCcs?= <127139797+balazs-szucs@users.noreply.github.com> Date: Mon, 12 Jan 2026 20:39:54 +0100 Subject: [PATCH] fix(verify-pdf): verification to properly detect non-PDF/A documents with XMP metadata (#5397) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description of Changes Fixed an issue where PDFs containing XMP metadata but lacking the PDF/A identification schema were incorrectly validated as PDF/A documents and reported as "PDF/A-1b with errors" instead of "NOT PDF/A". ### Changes Made - Improved the PDF/A detection logic in `VeraPDFService.java` to check for both missing XMP metadata and a missing PDF/A identification schema - Added validation for clause 6.7.11 (PDF/A Identification extension schema requirement) in addition to clause 6.7.2 (XMP metadata presence) - Documents with XMP metadata but without proper PDF/A identification now correctly return "NOT PDF/A" ### Root Cause The previous implementation only checked for missing XMP metadata (clause 6.7.2) but didn't verify that the XMP contained the required PDF/A identification schema (clause 6.7.11). This caused documents with generic XMP metadata to be incorrectly treated as declared PDF/A files. Fixes an issue where non-PDF/A documents with XMP metadata were incorrectly showing PDF/A validation errors. 
--- ## Checklist ### General - [X] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [X] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [X] I have performed a self-review of my own code - [X] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### Translations (if applicable) - [ ] I ran [`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md) ### UI Changes (if applicable) - [X] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [X] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details. 
Signed-off-by: Balázs Szücs --- .../software/SPDF/service/VeraPDFService.java | 58 ++++++++++++++++++- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/app/core/src/main/java/stirling/software/SPDF/service/VeraPDFService.java b/app/core/src/main/java/stirling/software/SPDF/service/VeraPDFService.java index f590e4373..9e082ba82 100644 --- a/app/core/src/main/java/stirling/software/SPDF/service/VeraPDFService.java +++ b/app/core/src/main/java/stirling/software/SPDF/service/VeraPDFService.java @@ -204,11 +204,65 @@ public class VeraPDFService { detectedFlavours = detectionParser.getFlavours(); } + // For PDF/A flavours, we need to validate first to check if PDF/A identification exists in + // XMP + // If declaredFlavour is PDF/A, do a quick validation to check for PDF/A identification + // schema + boolean hasValidPdfaMetadata = false; + if (isPdfaFlavour(declaredFlavour)) { + try (PDFAParser quickParser = + Foundries.defaultInstance() + .createParser(new ByteArrayInputStream(pdfBytes), declaredFlavour)) { + PDFAValidator quickValidator = + Foundries.defaultInstance().createValidator(declaredFlavour, false); + ValidationResult quickResult = quickValidator.validate(quickParser); + + // Check if the document has the PDF/A Identification extension schema (clause + // 6.7.11, test 1) + // OR if it lacks XMP metadata entirely (clause 6.7.2, test 1) + // If either of these errors is present, the document is NOT a declared PDF/A + hasValidPdfaMetadata = true; + for (TestAssertion assertion : quickResult.getTestAssertions()) { + if (assertion.getStatus() == TestAssertion.Status.FAILED + && assertion.getRuleId() != null) { + String clause = assertion.getRuleId().getClause(); + int testNumber = assertion.getRuleId().getTestNumber(); + + // Missing XMP metadata entirely (clause 6.7.2, test 1) + if ("6.7.2".equals(clause) && testNumber == 1) { + hasValidPdfaMetadata = false; + log.debug( + "Document lacks XMP metadata (6.7.2): {}", + assertion.getMessage()); + break; 
+ } + + // Missing PDF/A identification schema in XMP (clause 6.7.11, test 1) + if ("6.7.11".equals(clause) && testNumber == 1) { + hasValidPdfaMetadata = false; + log.debug( + "Document lacks PDF/A identification in XMP (6.7.11): {}", + assertion.getMessage()); + break; + } + } + } + } catch (Exception e) { + log.debug("Error checking for PDF/A identification: {}", e.getMessage()); + hasValidPdfaMetadata = false; + } + } + List flavoursToValidate = new ArrayList<>(); - boolean hasPdfaDeclaration = isPdfaFlavour(declaredFlavour); + boolean hasPdfaDeclaration = isPdfaFlavour(declaredFlavour) && hasValidPdfaMetadata; if (declaredFlavour != null) { - flavoursToValidate.add(declaredFlavour); + boolean isDeclaredPdfa = isPdfaFlavour(declaredFlavour); + if (isDeclaredPdfa && hasPdfaDeclaration) { + flavoursToValidate.add(declaredFlavour); + } else if (!isDeclaredPdfa) { + flavoursToValidate.add(declaredFlavour); + } } for (PDFAFlavour flavour : detectedFlavours) {