feat(verification): enhance PDF/A and compliance validation using veraPDF

- Expanded `PDFVerificationResult` model to include additional fields: validationProfile, validationProfileName, complianceSummary, and declaredPdfa
- Improved `VeraPDFService` to support PDF/A detection and validation by parsing XMP metadata
- Added warning classification logic for validation issues based on rules, messages, and clauses
- Refactored validation logic to handle multi-standard PDFs with precise error and warning tracking
- Enhanced debug logs for better traceability during PDF verification operations

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
Balázs Szücs 2025-11-14 20:59:09 +01:00
parent ff5029560e
commit 9a5a9963b7
2 changed files with 359 additions and 86 deletions

View File

@ -14,6 +14,10 @@ public class PDFVerificationResult {
private String standard; private String standard;
private String standardName; private String standardName;
private String validationProfile;
private String validationProfileName;
private String complianceSummary;
private boolean declaredPdfa;
private boolean compliant; private boolean compliant;
private int totalFailures; private int totalFailures;
private int totalWarnings; private int totalWarnings;

View File

@ -1,10 +1,21 @@
package stirling.software.SPDF.service; package stirling.software.SPDF.service;
import java.io.ByteArrayInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.Set;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.PDFAIdentificationSchema;
import org.apache.xmpbox.xml.DomXmpParser;
import org.apache.xmpbox.xml.XmpParsingException;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.verapdf.core.EncryptedPdfException; import org.verapdf.core.EncryptedPdfException;
import org.verapdf.core.ModelParsingException; import org.verapdf.core.ModelParsingException;
@ -28,6 +39,18 @@ import stirling.software.SPDF.model.api.security.PDFVerificationResult;
@Slf4j @Slf4j
public class VeraPDFService { public class VeraPDFService {
private static final Set<String> WARNING_RULES =
Set.of(
"6.1.13-1", // Recommended metadata properties
"6.7.3-1", // Optional XMP metadata
"6.2.2-2" // Non-critical font issues
);
private static final Set<String> CRITICAL_CLAUSE_PREFIXES = Set.of("6.1", "6.2", "6.3", "6.4");
private static final String NOT_PDFA_STANDARD_ID = "not-pdfa";
private static final String NOT_PDFA_STANDARD_NAME =
"Not PDF/A (no PDF/A identification metadata)";
@PostConstruct @PostConstruct
public void initialize() { public void initialize() {
try { try {
@ -38,138 +61,384 @@ public class VeraPDFService {
} }
} }
public PDFVerificationResult validatePDF(InputStream pdfStream, String standardString) public static PDFVerificationResult validatePDF(InputStream pdfStream, String standardString)
throws IOException, ValidationException, ModelParsingException, EncryptedPdfException { throws IOException, ValidationException, ModelParsingException, EncryptedPdfException {
PDFAFlavour flavour = PDFAFlavour.fromString(standardString); byte[] pdfBytes = pdfStream.readAllBytes();
PDFAFlavour validationFlavour = PDFAFlavour.fromString(standardString);
Optional<PDFAFlavour> declaredPdfaFlavour = extractDeclaredPdfaFlavour(pdfBytes);
try (PDFAParser parser = Foundries.defaultInstance().createParser(pdfStream, flavour)) { try (PDFAParser parser =
PDFAValidator validator = Foundries.defaultInstance().createValidator(flavour, false); Foundries.defaultInstance()
.createParser(new ByteArrayInputStream(pdfBytes), validationFlavour)) {
PDFAValidator validator =
Foundries.defaultInstance().createValidator(validationFlavour, false);
ValidationResult result = validator.validate(parser); ValidationResult result = validator.validate(parser);
return convertToVerificationResult(result); return convertToVerificationResult(
result, declaredPdfaFlavour.orElse(null), validationFlavour);
} }
} }
public List<PDFVerificationResult> validateAllDeclaredStandards(InputStream pdfStream) public static List<PDFVerificationResult> validateAllDeclaredStandards(InputStream pdfStream)
throws IOException, ValidationException, ModelParsingException, EncryptedPdfException { throws IOException, ValidationException, ModelParsingException, EncryptedPdfException {
byte[] pdfBytes = pdfStream.readAllBytes();
Optional<PDFAFlavour> declaredPdfaFlavour = extractDeclaredPdfaFlavour(pdfBytes);
List<PDFVerificationResult> results = new ArrayList<>(); List<PDFVerificationResult> results = new ArrayList<>();
try (PDFAParser parser = Foundries.defaultInstance().createParser(pdfStream)) { List<PDFAFlavour> detectedFlavours;
List<PDFAFlavour> detectedFlavours = parser.getFlavours(); try (PDFAParser detectionParser =
List<PDFAFlavour> flavoursToValidate = new ArrayList<>(); Foundries.defaultInstance().createParser(new ByteArrayInputStream(pdfBytes))) {
detectedFlavours = detectionParser.getFlavours();
}
// Filter for PDF/A, PDF/UA, and WTPDF standards List<PDFAFlavour> flavoursToValidate = new ArrayList<>();
for (PDFAFlavour flavour : detectedFlavours) {
if (PDFFlavours.isFlavourFamily(flavour, PDFAFlavour.SpecificationFamily.PDF_A) declaredPdfaFlavour.ifPresent(flavoursToValidate::add);
|| PDFFlavours.isFlavourFamily(
flavour, PDFAFlavour.SpecificationFamily.PDF_UA) for (PDFAFlavour flavour : detectedFlavours) {
|| PDFFlavours.isFlavourFamily( if (PDFFlavours.isFlavourFamily(flavour, PDFAFlavour.SpecificationFamily.PDF_A)) {
flavour, PDFAFlavour.SpecificationFamily.WTPDF)) { if (declaredPdfaFlavour.isPresent() && !declaredPdfaFlavour.get().equals(flavour)) {
flavoursToValidate.add(flavour); flavoursToValidate.add(flavour);
} else if (declaredPdfaFlavour.isEmpty()) {
log.debug(
"Ignoring detected PDF/A flavour {} because no PDF/A declaration exists in XMP",
flavour.getId());
} }
} else if (PDFFlavours.isFlavourFamily(flavour, PDFAFlavour.SpecificationFamily.PDF_UA)
|| PDFFlavours.isFlavourFamily(
flavour, PDFAFlavour.SpecificationFamily.WTPDF)) {
flavoursToValidate.add(flavour);
} }
}
if (flavoursToValidate.isEmpty()) { if (declaredPdfaFlavour.isEmpty()) {
log.info("No PDF/A, PDF/UA, or WTPDF standards declared in the document"); results.add(createNoPdfaDeclarationResult());
PDFVerificationResult noStandardResult = new PDFVerificationResult(); }
noStandardResult.setStandard("none");
noStandardResult.setStandardName("No standards declared");
noStandardResult.setCompliant(false);
noStandardResult.setTotalFailures(0);
noStandardResult.setTotalWarnings(0);
results.add(noStandardResult);
return results;
}
for (PDFAFlavour flavour : flavoursToValidate) { if (flavoursToValidate.isEmpty()) {
try { log.info("No verifiable PDF/A, PDF/UA, or WTPDF standards declared via XMP metadata");
PDFAValidator validator = return results;
Foundries.defaultInstance().createValidator(flavour, false); }
ValidationResult result = validator.validate(parser);
results.add(convertToVerificationResult(result)); for (PDFAFlavour flavour : flavoursToValidate) {
} catch (Exception e) { try (PDFAParser parser =
log.error("Error validating standard {}: {}", flavour.getId(), e.getMessage()); Foundries.defaultInstance()
PDFVerificationResult errorResult = new PDFVerificationResult(); .createParser(new ByteArrayInputStream(pdfBytes), flavour)) {
errorResult.setStandard(flavour.getId()); PDFAValidator validator =
errorResult.setStandardName(getStandardName(flavour)); Foundries.defaultInstance().createValidator(flavour, false);
errorResult.setCompliant(false); ValidationResult result = validator.validate(parser);
errorResult.setTotalFailures(1); PDFAFlavour declaredForResult =
errorResult.setTotalWarnings(0); PDFFlavours.isFlavourFamily(flavour, PDFAFlavour.SpecificationFamily.PDF_A)
PDFVerificationResult.ValidationIssue failure = ? declaredPdfaFlavour.orElse(null)
new PDFVerificationResult.ValidationIssue(); : flavour;
failure.setMessage("Validation error: " + e.getMessage()); results.add(convertToVerificationResult(result, declaredForResult, flavour));
errorResult.addFailure(failure); } catch (Exception e) {
results.add(errorResult); log.error("Error validating standard {}: {}", flavour.getId(), e.getMessage());
} results.add(
buildErrorResult(
declaredPdfaFlavour,
flavour,
"Validation error: " + e.getMessage()));
} }
} }
return results; return results;
} }
private PDFVerificationResult convertToVerificationResult(ValidationResult result) { private static PDFVerificationResult convertToVerificationResult(
ValidationResult result, PDFAFlavour declaredFlavour, PDFAFlavour validationFlavour) {
PDFVerificationResult verificationResult = new PDFVerificationResult(); PDFVerificationResult verificationResult = new PDFVerificationResult();
PDFAFlavour flavour = result.getPDFAFlavour(); PDFAFlavour validationProfile =
verificationResult.setStandard(flavour.getId()); validationFlavour != null ? validationFlavour : result.getPDFAFlavour();
verificationResult.setStandardName(getStandardName(flavour)); boolean validationIsPdfa = isPdfaFlavour(validationProfile);
verificationResult.setCompliant(result.isCompliant());
if (validationProfile != null) {
verificationResult.setValidationProfile(validationProfile.getId());
verificationResult.setValidationProfileName(getStandardName(validationProfile));
}
if (declaredFlavour != null) {
verificationResult.setStandard(declaredFlavour.getId());
verificationResult.setDeclaredPdfa(isPdfaFlavour(declaredFlavour));
} else if (validationIsPdfa) {
verificationResult.setStandard(NOT_PDFA_STANDARD_ID);
verificationResult.setDeclaredPdfa(false);
} else if (validationProfile != null) {
verificationResult.setStandard(validationProfile.getId());
verificationResult.setDeclaredPdfa(false);
} else {
verificationResult.setStandard(NOT_PDFA_STANDARD_ID);
verificationResult.setDeclaredPdfa(false);
}
// Process all assertions and separate errors from warnings
List<TestAssertion> assertions = result.getTestAssertions(); List<TestAssertion> assertions = result.getTestAssertions();
int errorCount = 0;
int warningCount = 0;
for (TestAssertion assertion : assertions) { for (TestAssertion assertion : assertions) {
TestAssertion.Status status = assertion.getStatus(); TestAssertion.Status status = assertion.getStatus();
// Only process FAILED assertions (PASSED assertions are successful checks)
if (status == TestAssertion.Status.FAILED) { if (status == TestAssertion.Status.FAILED) {
classifyAssertion(assertion, verificationResult);
PDFVerificationResult.ValidationIssue issue =
new PDFVerificationResult.ValidationIssue();
issue.setRuleId(assertion.getRuleId().toString());
issue.setMessage(assertion.getMessage());
issue.setLocation(
assertion.getLocation() != null
? assertion.getLocation().toString()
: "Unknown");
issue.setSpecification(
assertion.getRuleId().getSpecification() != null
? assertion.getRuleId().getSpecification().toString()
: "");
issue.setClause(assertion.getRuleId().getClause());
int testNumber = assertion.getRuleId().getTestNumber();
issue.setTestNumber(testNumber > 0 ? String.valueOf(testNumber) : "");
verificationResult.addFailure(issue);
errorCount++;
} }
} }
verificationResult.setTotalFailures(errorCount); verificationResult.setCompliant(verificationResult.getTotalFailures() == 0);
verificationResult.setTotalWarnings(warningCount);
String baseName;
if (declaredFlavour != null) {
baseName = getStandardName(declaredFlavour);
} else if (validationIsPdfa) {
baseName = NOT_PDFA_STANDARD_NAME;
} else if (validationProfile != null) {
baseName = getStandardName(validationProfile);
} else {
baseName = "Unknown standard";
}
String standardDisplay =
formatStandardDisplay(
baseName,
verificationResult.getTotalFailures(),
verificationResult.getTotalWarnings(),
isPdfaFlavour(declaredFlavour),
validationIsPdfa && declaredFlavour == null);
verificationResult.setStandardName(standardDisplay);
verificationResult.setComplianceSummary(standardDisplay);
log.debug( log.debug(
"Validation complete for {}: {} errors, {} warnings", "Validation complete for profile {} (declared: {}): {} errors, {} warnings",
flavour.getId(), validationProfile != null ? validationProfile.getId() : "unknown",
errorCount, declaredFlavour != null ? declaredFlavour.getId() : NOT_PDFA_STANDARD_ID,
warningCount); verificationResult.getTotalFailures(),
verificationResult.getTotalWarnings());
return verificationResult; return verificationResult;
} }
private String getStandardName(PDFAFlavour flavour) { private static void classifyAssertion(
TestAssertion assertion, PDFVerificationResult verificationResult) {
PDFVerificationResult.ValidationIssue issue = createValidationIssue(assertion);
String ruleId = assertion.getRuleId() != null ? assertion.getRuleId().toString() : "";
String message = assertion.getMessage() != null ? assertion.getMessage() : "";
String clause = assertion.getRuleId() != null ? assertion.getRuleId().getClause() : "";
if (isWarningRule(ruleId)) {
verificationResult.addWarning(issue);
return;
}
if (isWarningByMessage(message)) {
verificationResult.addWarning(issue);
return;
}
if (isWarningByClause(clause)) {
verificationResult.addWarning(issue);
return;
}
verificationResult.addFailure(issue);
}
/**
 * Copies the details of a veraPDF test assertion into a {@code ValidationIssue}.
 *
 * <p>Missing pieces are replaced by placeholders: an absent rule id yields empty rule,
 * specification and clause strings, an absent location becomes {@code "Unknown"}, and a
 * non-positive test number becomes an empty string.
 *
 * @param assertion the assertion to convert
 * @return a newly created issue describing the assertion
 */
private static PDFVerificationResult.ValidationIssue createValidationIssue(
        TestAssertion assertion) {
    var rule = assertion.getRuleId();
    var where = assertion.getLocation();

    String ruleText = "";
    String specificationText = "";
    String clauseText = "";
    int number = 0;
    if (rule != null) {
        ruleText = rule.toString();
        var specification = rule.getSpecification();
        if (specification != null) {
            specificationText = specification.toString();
        }
        clauseText = rule.getClause();
        number = rule.getTestNumber();
    }

    String locationText = "Unknown";
    if (where != null) {
        locationText = where.toString();
    }

    PDFVerificationResult.ValidationIssue converted = new PDFVerificationResult.ValidationIssue();
    converted.setRuleId(ruleText);
    converted.setMessage(assertion.getMessage());
    converted.setLocation(locationText);
    converted.setSpecification(specificationText);
    converted.setClause(clauseText);
    // Only positive test numbers are reported; anything else is left blank.
    converted.setTestNumber(number > 0 ? String.valueOf(number) : "");
    return converted;
}
/**
 * Tells whether the given rule identifier is one of the rules listed in {@code WARNING_RULES}.
 *
 * <p>NOTE(review): classifyAssertion passes {@code assertion.getRuleId().toString()} here,
 * while {@code WARNING_RULES} holds short "clause-test" ids such as {@code "6.1.13-1"} -
 * confirm that the two formats actually agree.
 *
 * @param ruleId rule identifier text, may be {@code null}
 * @return {@code true} if the identifier is a known warning-only rule
 */
private static boolean isWarningRule(String ruleId) {
    if (ruleId == null) {
        return false;
    }
    return WARNING_RULES.contains(ruleId);
}
/**
 * Decides from the assertion message text whether a failed check should be reported as a
 * warning rather than an error.
 *
 * <p>The match is a case-insensitive substring search for "recommended", "should" or
 * "optional". A message such as "missing recommended ..." is covered by the "recommended"
 * keyword, so no separate check is needed for it.
 *
 * <p>NOTE(review): substring matching is broad (any message containing "should" is
 * downgraded) - confirm this is acceptable for every validation profile in use.
 *
 * @param message assertion message, may be {@code null}
 * @return {@code true} if the message contains one of the warning keywords; {@code false}
 *     for {@code null}, blank, or non-matching messages
 */
private static boolean isWarningByMessage(String message) {
    // The null check must come first: String.isBlank() does not accept null.
    if (message == null || message.isBlank()) {
        return false;
    }
    // Locale.ROOT keeps the lower-casing independent of the JVM default locale.
    String normalized = message.toLowerCase(Locale.ROOT);
    return normalized.contains("recommended")
            || normalized.contains("should")
            || normalized.contains("optional");
}
/**
 * Decides from the specification clause whether a failed check counts as a warning.
 *
 * <p>Clauses beginning with "6.7" are always warnings. Clauses beginning with one of the
 * {@code CRITICAL_CLAUSE_PREFIXES} are errors. Every other non-blank clause is treated as a
 * warning, and a {@code null} or blank clause is never a warning.
 *
 * <p>NOTE(review): matching uses plain {@code startsWith}, so a clause like "6.10" also
 * matches the "6.1" prefix - confirm whether matching on whole clause segments was intended.
 *
 * @param clause clause number text, may be {@code null}
 * @return {@code true} if the clause is classified as warning-level
 */
private static boolean isWarningByClause(String clause) {
    // Explicit null check is required; isBlank() cannot be called on null.
    boolean missing = clause == null || clause.isBlank();
    if (missing) {
        return false;
    }
    if (clause.startsWith("6.7")) {
        return true;
    }
    // Warning unless the clause falls under one of the critical prefixes.
    return CRITICAL_CLAUSE_PREFIXES.stream().noneMatch(clause::startsWith);
}
/**
 * Builds the result reported when the document carries no PDF/A declaration.
 *
 * <p>The result uses the "not PDF/A" standard id and name, is marked non-compliant and
 * undeclared, and carries one failure explaining that the XMP metadata has no PDF/A
 * declaration.
 *
 * @return a newly created, non-compliant verification result
 */
private static PDFVerificationResult createNoPdfaDeclarationResult() {
    PDFVerificationResult.ValidationIssue missingDeclaration =
            new PDFVerificationResult.ValidationIssue();
    missingDeclaration.setMessage(
            "Document does not declare PDF/A compliance in its XMP metadata.");
    missingDeclaration.setSpecification("XMP pdfaid");

    PDFVerificationResult undeclared = new PDFVerificationResult();
    undeclared.setStandard(NOT_PDFA_STANDARD_ID);
    // Display name and summary are the same fixed text for this case.
    undeclared.setStandardName(NOT_PDFA_STANDARD_NAME);
    undeclared.setComplianceSummary(NOT_PDFA_STANDARD_NAME);
    undeclared.setCompliant(false);
    undeclared.setDeclaredPdfa(false);
    undeclared.addFailure(missingDeclaration);
    return undeclared;
}
/**
 * Reads the PDF/A flavour that the document declares in its XMP metadata.
 *
 * <p>The PDF is loaded with PDFBox, the catalog metadata stream is parsed as XMP, and the
 * {@code pdfaid} part and conformance values are combined into a veraPDF flavour id (for
 * example part {@code 2} and conformance {@code "B"} give {@code "2b"}).
 *
 * <p>An empty result is returned when there is no metadata, no PDF/A identification schema,
 * a missing part or conformance value, or a declaration that does not map to a known veraPDF
 * flavour. Parsing and loading failures are logged and also produce an empty result; this
 * method never throws.
 *
 * <p>NOTE(review): a declaration without {@code pdfaid:conformance} is treated as undeclared
 * - confirm this is intended for PDF/A parts where the conformance value may be optional.
 *
 * @param pdfBytes the complete PDF file content
 * @return the declared PDF/A flavour, or empty if none could be determined
 */
private static Optional<PDFAFlavour> extractDeclaredPdfaFlavour(byte[] pdfBytes) {
    try (PDDocument document = Loader.loadPDF(pdfBytes)) {
        PDMetadata metadata = document.getDocumentCatalog().getMetadata();
        if (metadata == null) {
            return Optional.empty();
        }

        try (InputStream xmpStream = metadata.createInputStream()) {
            if (xmpStream == null) {
                return Optional.empty();
            }
            DomXmpParser parser = new DomXmpParser();
            XMPMetadata xmpMetadata = parser.parse(xmpStream);

            PDFAIdentificationSchema pdfaid = xmpMetadata.getPDFAIdentificationSchema();
            if (pdfaid == null) {
                return Optional.empty();
            }

            Integer part = pdfaid.getPart();
            String conformance = pdfaid.getConformance();
            if (part == null || conformance == null || conformance.isBlank()) {
                return Optional.empty();
            }

            String flavourId = part + conformance.trim().toLowerCase(Locale.ROOT);
            // veraPDF appears to signal "unrecognised id" with the NO_FLAVOUR sentinel
            // rather than null, so filter it out: an unknown declaration must not be
            // reported (or validated) as if it were a real declared flavour.
            return Optional.ofNullable(PDFAFlavour.fromString(flavourId))
                    .filter(flavour -> flavour != PDFAFlavour.NO_FLAVOUR);
        }
    } catch (XmpParsingException e) {
        log.warn(
                "Invalid XMP metadata encountered while checking PDF/A declaration: {}",
                e.getMessage());
        log.debug("XMP parsing error", e);
        return Optional.empty();
    } catch (Exception e) {
        // Best effort: any other failure (I/O, malformed PDF) means "no declaration found".
        log.warn("Unable to extract PDF/A declaration from XMP: {}", e.getMessage());
        log.debug("XMP extraction error", e);
        return Optional.empty();
    }
}
/**
 * Builds a non-compliant result for a validation run that ended with an exception.
 *
 * <p>When the validation profile is a PDF/A flavour, the reported standard is the flavour
 * declared in the document, or the "not PDF/A" placeholder if nothing was declared. For any
 * other profile the profile itself is reported. The validation profile id and name are
 * always filled in, and the error text is attached as the single failure.
 *
 * @param declaredPdfaFlavour PDF/A flavour declared by the document, if any
 * @param validationFlavour flavour that was being validated, may be {@code null}
 * @param errorMessage text describing what went wrong
 * @return a newly created, non-compliant verification result
 */
private static PDFVerificationResult buildErrorResult(
        Optional<PDFAFlavour> declaredPdfaFlavour,
        PDFAFlavour validationFlavour,
        String errorMessage) {
    boolean hasProfile = validationFlavour != null;
    boolean profileIsPdfa = hasProfile && isPdfaFlavour(validationFlavour);

    String profileId = hasProfile ? validationFlavour.getId() : NOT_PDFA_STANDARD_ID;
    String profileName = hasProfile ? getStandardName(validationFlavour) : "Unknown standard";

    // For PDF/A profiles report what the document declares, otherwise the profile itself.
    PDFAFlavour reported = profileIsPdfa ? declaredPdfaFlavour.orElse(null) : validationFlavour;

    String standardId;
    String standardName;
    boolean declaredPdfa;
    if (reported != null) {
        standardId = reported.getId();
        standardName = getStandardName(reported) + " with errors";
        declaredPdfa = isPdfaFlavour(reported);
    } else if (profileIsPdfa) {
        standardId = NOT_PDFA_STANDARD_ID;
        standardName = NOT_PDFA_STANDARD_NAME;
        declaredPdfa = false;
    } else {
        standardId = profileId;
        standardName = profileName + " with errors";
        declaredPdfa = false;
    }

    PDFVerificationResult failed = new PDFVerificationResult();
    failed.setStandard(standardId);
    failed.setStandardName(standardName);
    failed.setDeclaredPdfa(declaredPdfa);
    failed.setValidationProfile(profileId);
    failed.setValidationProfileName(profileName);
    failed.setComplianceSummary(failed.getStandardName());
    failed.setCompliant(false);

    PDFVerificationResult.ValidationIssue problem = new PDFVerificationResult.ValidationIssue();
    problem.setMessage(errorMessage);
    failed.addFailure(problem);
    return failed;
}
/**
 * Tells whether the given flavour belongs to the PDF/A specification family.
 *
 * <p>Callers in this class pass flavours that may be {@code null} (for example an absent
 * declared flavour), so {@code null} is answered with {@code false} here instead of relying
 * on the null handling of the veraPDF helper.
 *
 * @param flavour flavour to test, may be {@code null}
 * @return {@code true} if the flavour is a PDF/A flavour; {@code false} otherwise or for
 *     {@code null}
 */
private static boolean isPdfaFlavour(PDFAFlavour flavour) {
    return flavour != null
            && PDFFlavours.isFlavourFamily(flavour, PDFAFlavour.SpecificationFamily.PDF_A);
}
/**
 * Produces the display text for a validation outcome.
 *
 * <p>If the PDF/A profile was only inferred (no declaration), or the base name is the
 * "not PDF/A" placeholder for an undeclared document, the placeholder name is returned
 * unchanged. Otherwise the base name is suffixed with " with errors", " with warnings" or
 * " compliant"; errors take precedence over warnings.
 *
 * @param baseName name of the standard being reported
 * @param errorCount number of failures recorded
 * @param warningCount number of warnings recorded
 * @param declaredPdfa whether the document declares a PDF/A flavour
 * @param inferredPdfaWithoutDeclaration whether a PDF/A profile was used without a
 *     declaration in the document
 * @return the formatted display text
 */
private static String formatStandardDisplay(
        String baseName,
        int errorCount,
        int warningCount,
        boolean declaredPdfa,
        boolean inferredPdfaWithoutDeclaration) {
    boolean showPlaceholder =
            inferredPdfaWithoutDeclaration
                    || (!declaredPdfa && NOT_PDFA_STANDARD_NAME.equals(baseName));
    if (showPlaceholder) {
        return NOT_PDFA_STANDARD_NAME;
    }

    String suffix;
    if (errorCount > 0) {
        suffix = " with errors";
    } else {
        suffix = warningCount > 0 ? " with warnings" : " compliant";
    }
    return baseName + suffix;
}
private static String getStandardName(PDFAFlavour flavour) {
String id = flavour.getId(); String id = flavour.getId();
String part = flavour.getPart().toString(); String part = flavour.getPart().toString();
String level = flavour.getLevel().toString(); String level = flavour.getLevel().toString();
// PDF/A standards // PDF/A standards - Fixed: proper length check and parentheses
if (!id.isEmpty() && id.charAt(0) == '1' if (!id.isEmpty()
|| !id.isEmpty() && id.charAt(0) == '2' && (id.charAt(0) == '1'
|| !id.isEmpty() && id.charAt(0) == '3' || id.charAt(0) == '2'
|| !id.isEmpty() && id.charAt(0) == '4') { || id.charAt(0) == '3'
|| id.charAt(0) == '4')) {
return "PDF/A-" + part + (level.isEmpty() ? "" : level); return "PDF/A-" + part + (level.isEmpty() ? "" : level);
} }
// PDF/UA standards // PDF/UA standards