[V2] feat(security): add PDF standards verification feature using veraPDF

- Implemented `PDFVerificationRequest` and `PDFVerificationResult` models for validation requests and responses
- Developed `VeraPDFService` to validate PDFs against specific or auto-detected standards
- Added `VerifyPDFController` with an endpoint for PDF verification
- Integrated veraPDF dependencies into project build file
- Deprecated unused `/verify-pdf` form in `SecurityWebController`
- Updated `EndpointConfiguration` to include the new `verify-pdf` endpoint

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
Balázs Szücs 2025-11-11 14:46:32 +01:00
parent 4c0c9b28ef
commit 075de9487a
7 changed files with 378 additions and 1 deletion

View File

@ -66,6 +66,13 @@ dependencies {
implementation "org.apache.pdfbox:preflight:$pdfboxVersion"
implementation "org.apache.pdfbox:xmpbox:$pdfboxVersion"
implementation 'org.verapdf:validation-model:1.28.2'
// veraPDF still uses javax.xml.bind, not the new jakarta namespace
implementation 'javax.xml.bind:jaxb-api:2.3.1'
implementation 'com.sun.xml.bind:jaxb-impl:2.3.9'
implementation 'com.sun.xml.bind:jaxb-core:2.3.0.1'
// https://mvnrepository.com/artifact/technology.tabula/tabula
implementation ('technology.tabula:tabula:1.0.5') {
exclude group: 'org.slf4j', module: 'slf4j-simple'

View File

@ -273,6 +273,7 @@ public class EndpointConfiguration {
addEndpointToGroup("Security", "auto-redact");
addEndpointToGroup("Security", "redact");
addEndpointToGroup("Security", "validate-signature");
addEndpointToGroup("Security", "verify-pdf");
addEndpointToGroup("Security", "stamp");
addEndpointToGroup("Security", "sign");
@ -389,6 +390,8 @@ public class EndpointConfiguration {
addEndpointToGroup("Java", "add-attachments");
addEndpointToGroup("Java", "compress-pdf");
addEndpointToGroup("rar", "pdf-to-cbr");
addEndpointToGroup("Java", "pdf-to-video");
addEndpointToGroup("Java", "verify-pdf");
// Javascript
addEndpointToGroup("Javascript", "pdf-organizer");
@ -440,6 +443,9 @@ public class EndpointConfiguration {
addEndpointToGroup("Weasyprint", "markdown-to-pdf");
addEndpointToGroup("Weasyprint", "eml-to-pdf");
// veraPDF dependent endpoints
addEndpointToGroup("veraPDF", "verify-pdf");
// Pdftohtml dependent endpoints
addEndpointToGroup("Pdftohtml", "pdf-to-html");
addEndpointToGroup("Pdftohtml", "pdf-to-markdown");
@ -489,7 +495,9 @@ public class EndpointConfiguration {
|| "Javascript".equals(group)
|| "Weasyprint".equals(group)
|| "Pdftohtml".equals(group)
|| "rar".equals(group);
|| "rar".equals(group)
|| "FFmpeg".equals(group)
|| "veraPDF".equals(group);
}
private boolean isEndpointEnabledDirectly(String endpoint) {

View File

@ -0,0 +1,102 @@
package stirling.software.SPDF.controller.api.security;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;

import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import org.verapdf.core.EncryptedPdfException;
import org.verapdf.core.ModelParsingException;
import org.verapdf.core.ValidationException;

import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;

import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;

import stirling.software.SPDF.model.api.security.PDFVerificationRequest;
import stirling.software.SPDF.model.api.security.PDFVerificationResult;
import stirling.software.SPDF.service.VeraPDFService;
import stirling.software.common.util.ExceptionUtils;
@RestController
@RequestMapping("/api/v1/security")
@Tag(name = "Security", description = "Security APIs")
@RequiredArgsConstructor
@Slf4j
public class VerifyPDFController {
private final VeraPDFService veraPDFService;
@Operation(
summary = "Verify PDF Standards Compliance",
description =
"Validates PDF files against PDF/A, PDF/UA-1, PDF/UA-2, and WTPDF standards"
+ " using veraPDF. Can auto-detect declared standards or verify against"
+ " a specific standard. Input:PDF Output:JSON Type:SISO")
@PostMapping(value = "/verify-pdf", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
public ResponseEntity<List<PDFVerificationResult>> verifyPDF(
@ModelAttribute PDFVerificationRequest request) {
MultipartFile file = request.getFileInput();
String standard = request.getStandard();
if (file == null || file.isEmpty()) {
throw ExceptionUtils.createRuntimeException(
"error.pdfRequired", "PDF file is required", null);
}
try {
List<PDFVerificationResult> results;
if (standard != null && !standard.trim().isEmpty()) {
log.info(
"Verifying PDF '{}' against standard: {}",
file.getOriginalFilename(),
standard);
PDFVerificationResult result =
veraPDFService.validatePDF(file.getInputStream(), standard.trim());
results = List.of(result);
} else {
log.info("Auto-detecting standards in PDF '{}'", file.getOriginalFilename());
results = veraPDFService.validateAllDeclaredStandards(file.getInputStream());
}
log.info(
"Verification complete for '{}': {} standard(s) checked",
file.getOriginalFilename(),
results.size());
return ResponseEntity.ok(results);
} catch (ValidationException e) {
log.error("Validation exception for file: {}", file.getOriginalFilename(), e);
throw ExceptionUtils.createRuntimeException(
"error.validationFailed", "PDF validation failed: {0}", e, e.getMessage());
} catch (ModelParsingException e) {
log.error("Model parsing exception for file: {}", file.getOriginalFilename(), e);
throw ExceptionUtils.createRuntimeException(
"error.modelParsingFailed", "PDF model parsing failed: {0}", e, e.getMessage());
} catch (EncryptedPdfException e) {
log.error("Encrypted PDF exception for file: {}", file.getOriginalFilename(), e);
throw ExceptionUtils.createRuntimeException(
"error.encryptedPdf",
"Cannot verify encrypted PDF. Please remove password first: {0}",
e,
e.getMessage());
} catch (IOException e) {
log.error("IO exception for file: {}", file.getOriginalFilename(), e);
throw ExceptionUtils.createRuntimeException(
"error.ioException",
"IO error during PDF verification: {0}",
e,
e.getMessage());
}
}
}

View File

@ -72,6 +72,14 @@ public class SecurityWebController {
return "security/validate-signature";
}
/**
 * Prepares the model for the legacy verify-pdf form view and returns that view's name.
 *
 * <p>The {@code @GetMapping} line below is commented out, so this annotation does not map the
 * method to a route.
 *
 * @param model model that receives the {@code currentPage} attribute
 * @return the view name {@code security/verify-pdf}
 */
@Deprecated
@Hidden
// @GetMapping("/verify-pdf")
public String verifyPdfForm(Model model) {
    final String pageId = "verify-pdf";
    model.addAttribute("currentPage", pageId);
    return "security/" + pageId;
}
@Deprecated
// @GetMapping("/remove-cert-sign")
@Hidden

View File

@ -0,0 +1,22 @@
package stirling.software.SPDF.model.api.security;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import lombok.EqualsAndHashCode;
import stirling.software.common.model.api.PDFFile;
/**
 * Form payload for the PDF verification endpoint: the uploaded file inherited from {@link PDFFile}
 * plus an optional target standard.
 */
@Data
@EqualsAndHashCode(callSuper = true)
public class PDFVerificationRequest extends PDFFile {

    /** Optional identifier of the standard to validate against; not required by the schema. */
    @Schema(
            requiredMode = Schema.RequiredMode.NOT_REQUIRED,
            description =
                    "Specific PDF standard to verify against"
                            + " (e.g., '1b', '2a', '3u', 'ua1', 'ua2', 'wtpdf-1.0')."
                            + " Leave empty to auto-detect and verify all declared standards."
                            + " The response will include both errors (compliance failures)"
                            + " and warnings (non-critical issues) separately.")
    private String standard;
}

View File

@ -0,0 +1,44 @@
package stirling.software.SPDF.model.api.security;
import java.util.ArrayList;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Outcome of validating one PDF against one standard: the standard's identifier and display name,
 * the compliance flag, and the failures and warnings collected with their counts.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class PDFVerificationResult {

    private String standard;
    private String standardName;
    private boolean compliant;
    private int totalFailures;
    private int totalWarnings;
    private List<ValidationIssue> failures = new ArrayList<>();
    private List<ValidationIssue> warnings = new ArrayList<>();

    /**
     * Appends a failure and refreshes {@code totalFailures} from the list size.
     *
     * @param failure issue to record
     */
    public void addFailure(ValidationIssue failure) {
        // The generated setter and all-args constructor accept null, so restore the list
        // here instead of failing with a NullPointerException.
        if (this.failures == null) {
            this.failures = new ArrayList<>();
        }
        this.failures.add(failure);
        this.totalFailures = this.failures.size();
    }

    /**
     * Appends a warning and refreshes {@code totalWarnings} from the list size.
     *
     * @param warning issue to record
     */
    public void addWarning(ValidationIssue warning) {
        // Same null tolerance as addFailure.
        if (this.warnings == null) {
            this.warnings = new ArrayList<>();
        }
        this.warnings.add(warning);
        this.totalWarnings = this.warnings.size();
    }

    /** A single reported issue, described by rule, message, location and specification details. */
    @Data
    @NoArgsConstructor
    @AllArgsConstructor
    public static class ValidationIssue {
        private String ruleId;
        private String message;
        private String location;
        private String specification;
        private String clause;
        private String testNumber;
    }
}

View File

@ -0,0 +1,186 @@
package stirling.software.SPDF.service;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.springframework.stereotype.Service;
import org.verapdf.core.EncryptedPdfException;
import org.verapdf.core.ModelParsingException;
import org.verapdf.core.ValidationException;
import org.verapdf.gf.foundry.VeraGreenfieldFoundryProvider;
import org.verapdf.pdfa.Foundries;
import org.verapdf.pdfa.PDFAParser;
import org.verapdf.pdfa.PDFAValidator;
import org.verapdf.pdfa.flavours.PDFAFlavour;
import org.verapdf.pdfa.flavours.PDFFlavours;
import org.verapdf.pdfa.results.TestAssertion;
import org.verapdf.pdfa.results.ValidationResult;
import jakarta.annotation.PostConstruct;
import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.security.PDFVerificationResult;
@Service
@Slf4j
public class VeraPDFService {
@PostConstruct
public void initialize() {
try {
VeraGreenfieldFoundryProvider.initialise();
log.info("veraPDF Greenfield initialized successfully");
} catch (Exception e) {
log.error("Failed to initialize veraPDF", e);
}
}
public PDFVerificationResult validatePDF(InputStream pdfStream, String standardString)
throws IOException, ValidationException, ModelParsingException, EncryptedPdfException {
PDFAFlavour flavour = PDFAFlavour.fromString(standardString);
try (PDFAParser parser = Foundries.defaultInstance().createParser(pdfStream, flavour)) {
PDFAValidator validator = Foundries.defaultInstance().createValidator(flavour, false);
ValidationResult result = validator.validate(parser);
return convertToVerificationResult(result);
}
}
public List<PDFVerificationResult> validateAllDeclaredStandards(InputStream pdfStream)
throws IOException, ValidationException, ModelParsingException, EncryptedPdfException {
List<PDFVerificationResult> results = new ArrayList<>();
try (PDFAParser parser = Foundries.defaultInstance().createParser(pdfStream)) {
List<PDFAFlavour> detectedFlavours = parser.getFlavours();
List<PDFAFlavour> flavoursToValidate = new ArrayList<>();
// Filter for PDF/A, PDF/UA, and WTPDF standards
for (PDFAFlavour flavour : detectedFlavours) {
if (PDFFlavours.isFlavourFamily(flavour, PDFAFlavour.SpecificationFamily.PDF_A)
|| PDFFlavours.isFlavourFamily(
flavour, PDFAFlavour.SpecificationFamily.PDF_UA)
|| PDFFlavours.isFlavourFamily(
flavour, PDFAFlavour.SpecificationFamily.WTPDF)) {
flavoursToValidate.add(flavour);
}
}
if (flavoursToValidate.isEmpty()) {
log.info("No PDF/A, PDF/UA, or WTPDF standards declared in the document");
PDFVerificationResult noStandardResult = new PDFVerificationResult();
noStandardResult.setStandard("none");
noStandardResult.setStandardName("No standards declared");
noStandardResult.setCompliant(false);
noStandardResult.setTotalFailures(0);
noStandardResult.setTotalWarnings(0);
results.add(noStandardResult);
return results;
}
for (PDFAFlavour flavour : flavoursToValidate) {
try {
PDFAValidator validator =
Foundries.defaultInstance().createValidator(flavour, false);
ValidationResult result = validator.validate(parser);
results.add(convertToVerificationResult(result));
} catch (Exception e) {
log.error("Error validating standard {}: {}", flavour.getId(), e.getMessage());
PDFVerificationResult errorResult = new PDFVerificationResult();
errorResult.setStandard(flavour.getId());
errorResult.setStandardName(getStandardName(flavour));
errorResult.setCompliant(false);
errorResult.setTotalFailures(1);
errorResult.setTotalWarnings(0);
PDFVerificationResult.ValidationIssue failure =
new PDFVerificationResult.ValidationIssue();
failure.setMessage("Validation error: " + e.getMessage());
errorResult.addFailure(failure);
results.add(errorResult);
}
}
}
return results;
}
private PDFVerificationResult convertToVerificationResult(ValidationResult result) {
PDFVerificationResult verificationResult = new PDFVerificationResult();
PDFAFlavour flavour = result.getPDFAFlavour();
verificationResult.setStandard(flavour.getId());
verificationResult.setStandardName(getStandardName(flavour));
verificationResult.setCompliant(result.isCompliant());
// Process all assertions and separate errors from warnings
List<TestAssertion> assertions = result.getTestAssertions();
int errorCount = 0;
int warningCount = 0;
for (TestAssertion assertion : assertions) {
TestAssertion.Status status = assertion.getStatus();
// Only process FAILED assertions (PASSED assertions are successful checks)
if (status == TestAssertion.Status.FAILED) {
PDFVerificationResult.ValidationIssue issue =
new PDFVerificationResult.ValidationIssue();
issue.setRuleId(assertion.getRuleId().toString());
issue.setMessage(assertion.getMessage());
issue.setLocation(
assertion.getLocation() != null
? assertion.getLocation().toString()
: "Unknown");
issue.setSpecification(
assertion.getRuleId().getSpecification() != null
? assertion.getRuleId().getSpecification().toString()
: "");
issue.setClause(assertion.getRuleId().getClause());
int testNumber = assertion.getRuleId().getTestNumber();
issue.setTestNumber(testNumber > 0 ? String.valueOf(testNumber) : "");
verificationResult.addFailure(issue);
errorCount++;
}
}
verificationResult.setTotalFailures(errorCount);
verificationResult.setTotalWarnings(warningCount);
log.debug(
"Validation complete for {}: {} errors, {} warnings",
flavour.getId(),
errorCount,
warningCount);
return verificationResult;
}
private String getStandardName(PDFAFlavour flavour) {
String id = flavour.getId();
String part = flavour.getPart().toString();
String level = flavour.getLevel().toString();
// PDF/A standards
if (!id.isEmpty() && id.charAt(0) == '1'
|| !id.isEmpty() && id.charAt(0) == '2'
|| !id.isEmpty() && id.charAt(0) == '3'
|| !id.isEmpty() && id.charAt(0) == '4') {
return "PDF/A-" + part + (level.isEmpty() ? "" : level);
}
// PDF/UA standards
else if (id.contains("ua")) {
return "PDF/UA-" + part;
}
// WTPDF standards
else if (id.contains("wtpdf")) {
return "WTPDF " + part;
}
return flavour.toString();
}
}