feat(compliance): implement compliance verification for get info on PDF (#5435)

# Description of Changes


This PR replaces the legacy, fragile PDF/A validation logic (which
relied on manual regex parsing and PDFBox Preflight) with the robust
**VeraPDF** integration. Additionally, it introduces a new check for
**SEC/EDGAR compliance** and refactors the frontend report view to
display detailed verification results.

### Key Changes

**Backend (`GetInfoOnPDF.java`):**

* **VeraPDF Integration:** Replaced the custom `checkForStandard` and
`validatePdfAWithPreflight` methods with `VeraPDFService.validatePDF()`.
This aligns validation with industry standards.
* **Code Cleanup:** Removed approximately 200 lines of technical debt,
including raw XML regex parsing and file-locking-prone `PreflightParser`
logic.
* **SEC Compliance:** Added `isSECCompliant()` logic to validate
documents against typical EDGAR requirements:
* No Encryption.
* No Active Content (JavaScript).
* No External Links.
* No Embedded Files.
* No AcroForms.



**Frontend (`GetPdfInfo`):**

* **New Component:** Created `ComplianceSection.tsx` to handle the
complexity of compliance reporting.
* **Enhanced UI:**
* Added visual badges (Passed/Failed/Not Detected) using
`CheckIcon`/`CloseIcon`.
* Added support for displaying the detailed VeraPDF summary alongside
legacy boolean flags.
* Improved handling of "Not PDF/A" states.


### Motivation

The previous validation logic was prone to false positives/negatives and
memory issues with large files (due to Preflight). Moving to VeraPDF
provides accurate, profile-based validation (e.g., PDF/A-1b, 2b, etc.).
The SEC check satisfies a growing need for users validating documents
for financial filings.



<!--
Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)
-->

---

## Checklist

### General

- [X] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [X] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [X] I have performed a self-review of my own code
- [X] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### Translations (if applicable)

- [ ] I ran
[`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md)

### UI Changes (if applicable)

- [X] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [X] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.

---------

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
Signed-off-by: brios <brios@heim-041-30.jkh.uni-linz.ac.at>
This commit is contained in:
Balázs Szücs
2026-01-23 22:36:35 +01:00
committed by GitHub
parent 0b86dd79d3
commit 188408fc1e
7 changed files with 637 additions and 329 deletions

View File

@@ -3,21 +3,15 @@ package stirling.software.SPDF.controller.api.security;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Instant;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.pdfbox.cos.COSInputStream;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
import org.apache.pdfbox.pdmodel.*;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
@@ -35,8 +29,7 @@ import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentGroup;
import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentProperties;
import org.apache.pdfbox.pdmodel.interactive.action.PDActionJavaScript;
import org.apache.pdfbox.pdmodel.interactive.action.PDActionURI;
import org.apache.pdfbox.pdmodel.interactive.action.*;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationFileAttachment;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink;
@@ -44,14 +37,8 @@ import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlin
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineNode;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.preflight.PreflightDocument;
import org.apache.pdfbox.preflight.ValidationResult;
import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
import org.apache.pdfbox.preflight.exception.ValidationException;
import org.apache.pdfbox.preflight.parser.PreflightParser;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.PDFAIdentificationSchema;
import org.apache.xmpbox.xml.DomXmpParser;
import org.apache.xmpbox.xml.XmpParsingException;
import org.apache.xmpbox.xml.XmpSerializer;
@@ -69,6 +56,8 @@ import io.swagger.v3.oas.annotations.Operation;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.security.PDFVerificationResult;
import stirling.software.SPDF.service.VeraPDFService;
import stirling.software.common.annotations.AutoJobPostMapping;
import stirling.software.common.annotations.api.SecurityApi;
import stirling.software.common.model.api.PDFFile;
@@ -91,6 +80,7 @@ public class GetInfoOnPDF {
private static final ObjectMapper objectMapper = new ObjectMapper();
private final CustomPDFDocumentFactory pdfDocumentFactory;
private final VeraPDFService veraPDFService;
private static void addOutlinesToArray(PDOutlineItem outline, ArrayNode arrayNode) {
if (outline == null) return;
@@ -107,214 +97,8 @@ public class GetInfoOnPDF {
}
}
/**
 * Reports whether the document appears to declare the given PDF standard.
 *
 * <p>A keyword equal to {@code "PDF/A"} (ignoring case) is answered by looking for a PDF/A
 * conformance level; every other keyword is answered by searching the document metadata.
 *
 * @param document the document to inspect
 * @param standardKeyword the standard to look for, e.g. {@code "PDF/A"} or {@code "PDF/X"}
 * @return {@code true} if the standard was detected
 */
public static boolean checkForStandard(PDDocument document, String standardKeyword) {
    boolean pdfARequested = "PDF/A".equalsIgnoreCase(standardKeyword);
    return pdfARequested
            ? getPdfAConformanceLevel(document) != null
            : checkStandardInMetadata(document, standardKeyword);
}
/**
 * Returns the PDF/A conformance level found in the document metadata.
 *
 * @param document the document to inspect; may be {@code null}
 * @return the level reported by the metadata lookup, or {@code null} when the document is
 *     {@code null}, encrypted, or no level was found
 */
public static String getPdfAConformanceLevel(PDDocument document) {
    boolean inspectable = document != null && !document.isEncrypted();
    return inspectable ? getPdfAVersionFromMetadata(document) : null;
}
/**
 * Reads the PDF/A version from the document catalog's XMP metadata.
 *
 * <p>The metadata stream is first parsed with {@link DomXmpParser}. If a PDF/A identification
 * schema is present, the result is its part number followed by the upper-cased conformance
 * value. If the schema is absent, or XMP parsing fails, the raw metadata bytes are decoded as
 * UTF-8 and scanned via {@code extractPdfAVersionFromRawXml}.
 *
 * @param document the document whose catalog metadata is read
 * @return the detected version string, or {@code null} if there is no metadata, nothing was
 *     detected, or any exception occurred (exceptions are logged at debug level)
 */
private static String getPdfAVersionFromMetadata(PDDocument document) {
try {
PDMetadata pdMetadata = document.getDocumentCatalog().getMetadata();
if (pdMetadata != null) {
try (COSInputStream metaStream = pdMetadata.createInputStream()) {
DomXmpParser domXmpParser = new DomXmpParser();
XMPMetadata xmpMeta = domXmpParser.parse(metaStream);
PDFAIdentificationSchema pdfId = xmpMeta.getPDFAIdentificationSchema();
if (pdfId != null) {
Integer part = pdfId.getPart();
String conformance = pdfId.getConformance();
// Both values are required; a schema with only one of them falls through to null.
if (part != null && conformance != null) {
return part + conformance.toUpperCase(Locale.ROOT);
}
} else {
// No structured schema: open a fresh stream and scan the raw XML text instead.
try (COSInputStream rawStream = pdMetadata.createInputStream()) {
byte[] metadataBytes = rawStream.readAllBytes();
String rawMetadata = new String(metadataBytes, StandardCharsets.UTF_8);
String extracted = extractPdfAVersionFromRawXml(rawMetadata);
if (extracted != null) {
return extracted;
}
}
}
} catch (XmpParsingException e) {
// Parsing failed: fall back to the same raw-text scan on a fresh stream.
log.debug("XMP parsing failed, trying raw metadata search: {}", e.getMessage());
try (COSInputStream metaStream = pdMetadata.createInputStream()) {
byte[] metadataBytes = metaStream.readAllBytes();
String rawMetadata = new String(metadataBytes, StandardCharsets.UTF_8);
String extracted = extractPdfAVersionFromRawXml(rawMetadata);
if (extracted != null) {
return extracted;
}
}
}
}
} catch (Exception e) {
// Best-effort lookup: any failure is treated as "no PDF/A version found".
log.debug("Error reading PDF/A metadata: {}", e.getMessage());
}
return null;
}
/**
 * Scans raw XMP text for the PDF/A part and conformance values.
 *
 * @param rawXml the metadata text to scan; may be {@code null} or empty
 * @return the first capture group of the part pattern followed by the upper-cased first capture
 *     group of the conformance pattern, or {@code null} if the input is {@code null}/empty,
 *     either pattern does not match, or an exception occurs (logged at debug level)
 */
private static String extractPdfAVersionFromRawXml(String rawXml) {
    boolean nothingToScan = rawXml == null || rawXml.isEmpty();
    if (nothingToScan) {
        return null;
    }

    String version = null;
    try {
        Matcher partHit = RegexPatternUtils.getInstance().getPdfAidPartPattern().matcher(rawXml);
        Matcher levelHit =
                RegexPatternUtils.getInstance().getPdfAidConformancePattern().matcher(rawXml);

        // Both values must be present for a usable version string.
        if (partHit.find() && levelHit.find()) {
            version = partHit.group(1) + levelHit.group(1).toUpperCase(Locale.ROOT);
        }
    } catch (Exception e) {
        log.debug("Error parsing raw XMP for PDF/A version: {}", e.getMessage());
    }
    return version;
}
/**
 * Validates a document that claims PDF/A conformance using PDFBox Preflight.
 *
 * <p>The document is saved to a temporary {@code preflight-*.pdf} file, re-parsed with
 * {@link PreflightParser} and validated. When validation fails and debug logging is enabled,
 * the error count and up to {@code MAX_LOGGED_ERRORS} individual errors are logged. The
 * temporary file is deleted in a {@code finally} block.
 *
 * @param document the document to validate; may be {@code null}
 * @param version the claimed PDF/A version; used only in log messages
 * @return {@code true} only if Preflight reports the document as valid; {@code false} when the
 *     document is {@code null} or encrypted, when parsing does not yield a
 *     {@link PreflightDocument}, or when any exception occurs
 */
private static boolean validatePdfAWithPreflight(PDDocument document, String version) {
if (document == null || document.isEncrypted()) {
return false;
}
// Use Stream-to-File pattern: save to temp file instead of loading into memory
// This prevents OutOfMemoryError on large PDFs
Path tempFile = null;
try {
tempFile = Files.createTempFile("preflight-", ".pdf");
// Save document to temp file (avoids loading entire document into memory)
try (var outputStream = Files.newOutputStream(tempFile)) {
document.save(outputStream);
}
// Use RandomAccessReadBufferedFile for efficient file-based reading
// This avoids Windows file locking issues that occur with memory-mapped files
try (RandomAccessRead source = new RandomAccessReadBufferedFile(tempFile.toFile())) {
PreflightParser parser = new PreflightParser(source);
try (PDDocument parsedDocument = parser.parse()) {
if (!(parsedDocument instanceof PreflightDocument preflightDocument)) {
log.debug(
"Parsed document is not a PreflightDocument; unable to validate claimed PDF/A {}",
version);
return false;
}
try {
ValidationResult result = preflightDocument.validate();
if (!result.isValid() && log.isDebugEnabled()) {
log.debug(
"PDF/A validation found {} errors for claimed version {}",
result.getErrorsList().size(),
version);
// Cap the per-error output so a badly broken file does not flood the log.
int logged = 0;
for (ValidationResult.ValidationError error : result.getErrorsList()) {
log.debug(
" Error {}: {}", error.getErrorCode(), error.getDetails());
if (++logged >= MAX_LOGGED_ERRORS) {
break;
}
}
}
return result.isValid();
} catch (ValidationException e) {
// No return here: control falls through to the final "return false" below.
log.debug(
"Validation exception during PDF/A validation: {}", e.getMessage());
}
} catch (SyntaxValidationException e) {
log.debug(
"Syntax validation failed during PDF/A validation: {}", e.getMessage());
return false;
}
}
} catch (IOException e) {
log.debug("IOException during PDF/A validation: {}", e.getMessage());
} catch (Exception e) {
log.debug("Unexpected error during PDF/A validation: {}", e.getMessage());
} finally {
// Explicitly clean up temp file to prevent disk exhaustion
// This must be in finally block to ensure cleanup even on exceptions
if (tempFile != null) {
try {
Files.deleteIfExists(tempFile);
} catch (IOException e) {
log.warn(
"Failed to delete temp file during PDF/A validation cleanup: {}",
tempFile,
e);
}
}
}
// Reached only when an exception path above was logged without returning.
return false;
}
/**
 * Searches the document catalog's metadata for a standard keyword.
 *
 * <p>The raw metadata bytes (decoded as UTF-8) are searched first. If the keyword is not found
 * there, the metadata is parsed as XMP, re-serialized, and the serialized text is searched.
 * This is a plain substring match, not a validation of conformance.
 *
 * @param document the document whose catalog metadata is searched
 * @param standardKeyword the text to look for, e.g. {@code "PDF/X"}
 * @return {@code true} if either search finds the keyword; {@code false} if there is no
 *     metadata, the keyword is absent, or an exception occurs
 */
private static boolean checkStandardInMetadata(PDDocument document, String standardKeyword) {
// Check XMP Metadata
try {
PDMetadata pdMetadata = document.getDocumentCatalog().getMetadata();
if (pdMetadata != null) {
try (COSInputStream metaStream = pdMetadata.createInputStream()) {
// First try to read raw metadata as string to check for standard keywords
byte[] metadataBytes = metaStream.readAllBytes();
String rawMetadata = new String(metadataBytes, StandardCharsets.UTF_8);
if (rawMetadata.contains(standardKeyword)) {
return true;
}
}
// If raw check doesn't find it, try parsing with XMP parser
try (COSInputStream metaStream = pdMetadata.createInputStream()) {
try {
DomXmpParser domXmpParser = new DomXmpParser();
XMPMetadata xmpMeta = domXmpParser.parse(metaStream);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
new XmpSerializer().serialize(xmpMeta, baos, true);
String xmpString = baos.toString(StandardCharsets.UTF_8);
if (xmpString.contains(standardKeyword)) {
return true;
}
} catch (XmpParsingException e) {
// XMP parsing failed, but we already checked raw metadata above
log.debug(
"XMP parsing failed for standard check, but raw metadata was already checked: {}",
e.getMessage());
}
}
}
} catch (Exception e) {
// Any other failure (including a null document) is logged and reported as "not found".
ExceptionUtils.logException("PDF standard checking", e);
}
return false;
}
private static ObjectNode generatePDFSummaryData(
PDDocument document, String pdfaConformanceLevel, Boolean pdfaValidationPassed) {
PDDocument document, List<PDFVerificationResult> verificationResults) {
ObjectNode summaryData = objectMapper.createObjectNode();
// Check if encrypted
@@ -342,24 +126,16 @@ public class GetInfoOnPDF {
}
// Check standard compliance
if (pdfaConformanceLevel != null) {
summaryData.put("standardCompliance", "PDF/A-" + pdfaConformanceLevel);
summaryData.put("standardPurpose", "long-term archiving");
if (pdfaValidationPassed != null) {
summaryData.put("standardValidationPassed", pdfaValidationPassed);
if (verificationResults != null && !verificationResults.isEmpty()) {
ArrayNode complianceArray = objectMapper.createArrayNode();
for (PDFVerificationResult result : verificationResults) {
ObjectNode complianceNode = objectMapper.createObjectNode();
complianceNode.put("Standard", result.getStandard());
complianceNode.put("Compliant", result.isCompliant());
complianceNode.put("Summary", result.getComplianceSummary());
complianceArray.add(complianceNode);
}
} else if (checkForStandard(document, "PDF/X")) {
summaryData.put("standardCompliance", "PDF/X");
summaryData.put("standardPurpose", "graphic exchange");
} else if (checkForStandard(document, "PDF/UA")) {
summaryData.put("standardCompliance", "PDF/UA");
summaryData.put("standardPurpose", "universal accessibility");
} else if (checkForStandard(document, "PDF/E")) {
summaryData.put("standardCompliance", "PDF/E");
summaryData.put("standardPurpose", "engineering workflows");
} else if (checkForStandard(document, "PDF/VT")) {
summaryData.put("standardCompliance", "PDF/VT");
summaryData.put("standardPurpose", "variable and transactional printing");
summaryData.set("Compliance", complianceArray);
}
return summaryData;
@@ -587,40 +363,113 @@ public class GetInfoOnPDF {
return docInfoNode;
}
private static ObjectNode extractComplianceInfo(PDDocument document) {
private static ObjectNode extractComplianceInfo(
PDDocument doc, List<PDFVerificationResult> verificationResults) {
ObjectNode compliancy = objectMapper.createObjectNode();
try {
String pdfaConformanceLevel = getPdfAConformanceLevel(document);
boolean isPdfACompliant = pdfaConformanceLevel != null;
boolean isPdfXCompliant = checkForStandard(document, "PDF/X");
boolean isPdfECompliant = checkForStandard(document, "PDF/E");
boolean isPdfVTCompliant = checkForStandard(document, "PDF/VT");
boolean isPdfUACompliant = checkForStandard(document, "PDF/UA");
boolean isPdfBCompliant = checkForStandard(document, "PDF/B");
boolean isPdfSECCompliant = checkForStandard(document, "PDF/SEC");
boolean isPdfA = false;
boolean isPdfUA = false;
boolean isPdfX = false;
boolean isPdfE = false;
boolean isPdfB = false;
String pdfAConformanceLevel = null;
compliancy.put("IsPDF/ACompliant", isPdfACompliant);
if (pdfaConformanceLevel != null) {
compliancy.put("PDF/AConformanceLevel", pdfaConformanceLevel);
Boolean pdfaValidationPassed =
validatePdfAWithPreflight(document, pdfaConformanceLevel);
compliancy.put("IsPDF/AValidated", pdfaValidationPassed);
if (verificationResults != null) {
for (PDFVerificationResult result : verificationResults) {
if (result == null) continue;
if (result.isCompliant()) {
String std = result.getStandard().toLowerCase();
if (std.contains("pdf_a") || std.contains("pdfa")) {
isPdfA = true;
String profile = result.getValidationProfile();
if (profile != null) {
if (profile.contains("1b")
|| profile.contains("2b")
|| profile.contains("3b")) {
isPdfB = true;
}
// Simple extraction: remove "pdfa-" prefix
pdfAConformanceLevel = profile.replace("pdfa-", "");
}
}
if (std.contains("pdf_ua") || std.contains("pdfua")) isPdfUA = true;
if (std.contains("pdf_x") || std.contains("pdfx")) isPdfX = true;
if (std.contains("pdf_e") || std.contains("pdfe")) isPdfE = true;
}
}
compliancy.put("IsPDF/XCompliant", isPdfXCompliant);
compliancy.put("IsPDF/ECompliant", isPdfECompliant);
compliancy.put("IsPDF/VTCompliant", isPdfVTCompliant);
compliancy.put("IsPDF/UACompliant", isPdfUACompliant);
compliancy.put("IsPDF/BCompliant", isPdfBCompliant);
compliancy.put("IsPDF/SECCompliant", isPdfSECCompliant);
}
} catch (Exception e) {
log.error("Error extracting compliance info: {}", e.getMessage());
compliancy.put("IsPDF/ACompliant", isPdfA);
compliancy.put("IsPDF/UACompliant", isPdfUA);
compliancy.put("IsPDF/ECompliant", isPdfE);
compliancy.put("IsPDF/VTCompliant", false); // Not currently implemented
compliancy.put("IsPDF/BCompliant", isPdfB);
if (pdfAConformanceLevel != null) {
compliancy.put("PDF/AConformanceLevel", pdfAConformanceLevel);
}
compliancy.put("IsPDF/SECCompliant", isSECCompliant(doc));
if (verificationResults != null && !verificationResults.isEmpty()) {
// Keep original simple structure as backup or extra info
for (PDFVerificationResult result : verificationResults) {
if (result == null) continue;
String standard = result.getStandard();
boolean isCompliant = result.isCompliant();
if (standard != null) {
// Avoid overwriting specific keys if collision, but here keys are distinct
// enough
compliancy.put(standard, isCompliant);
}
}
}
return compliancy;
}
/**
 * Heuristically checks whether a document satisfies the restrictions typically imposed on
 * SEC/EDGAR PDF filings.
 *
 * <p>The document is reported as non-compliant when any of the following is found:
 *
 * <ul>
 *   <li>encryption;
 *   <li>a JavaScript open action or a document-level JavaScript name tree;
 *   <li>an AcroForm;
 *   <li>an embedded-files name tree;
 *   <li>a link annotation whose action is a URI, Launch, remote GoTo, SubmitForm or JavaScript
 *       action.
 * </ul>
 *
 * @param doc the loaded document to inspect
 * @return {@code true} if none of the above was found; {@code false} otherwise, including when
 *     the inspection itself fails with an exception
 */
private static boolean isSECCompliant(PDDocument doc) {
    try {
        // 1. Encryption
        if (doc.isEncrypted()) return false;

        PDDocumentCatalog catalog = doc.getDocumentCatalog();

        // 2. Active content (JavaScript) at document level
        if (catalog.getOpenAction() instanceof PDActionJavaScript) return false;
        PDDocumentNameDictionary names = catalog.getNames();
        if (names != null && names.getJavaScript() != null) return false;

        // 3. Interactive forms
        if (catalog.getAcroForm() != null) return false;

        // 4. Embedded files
        if (names != null && names.getEmbeddedFiles() != null) return false;

        // 5. Link annotations that leave the document or execute content
        for (PDPage page : doc.getPages()) {
            for (PDAnnotation annotation : page.getAnnotations()) {
                if (!(annotation instanceof PDAnnotationLink link)) continue;

                PDAction action = link.getAction();
                if (action instanceof PDActionURI
                        || action instanceof PDActionLaunch
                        || action instanceof PDActionRemoteGoTo
                        || action instanceof PDActionSubmitForm
                        // A script attached to a link is active content too, not only an
                        // open action or a named script.
                        || action instanceof PDActionJavaScript) {
                    return false;
                }
            }
        }

        return true;
    } catch (Exception e) {
        // Fail closed; pass the exception itself so the stack trace is not lost.
        log.error("Error checking SEC compliance: {}", e.getMessage(), e);
        return false;
    }
}
private static ObjectNode extractEncryptionInfo(PDDocument document) {
ObjectNode encryption = objectMapper.createObjectNode();
@@ -1227,6 +1076,13 @@ public class GetInfoOnPDF {
return createErrorResponse("Invalid PDF file: " + e.getMessage());
}
List<PDFVerificationResult> verificationResults = null;
try {
verificationResults = veraPDFService.validatePDF(inputFile.getInputStream());
} catch (Exception e) {
log.error("VeraPDF validation failed", e);
}
boolean readonly = true;
try (PDDocument pdfBoxDoc = pdfDocumentFactory.load(inputFile, readonly)) {
@@ -1235,20 +1091,13 @@ public class GetInfoOnPDF {
ObjectNode metadata = extractMetadata(pdfBoxDoc);
ObjectNode basicInfo = extractBasicInfo(pdfBoxDoc, inputFile.getSize());
ObjectNode docInfoNode = extractDocumentInfo(pdfBoxDoc);
ObjectNode compliancy = extractComplianceInfo(pdfBoxDoc);
ObjectNode compliancy = extractComplianceInfo(pdfBoxDoc, verificationResults);
ObjectNode encryption = extractEncryptionInfo(pdfBoxDoc);
ObjectNode permissionsNode = extractPermissions(pdfBoxDoc);
ObjectNode other = extractOtherInfo(pdfBoxDoc);
ObjectNode formFieldsNode = extractFormFields(pdfBoxDoc);
// Generate summary data
String pdfaConformanceLevel = getPdfAConformanceLevel(pdfBoxDoc);
Boolean pdfaValidationPassed = null;
if (pdfaConformanceLevel != null) {
pdfaValidationPassed = validatePdfAWithPreflight(pdfBoxDoc, pdfaConformanceLevel);
}
ObjectNode summaryData =
generatePDFSummaryData(pdfBoxDoc, pdfaConformanceLevel, pdfaValidationPassed);
ObjectNode summaryData = generatePDFSummaryData(pdfBoxDoc, verificationResults);
// Extract per-page information
ObjectNode pageInfoParent = extractPerPageInfo(pdfBoxDoc);

View File

@@ -1,22 +1,30 @@
package stirling.software.SPDF.controller.api.security;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
import java.util.GregorianCalendar;
import java.util.List;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.*;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.pdmodel.encryption.ProtectionPolicy;
import org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
import org.apache.pdfbox.pdmodel.interactive.action.PDActionJavaScript;
import org.apache.pdfbox.pdmodel.interactive.action.PDActionLaunch;
import org.apache.pdfbox.pdmodel.interactive.action.PDActionURI;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink;
import org.junit.jupiter.api.*;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.params.ParameterizedTest;
@@ -35,6 +43,8 @@ import org.springframework.web.multipart.MultipartFile;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import stirling.software.SPDF.model.api.security.PDFVerificationResult;
import stirling.software.SPDF.service.VeraPDFService;
import stirling.software.common.model.api.PDFFile;
import stirling.software.common.service.CustomPDFDocumentFactory;
@@ -43,6 +53,7 @@ import stirling.software.common.service.CustomPDFDocumentFactory;
class GetInfoOnPDFTest {
@Mock private CustomPDFDocumentFactory pdfDocumentFactory;
@Mock private VeraPDFService veraPDFService;
@InjectMocks private GetInfoOnPDF getInfoOnPDF;
@@ -661,41 +672,6 @@ class GetInfoOnPDFTest {
void testGetPageSize(float width, float height, String expected) {
Assertions.assertEquals(expected, GetInfoOnPDF.getPageSize(width, height));
}
@Test
@DisplayName("Should check for PDF/A standard")
void testCheckForStandard_PdfA() throws IOException {
    // This would require a real PDF/A document or mocking.
    // try-with-resources closes the document even when the assertion fails.
    try (PDDocument document = createSimplePdfWithText("Test")) {
        boolean result = GetInfoOnPDF.checkForStandard(document, "PDF/A");
        Assertions.assertFalse(result); // Simple PDF is not PDF/A compliant
    }
}
@Test
@DisplayName("Should handle null document in checkForStandard")
void testCheckForStandard_NullDocument() {
    // A missing document must be reported as "standard not found".
    Assertions.assertFalse(GetInfoOnPDF.checkForStandard(null, "PDF/A"));
}
@Test
@DisplayName("Should get PDF/A conformance level")
void testGetPdfAConformanceLevel() throws IOException {
    // try-with-resources closes the document even when the assertion fails.
    try (PDDocument document = createSimplePdfWithText("Test")) {
        String level = GetInfoOnPDF.getPdfAConformanceLevel(document);
        Assertions.assertNull(level);
    }
}
@Test
@DisplayName("Should handle encrypted document in getPdfAConformanceLevel")
void testGetPdfAConformanceLevel_EncryptedDocument() throws IOException {
    // try-with-resources closes the document even when the assertion fails.
    try (PDDocument document = createEncryptedPdf()) {
        String level = GetInfoOnPDF.getPdfAConformanceLevel(document);
        Assertions.assertNull(level); // Encrypted documents return null
    }
}
}
@Nested
@@ -776,8 +752,8 @@ class GetInfoOnPDFTest {
class ComplianceTests {
@Test
@DisplayName("Should check PDF/A compliance")
void testCompliance_PdfA() throws IOException {
@DisplayName("Should extract compliance info using VeraPDF")
void testCompliance_PdfA() throws Exception {
PDDocument document = createSimplePdfWithText("Test PDF/A");
MockMultipartFile mockFile = documentToMultipartFile(document, "pdfa.pdf");
@@ -791,16 +767,22 @@ class GetInfoOnPDFTest {
ArgumentMatchers.anyBoolean()))
.thenReturn(loadedDoc);
// Mock VeraPDFService
PDFVerificationResult result = new PDFVerificationResult();
result.setStandard("pdfa-1b");
result.setCompliant(true);
result.setComplianceSummary("PDF/A-1b compliant");
Mockito.when(veraPDFService.validatePDF(ArgumentMatchers.any(InputStream.class)))
.thenReturn(List.of(result));
ResponseEntity<byte[]> response = getInfoOnPDF.getPdfInfo(request);
String jsonResponse = new String(response.getBody(), StandardCharsets.UTF_8);
JsonNode jsonNode = objectMapper.readTree(jsonResponse);
JsonNode compliancy = jsonNode.get("Compliancy");
Assertions.assertTrue(compliancy.has("IsPDF/ACompliant"));
Assertions.assertTrue(compliancy.has("IsPDF/XCompliant"));
Assertions.assertTrue(compliancy.has("IsPDF/ECompliant"));
Assertions.assertTrue(compliancy.has("IsPDF/UACompliant"));
Assertions.assertTrue(compliancy.has("pdfa-1b"));
Assertions.assertTrue(compliancy.get("pdfa-1b").asBoolean());
loadedDoc.close();
}
@@ -840,4 +822,104 @@ class GetInfoOnPDFTest {
loadedDoc.close();
}
}
@Test
@DisplayName("SEC Compliance: Clean document should pass")
void testSecCompliance_Clean() throws Exception {
    // A single blank page, with nothing else added to the document.
    try (PDDocument blankDoc = new PDDocument()) {
        PDPage emptyPage = new PDPage();
        blankDoc.addPage(emptyPage);

        checkSecCompliance(blankDoc, true);
    }
}
@Test
@DisplayName("SEC Compliance: JavaScript action should fail")
void testSecCompliance_JavaScript() throws Exception {
    try (PDDocument scriptedDoc = new PDDocument()) {
        scriptedDoc.addPage(new PDPage());

        // Attach a script that would run when the document is opened.
        scriptedDoc
                .getDocumentCatalog()
                .setOpenAction(new PDActionJavaScript("app.alert('Hi')"));

        checkSecCompliance(scriptedDoc, false);
    }
}
@Test
@DisplayName("SEC Compliance: External URI Link should fail")
void testSecCompliance_ExternalLink() throws Exception {
    try (PDDocument linkedDoc = new PDDocument()) {
        // Build a link annotation that points at an external URL.
        PDActionURI openUrl = new PDActionURI();
        openUrl.setURI("http://google.com");
        PDAnnotationLink externalLink = new PDAnnotationLink();
        externalLink.setAction(openUrl);

        PDPage hostPage = new PDPage();
        linkedDoc.addPage(hostPage);
        hostPage.getAnnotations().add(externalLink);

        checkSecCompliance(linkedDoc, false);
    }
}
@Test
@DisplayName("SEC Compliance: Launch Action should fail")
void testSecCompliance_LaunchAction() throws Exception {
    try (PDDocument launchingDoc = new PDDocument()) {
        // Build a link annotation whose action is a Launch action.
        PDAnnotationLink launcher = new PDAnnotationLink();
        launcher.setAction(new PDActionLaunch());

        PDPage hostPage = new PDPage();
        launchingDoc.addPage(hostPage);
        hostPage.getAnnotations().add(launcher);

        checkSecCompliance(launchingDoc, false);
    }
}
@Test
@DisplayName("SEC Compliance: Embedded File should fail")
void testSecCompliance_EmbeddedFile() throws Exception {
    try (PDDocument doc = new PDDocument()) {
        doc.addPage(new PDPage());

        // Build a file specification carrying a tiny embedded payload.
        PDComplexFileSpecification fs = new PDComplexFileSpecification();
        fs.setFile("test.txt");
        // Explicit charset keeps the fixture bytes independent of the platform default.
        PDEmbeddedFile ef =
                new PDEmbeddedFile(
                        doc, new ByteArrayInputStream("test".getBytes(StandardCharsets.UTF_8)));
        fs.setEmbeddedFile(ef);

        // Register it in the catalog's embedded-files name tree.
        PDEmbeddedFilesNameTreeNode efTree = new PDEmbeddedFilesNameTreeNode();
        efTree.setNames(Collections.singletonMap("test", fs));
        PDDocumentNameDictionary names = new PDDocumentNameDictionary(doc.getDocumentCatalog());
        names.setEmbeddedFiles(efTree);
        doc.getDocumentCatalog().setNames(names);

        checkSecCompliance(doc, false);
    }
}
/**
 * Serializes {@code doc}, sends it through {@code getPdfInfo} with the document factory mocked
 * to return a reloaded copy, and asserts the reported {@code IsPDF/SECCompliant} flag.
 *
 * @param doc the in-memory document to check
 * @param expected the SEC compliance value the response must report
 * @throws Exception if saving, loading or the endpoint call fails
 */
private void checkSecCompliance(PDDocument doc, boolean expected) throws Exception {
    try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
        doc.save(buffer);
        byte[] pdfBytes = buffer.toByteArray();

        // The controller loads through the factory, so hand it a reloaded copy of the bytes.
        PDDocument reloaded = Loader.loadPDF(pdfBytes);
        Mockito.when(
                        pdfDocumentFactory.load(
                                ArgumentMatchers.any(MultipartFile.class),
                                ArgumentMatchers.anyBoolean()))
                .thenReturn(reloaded);

        MockMultipartFile upload =
                new MockMultipartFile("file", "test.pdf", "application/pdf", pdfBytes);
        PDFFile request = new PDFFile();
        request.setFileInput(upload);

        ResponseEntity<byte[]> response = getInfoOnPDF.getPdfInfo(request);
        JsonNode root =
                objectMapper.readTree(new String(response.getBody(), StandardCharsets.UTF_8));
        JsonNode secFlag = root.get("Compliancy").get("IsPDF/SECCompliant");

        Assertions.assertEquals(expected, secFlag.asBoolean(), "SEC Compliance check failed");
    }
}
}