feat(get-info-on-pdf): use PDFBox Preflight to validate the PDF/A conformance level, and parse the conformance type (#4595)

# Description of Changes

- Refactored methods for parsing and extracting PDF/A conformance levels
from XMP metadata.
- Implemented PDF/A validation using Preflight from Apache PDFBox.
- Enhanced PDF information generation to include PDF/A conformance level
and validation results.
- Updated compliance checks and JSON output to reflect new PDF/A
capabilities.

### Test files:

[lorem-ipsum_PDFA1b.pdf](https://github.com/user-attachments/files/22687689/lorem-ipsum_PDFA1b.pdf)

[lorem-ipsum_PDFA_2b.pdf](https://github.com/user-attachments/files/22687692/lorem-ipsum_PDFA_2b.pdf)

[lorem-ipsum_PD⁄A3a.pdf](https://github.com/user-attachments/files/22687693/lorem-ipsum_PD.A3a.pdf)

### New results:
<img width="699" height="257" alt="image"
src="https://github.com/user-attachments/assets/b8cb5510-2908-4e08-97f6-d5799e0e1be7"
/>
<img width="699" height="257" alt="image"
src="https://github.com/user-attachments/assets/d7af3731-ad19-4524-b1c1-32f47776e6af"
/>
<img width="699" height="257" alt="image"
src="https://github.com/user-attachments/assets/6e48e65b-2ebc-402a-a222-bfdbf783e45d"
/>

I also validated with online tools. Should be good now!

I was also thinking of moving this to GeneralUtils; it may be useful for
the PDF/A converter in the future, or for other features. I'm not sure yet;
for now I think this is fine as it is.

Closes #4568

<!--
Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)
-->

---

## Checklist

### General

- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [x] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [x] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.

---------

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
Balázs Szücs 2025-10-04 11:22:29 +02:00 committed by GitHub
parent dabc52ef73
commit 575854952a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 375 additions and 188 deletions

View File

@ -447,7 +447,20 @@ public final class RegexPatternUtils {
return getPattern("@\\s*([^\\s\\(]+(?:\\.[a-zA-Z0-9]+)?)");
}
// API doc parsing patterns
/**
 * Pattern for locating the {@code pdfaid:part} value in raw XMP metadata.
 *
 * <p>Both XMP serializations are recognised: the attribute form ({@code pdfaid:part="1"} or
 * {@code pdfaid:part='1'}) and the element form ({@code <pdfaid:part>1</pdfaid:part>}). Every
 * input accepted by the earlier attribute-only expression is still accepted, with the same
 * capture.
 *
 * @return a pattern whose group 1 captures the numeric part
 */
public Pattern getPdfAidPartPattern() {
    // Alternative 1: attribute syntax (optional quotes/whitespace around '=').
    // Alternative 2: element syntax, where the value follows the tag's closing '>'.
    return getPattern("pdfaid:part(?:[\"\\s]*=[\"'\\s]*|\\s*>\\s*)([0-9]+)");
}
/**
 * Pattern for locating the {@code pdfaid:conformance} value in raw XMP metadata.
 *
 * <p>Both XMP serializations are recognised: the attribute form
 * ({@code pdfaid:conformance="B"} or {@code pdfaid:conformance='B'}) and the element form
 * ({@code <pdfaid:conformance>B</pdfaid:conformance>}). Every input accepted by the earlier
 * attribute-only expression is still accepted, with the same capture.
 *
 * @return a pattern whose group 1 captures the conformance letters
 */
public Pattern getPdfAidConformancePattern() {
    // Alternative 1: attribute syntax (optional quotes/whitespace around '=').
    // Alternative 2: element syntax, where the value follows the tag's closing '>'.
    return getPattern("pdfaid:conformance(?:[\"\\s]*=[\"'\\s]*|\\s*>\\s*)([A-Za-z]+)");
}
/** Pattern matching a single forward slash, as found in page mode descriptions. */
public Pattern getPageModePattern() {
    final String slashRegex = "/";
    return getPattern(slashRegex);
}
/**
* Pre-compile commonly used patterns for immediate availability. This eliminates first-call

View File

@ -7,10 +7,13 @@ import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.pdfbox.cos.COSInputStream;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.pdmodel.*;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
@ -40,8 +43,14 @@ import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlin
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineNode;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.preflight.PreflightDocument;
import org.apache.pdfbox.preflight.ValidationResult;
import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
import org.apache.pdfbox.preflight.exception.ValidationException;
import org.apache.pdfbox.preflight.parser.PreflightParser;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.PDFAIdentificationSchema;
import org.apache.xmpbox.xml.DomXmpParser;
import org.apache.xmpbox.xml.XmpParsingException;
import org.apache.xmpbox.xml.XmpSerializer;
@ -95,60 +104,147 @@ public class GetInfoOnPDF {
}
}
/**
* Generates structured summary data about the PDF highlighting its unique characteristics such
* as encryption status, permission restrictions, and standards compliance.
*
* @param document The PDF document to analyze
* @return An ObjectNode containing structured summary data
*/
private ObjectNode generatePDFSummaryData(PDDocument document) {
ObjectNode summaryData = objectMapper.createObjectNode();
// Check if encrypted
if (document.isEncrypted()) {
summaryData.put("encrypted", true);
public static boolean checkForStandard(PDDocument document, String standardKeyword) {
if ("PDF/A".equalsIgnoreCase(standardKeyword)) {
return getPdfAConformanceLevel(document) != null;
}
// Check permissions
AccessPermission ap = document.getCurrentAccessPermission();
ArrayNode restrictedPermissions = objectMapper.createArrayNode();
if (!ap.canAssembleDocument()) restrictedPermissions.add("document assembly");
if (!ap.canExtractContent()) restrictedPermissions.add("content extraction");
if (!ap.canExtractForAccessibility()) restrictedPermissions.add("accessibility extraction");
if (!ap.canFillInForm()) restrictedPermissions.add("form filling");
if (!ap.canModify()) restrictedPermissions.add("modification");
if (!ap.canModifyAnnotations()) restrictedPermissions.add("annotation modification");
if (!ap.canPrint()) restrictedPermissions.add("printing");
if (!restrictedPermissions.isEmpty()) {
summaryData.set("restrictedPermissions", restrictedPermissions);
summaryData.put("restrictedPermissionsCount", restrictedPermissions.size());
}
// Check standard compliance
if (checkForStandard(document, "PDF/A")) {
summaryData.put("standardCompliance", "PDF/A");
summaryData.put("standardPurpose", "long-term archiving");
} else if (checkForStandard(document, "PDF/X")) {
summaryData.put("standardCompliance", "PDF/X");
summaryData.put("standardPurpose", "graphic exchange");
} else if (checkForStandard(document, "PDF/UA")) {
summaryData.put("standardCompliance", "PDF/UA");
summaryData.put("standardPurpose", "universal accessibility");
} else if (checkForStandard(document, "PDF/E")) {
summaryData.put("standardCompliance", "PDF/E");
summaryData.put("standardPurpose", "engineering workflows");
} else if (checkForStandard(document, "PDF/VT")) {
summaryData.put("standardCompliance", "PDF/VT");
summaryData.put("standardPurpose", "variable and transactional printing");
}
return summaryData;
return checkStandardInMetadata(document, standardKeyword);
}
public static boolean checkForStandard(PDDocument document, String standardKeyword) {
/**
 * Returns the PDF/A level declared in the document's XMP metadata.
 *
 * @param document the document to inspect; may be {@code null}
 * @return the value produced by {@code getPdfAVersionFromMetadata}, or {@code null} when the
 *     document is {@code null} or encrypted
 */
public static String getPdfAConformanceLevel(PDDocument document) {
    boolean inspectable = document != null && !document.isEncrypted();
    return inspectable ? getPdfAVersionFromMetadata(document) : null;
}
/**
 * Reads the PDF/A part and conformance declared in the catalog's XMP metadata stream.
 *
 * <p>The metadata is first parsed with {@link DomXmpParser}. If that parse throws an
 * {@link XmpParsingException}, or succeeds without a PDF/A identification schema, the raw
 * stream is scanned with {@code extractPdfAVersionFromRawXml} instead. Any other failure is
 * logged at debug level and treated as "not found".
 *
 * @param document the document whose catalog metadata is inspected
 * @return the part followed by the upper-cased conformance, or {@code null} if no usable
 *     declaration could be read
 */
private static String getPdfAVersionFromMetadata(PDDocument document) {
    try {
        PDMetadata metadata = document.getDocumentCatalog().getMetadata();
        if (metadata == null) {
            return null;
        }
        try (COSInputStream xmpStream = metadata.createInputStream()) {
            XMPMetadata xmp = new DomXmpParser().parse(xmpStream);
            PDFAIdentificationSchema schema = xmp.getPDFAIdentificationSchema();
            if (schema == null) {
                // Parsed fine but no pdfaid schema was recognised: try a textual scan.
                return readPdfAVersionFromRawStream(metadata);
            }
            Integer part = schema.getPart();
            String conformance = schema.getConformance();
            boolean complete = part != null && conformance != null;
            return complete ? part + conformance.toUpperCase(Locale.ROOT) : null;
        } catch (XmpParsingException e) {
            log.debug("XMP parsing failed, trying raw metadata search: {}", e.getMessage());
            return readPdfAVersionFromRawStream(metadata);
        }
    } catch (Exception e) {
        log.debug("Error reading PDF/A metadata: {}", e.getMessage());
    }
    return null;
}

/** Decodes the metadata stream as UTF-8 text and scans it for a PDF/A declaration. */
private static String readPdfAVersionFromRawStream(PDMetadata metadata) throws IOException {
    try (COSInputStream rawStream = metadata.createInputStream()) {
        String rawXml = new String(rawStream.readAllBytes(), StandardCharsets.UTF_8);
        return extractPdfAVersionFromRawXml(rawXml);
    }
}
/**
 * Scans raw XMP text for {@code pdfaid:part} and {@code pdfaid:conformance} using the shared
 * patterns from {@code RegexPatternUtils}.
 *
 * @param rawXml the metadata text; may be {@code null} or empty
 * @return the captured part followed by the upper-cased conformance, or {@code null} when
 *     either value is missing, the input is blank, or the scan fails
 */
private static String extractPdfAVersionFromRawXml(String rawXml) {
    if (rawXml == null || rawXml.isEmpty()) {
        return null;
    }

    try {
        Matcher part =
                RegexPatternUtils.getInstance().getPdfAidPartPattern().matcher(rawXml);
        Matcher conformance =
                RegexPatternUtils.getInstance().getPdfAidConformancePattern().matcher(rawXml);
        // Both declarations must be present; the part is searched first.
        if (!part.find() || !conformance.find()) {
            return null;
        }
        return part.group(1) + conformance.group(1).toUpperCase(Locale.ROOT);
    } catch (Exception e) {
        log.debug("Error parsing raw XMP for PDF/A version: {}", e.getMessage());
    }
    return null;
}
/**
 * Validates the document with the PDFBox Preflight parser.
 *
 * <p>The document is saved into an in-memory buffer, re-parsed with {@link PreflightParser} and
 * validated. Every failure path (encrypted or {@code null} input, parse or validation
 * exceptions, a parse result that is not a {@link PreflightDocument}) is logged at debug level
 * and reported as {@code false}.
 *
 * @param document the document to validate; may be {@code null}
 * @param version the claimed PDF/A level, used only in log messages
 * @return {@code true} only if Preflight reports the document as valid
 */
private static boolean validatePdfAWithPreflight(PDDocument document, String version) {
    if (document == null || document.isEncrypted()) {
        return false;
    }

    try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
        document.save(buffer);

        try (RandomAccessReadBuffer input = new RandomAccessReadBuffer(buffer.toByteArray())) {
            PreflightParser preflightParser = new PreflightParser(input);

            try (PDDocument parsed = preflightParser.parse()) {
                if (parsed instanceof PreflightDocument preflight) {
                    try {
                        ValidationResult outcome = preflight.validate();
                        if (!outcome.isValid() && log.isDebugEnabled()) {
                            logPreflightErrors(outcome, version);
                        }
                        return outcome.isValid();
                    } catch (ValidationException e) {
                        // Falls through to the final "return false" below.
                        log.debug(
                                "Validation exception during PDF/A validation: {}",
                                e.getMessage());
                    }
                } else {
                    log.debug(
                            "Parsed document is not a PreflightDocument; unable to validate claimed PDF/A {}",
                            version);
                    return false;
                }
            } catch (SyntaxValidationException e) {
                log.debug(
                        "Syntax validation failed during PDF/A validation: {}", e.getMessage());
                return false;
            }
        }
    } catch (IOException e) {
        log.debug("IOException during PDF/A validation: {}", e.getMessage());
    } catch (Exception e) {
        log.debug("Unexpected error during PDF/A validation: {}", e.getMessage());
    }

    return false;
}

/** Logs the error count and at most the first five Preflight errors at debug level. */
private static void logPreflightErrors(ValidationResult outcome, String version) {
    log.debug(
            "PDF/A validation found {} errors for claimed version {}",
            outcome.getErrorsList().size(),
            version);
    outcome.getErrorsList().stream()
            .limit(5)
            .forEach(
                    error ->
                            log.debug(
                                    " Error {}: {}",
                                    error.getErrorCode(),
                                    error.getDetails()));
}
private static boolean checkStandardInMetadata(PDDocument document, String standardKeyword) {
// Check XMP Metadata
try {
PDMetadata pdMetadata = document.getDocumentCatalog().getMetadata();
@ -191,11 +287,197 @@ public class GetInfoOnPDF {
return false;
}
/**
 * Builds the summary node describing the document's notable characteristics: whether it is
 * encrypted, which permissions are withheld, and which PDF standard it declares.
 *
 * @param document the PDF document to analyze
 * @param pdfaConformanceLevel the detected PDF/A level, or {@code null} if none was found; when
 *     present it takes precedence over the other standards
 * @param pdfaValidationPassed the PDF/A validation outcome, or {@code null} to omit it from
 *     the output
 * @return an ObjectNode containing the structured summary data
 */
private ObjectNode generatePDFSummaryData(
        PDDocument document, String pdfaConformanceLevel, Boolean pdfaValidationPassed) {
    ObjectNode summary = objectMapper.createObjectNode();

    // The flag is only emitted for encrypted documents.
    if (document.isEncrypted()) {
        summary.put("encrypted", true);
    }

    // Collect every permission in output order, then keep the ones that are denied.
    AccessPermission access = document.getCurrentAccessPermission();
    Map<String, Boolean> grants = new LinkedHashMap<>();
    grants.put("document assembly", access.canAssembleDocument());
    grants.put("content extraction", access.canExtractContent());
    grants.put("accessibility extraction", access.canExtractForAccessibility());
    grants.put("form filling", access.canFillInForm());
    grants.put("modification", access.canModify());
    grants.put("annotation modification", access.canModifyAnnotations());
    grants.put("printing", access.canPrint());

    ArrayNode denied = objectMapper.createArrayNode();
    grants.forEach(
            (label, allowed) -> {
                if (!allowed) {
                    denied.add(label);
                }
            });
    if (!denied.isEmpty()) {
        summary.set("restrictedPermissions", denied);
        summary.put("restrictedPermissionsCount", denied.size());
    }

    // A detected PDF/A level wins; otherwise the first matching standard is reported.
    if (pdfaConformanceLevel != null) {
        summary.put("standardCompliance", "PDF/A-" + pdfaConformanceLevel);
        summary.put("standardPurpose", "long-term archiving");
        if (pdfaValidationPassed != null) {
            summary.put("standardValidationPassed", pdfaValidationPassed);
        }
        return summary;
    }

    String[][] otherStandards = {
        {"PDF/X", "graphic exchange"},
        {"PDF/UA", "universal accessibility"},
        {"PDF/E", "engineering workflows"},
        {"PDF/VT", "variable and transactional printing"},
    };
    for (String[] standard : otherStandards) {
        if (checkForStandard(document, standard[0])) {
            summary.put("standardCompliance", standard[0]);
            summary.put("standardPurpose", standard[1]);
            break;
        }
    }
    return summary;
}
/**
 * Writes one "Allowed"/"Not Allowed" entry per access permission of the document into the
 * given node.
 *
 * @param pdfBoxDoc the document whose current access permission is read
 * @param permissionsNode the node that receives the entries
 */
private static void setNodePermissions(PDDocument pdfBoxDoc, ObjectNode permissionsNode) {
    AccessPermission access = pdfBoxDoc.getCurrentAccessPermission();

    // Insertion-ordered so the JSON keys keep their established order.
    Map<String, Boolean> flags = new LinkedHashMap<>();
    flags.put("Document Assembly", access.canAssembleDocument());
    flags.put("Extracting Content", access.canExtractContent());
    flags.put("Extracting for accessibility", access.canExtractForAccessibility());
    flags.put("Form Filling", access.canFillInForm());
    flags.put("Modifying", access.canModify());
    flags.put("Modifying annotations", access.canModifyAnnotations());
    flags.put("Printing", access.canPrint());

    flags.forEach((label, allowed) -> permissionsNode.put(label, getPermissionState(allowed)));
}
/** Maps a permission flag to its display text: "Allowed" or "Not Allowed". */
private static String getPermissionState(boolean state) {
    if (state) {
        return "Allowed";
    }
    return "Not Allowed";
}
/**
 * Classifies a page by comparing its width and height.
 *
 * @param width the page width
 * @param height the page height
 * @return "Landscape" if wider than tall, "Portrait" if taller than wide, otherwise "Square"
 */
public static String getPageOrientation(double width, double height) {
    return width > height ? "Landscape" : (height > width ? "Portrait" : "Square");
}
/**
 * Finds the name of the standard page size matching the given dimensions.
 *
 * @param width the page width, compared against {@link PDRectangle#getWidth()}
 * @param height the page height, compared against {@link PDRectangle#getHeight()}
 * @return the matching size name, or "Custom" if none is within tolerance
 */
public static String getPageSize(float width, float height) {
    // Known standard sizes, keyed by the name reported to the caller.
    Map<String, PDRectangle> knownSizes = new HashMap<>();
    knownSizes.put("Letter", PDRectangle.LETTER);
    knownSizes.put("LEGAL", PDRectangle.LEGAL);
    knownSizes.put("A0", PDRectangle.A0);
    knownSizes.put("A1", PDRectangle.A1);
    knownSizes.put("A2", PDRectangle.A2);
    knownSizes.put("A3", PDRectangle.A3);
    knownSizes.put("A4", PDRectangle.A4);
    knownSizes.put("A5", PDRectangle.A5);
    knownSizes.put("A6", PDRectangle.A6);

    return knownSizes.entrySet().stream()
            .filter(
                    candidate ->
                            isCloseToSize(
                                    width,
                                    height,
                                    candidate.getValue().getWidth(),
                                    candidate.getValue().getHeight()))
            .map(Map.Entry::getKey)
            .findFirst()
            .orElse("Custom");
}
/**
 * Tells whether both dimensions are within a tolerance of 1.0 (inclusive) of the reference
 * dimensions.
 */
private static boolean isCloseToSize(
        float width, float height, float standardWidth, float standardHeight) {
    final float tolerance = 1.0f; // You can adjust the tolerance as needed
    boolean widthMatches = Math.abs(width - standardWidth) <= tolerance;
    boolean heightMatches = Math.abs(height - standardHeight) <= tolerance;
    return widthMatches && heightMatches;
}
/**
 * Stores the given dimensions in the node as two-decimal strings: the raw values under the
 * "(px)" keys, the values divided by 72 under "(in)", and those multiplied by 2.54 under "(cm)".
 *
 * @param dimensionInfo the node that receives the six entries
 * @param width the page width
 * @param height the page height
 */
private static void setDimensionInfo(ObjectNode dimensionInfo, float width, float height) {
    final float pointsPerInch = 72;
    final float cmPerInch = 2.54f;

    float inchesWide = width / pointsPerInch;
    float inchesHigh = height / pointsPerInch;

    dimensionInfo.put("Width (px)", "%.2f".formatted(width));
    dimensionInfo.put("Height (px)", "%.2f".formatted(height));
    dimensionInfo.put("Width (in)", "%.2f".formatted(inchesWide));
    dimensionInfo.put("Height (in)", "%.2f".formatted(inchesHigh));
    dimensionInfo.put("Width (cm)", "%.2f".formatted(inchesWide * cmPerInch));
    dimensionInfo.put("Height (cm)", "%.2f".formatted(inchesHigh * cmPerInch));
}
/**
 * Converts a list of structure-tree objects into a JSON array.
 *
 * <p>Each {@code PDStructureNode} yields one object; if it is a {@code PDStructureElement} the
 * object carries its "Type", "Content" and, when non-empty, its recursively built "Children".
 * Objects of any other type are skipped.
 *
 * @param nodes the objects to convert; may be {@code null}
 * @return the resulting array, empty when {@code nodes} is {@code null}
 */
private static ArrayNode exploreStructureTree(List<Object> nodes) {
    ArrayNode result = objectMapper.createArrayNode();
    if (nodes == null) {
        return result;
    }

    for (Object candidate : nodes) {
        if (!(candidate instanceof PDStructureNode structureNode)) {
            continue;
        }
        ObjectNode entry = objectMapper.createObjectNode();
        if (structureNode instanceof PDStructureElement element) {
            entry.put("Type", element.getStructureType());
            entry.put("Content", getContent(element));

            // Descend into the element's kids; omit the key when nothing was found.
            ArrayNode children = exploreStructureTree(element.getKids());
            if (!children.isEmpty()) {
                entry.set("Children", children);
            }
        }
        result.add(entry);
    }
    return result;
}
/**
 * Concatenates the text found among a structure element's kids.
 *
 * <p>Only {@code COSString} kids and nested {@code PDStructureElement} kids (handled
 * recursively) contribute; other kid types are ignored.
 *
 * @param structureElement the element whose kids are read
 * @return the collected text, possibly empty
 */
private static String getContent(PDStructureElement structureElement) {
    StringBuilder text = new StringBuilder();
    for (Object kid : structureElement.getKids()) {
        if (kid instanceof COSString literal) {
            text.append(literal.getString());
        } else if (kid instanceof PDStructureElement nested) {
            // Only these two kid types are handled; a fuller version would cover more.
            text.append(getContent(nested));
        }
    }
    return text.toString();
}
/**
 * Formats a calendar as {@code yyyy-MM-dd HH:mm:ss} in the system default time zone.
 *
 * @param calendar the value to format; may be {@code null}
 * @return the formatted timestamp, or {@code null} for a {@code null} input
 */
private static String formatDate(Calendar calendar) {
    if (calendar == null) {
        return null;
    }
    return DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
            .format(calendar.toInstant().atZone(ZoneId.systemDefault()));
}
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/get-info-on-pdf")
@Operation(summary = "Summary here", description = "desc. Input:PDF Output:JSON Type:SISO")
public ResponseEntity<byte[]> getPdfInfo(@ModelAttribute PDFFile request) throws IOException {
MultipartFile inputFile = request.getFileInput();
boolean readonly = true;
final String pagePrefix = "Page ";
final int prefixLength = pagePrefix.length();
StringBuilder keyBuilder = new StringBuilder(prefixLength + 8);
keyBuilder.append(pagePrefix);
try (PDDocument pdfBoxDoc = pdfDocumentFactory.load(inputFile, readonly)) {
ObjectMapper objectMapper = new ObjectMapper();
ObjectNode jsonOutput = objectMapper.createObjectNode();
@ -267,8 +549,15 @@ public class GetInfoOnPDF {
}
jsonOutput.set("FormFields", formFieldsNode);
String pdfaConformanceLevel = getPdfAConformanceLevel(pdfBoxDoc);
Boolean pdfaValidationPassed = null;
if (pdfaConformanceLevel != null) {
pdfaValidationPassed = validatePdfAWithPreflight(pdfBoxDoc, pdfaConformanceLevel);
}
// Generate structured summary data about PDF characteristics
ObjectNode summaryData = generatePDFSummaryData(pdfBoxDoc);
ObjectNode summaryData =
generatePDFSummaryData(pdfBoxDoc, pdfaConformanceLevel, pdfaValidationPassed);
if (summaryData != null && !summaryData.isEmpty()) {
jsonOutput.set("SummaryData", summaryData);
}
@ -373,7 +662,7 @@ public class GetInfoOnPDF {
log.error("exception", e);
}
boolean isPdfACompliant = checkForStandard(pdfBoxDoc, "PDF/A");
boolean isPdfACompliant = pdfaConformanceLevel != null;
boolean isPdfXCompliant = checkForStandard(pdfBoxDoc, "PDF/X");
boolean isPdfECompliant = checkForStandard(pdfBoxDoc, "PDF/E");
boolean isPdfVTCompliant = checkForStandard(pdfBoxDoc, "PDF/VT");
@ -390,6 +679,12 @@ public class GetInfoOnPDF {
// development in 2021.
compliancy.put("IsPDF/ACompliant", isPdfACompliant);
if (pdfaConformanceLevel != null) {
compliancy.put("PDF/AConformanceLevel", pdfaConformanceLevel);
if (pdfaValidationPassed != null) {
compliancy.put("IsPDF/AValidated", pdfaValidationPassed);
}
}
compliancy.put("IsPDF/XCompliant", isPdfXCompliant);
compliancy.put("IsPDF/ECompliant", isPdfECompliant);
compliancy.put("IsPDF/VTCompliant", isPdfVTCompliant);
@ -466,7 +761,7 @@ public class GetInfoOnPDF {
ObjectNode sizeInfo = objectMapper.createObjectNode();
getDimensionInfo(sizeInfo, width, height);
setDimensionInfo(sizeInfo, width, height);
sizeInfo.put("Standard Page", getPageSize(width, height));
pageInfo.set("Size", sizeInfo);
@ -552,11 +847,10 @@ public class GetInfoOnPDF {
Set<String> uniqueURIs = new HashSet<>(); // To store unique URIs
for (PDAnnotation annotation : annotations) {
if (annotation instanceof PDAnnotationLink linkAnnotation) {
if (linkAnnotation.getAction() instanceof PDActionURI uriAction) {
String uri = uriAction.getURI();
uniqueURIs.add(uri); // Add to set to ensure uniqueness
}
if (annotation instanceof PDAnnotationLink linkAnnotation
&& linkAnnotation.getAction() instanceof PDActionURI uriAction) {
String uri = uriAction.getURI();
uniqueURIs.add(uri); // Add to set to ensure uniqueness
}
}
@ -679,8 +973,10 @@ public class GetInfoOnPDF {
}
pageInfo.set("Multimedia", multimediaArray);
keyBuilder.setLength(prefixLength);
keyBuilder.append(pageNum + 1);
pageInfoParent.set("Page " + (pageNum + 1), pageInfo);
pageInfoParent.set(keyBuilder.toString(), pageInfo);
}
jsonOutput.set("BasicInfo", basicInfo);
@ -706,133 +1002,11 @@ public class GetInfoOnPDF {
return null;
}
/**
 * Writes one "Allowed"/"Not Allowed" entry per access permission of the document into the
 * given node.
 *
 * @param pdfBoxDoc the document whose current access permission is read
 * @param permissionsNode the node that receives the entries
 */
private void setNodePermissions(PDDocument pdfBoxDoc, ObjectNode permissionsNode) {
    AccessPermission access = pdfBoxDoc.getCurrentAccessPermission();

    // Labels and flags are kept index-aligned so the output order is explicit.
    String[] labels = {
        "Document Assembly",
        "Extracting Content",
        "Extracting for accessibility",
        "Form Filling",
        "Modifying",
        "Modifying annotations",
        "Printing",
    };
    boolean[] granted = {
        access.canAssembleDocument(),
        access.canExtractContent(),
        access.canExtractForAccessibility(),
        access.canFillInForm(),
        access.canModify(),
        access.canModifyAnnotations(),
        access.canPrint(),
    };

    for (int i = 0; i < labels.length; i++) {
        permissionsNode.put(labels[i], getPermissionState(granted[i]));
    }
}
/** Maps a permission flag to its display text: "Allowed" or "Not Allowed". */
private String getPermissionState(boolean state) {
    String label = "Not Allowed";
    if (state) {
        label = "Allowed";
    }
    return label;
}
/**
 * Classifies a page by comparing its width and height.
 *
 * @param width the page width
 * @param height the page height
 * @return "Landscape" if wider than tall, "Portrait" if taller than wide, otherwise "Square"
 */
public String getPageOrientation(double width, double height) {
    if (width > height) {
        return "Landscape";
    }
    if (height > width) {
        return "Portrait";
    }
    return "Square";
}
/**
 * Finds the name of the standard page size matching the given dimensions.
 *
 * @param width the page width, compared against {@link PDRectangle#getWidth()}
 * @param height the page height, compared against {@link PDRectangle#getHeight()}
 * @return the matching size name, or "Custom" if none is within tolerance
 */
public String getPageSize(float width, float height) {
    // Known standard sizes, keyed by the name reported to the caller.
    Map<String, PDRectangle> knownSizes = new HashMap<>();
    knownSizes.put("Letter", PDRectangle.LETTER);
    knownSizes.put("LEGAL", PDRectangle.LEGAL);
    knownSizes.put("A0", PDRectangle.A0);
    knownSizes.put("A1", PDRectangle.A1);
    knownSizes.put("A2", PDRectangle.A2);
    knownSizes.put("A3", PDRectangle.A3);
    knownSizes.put("A4", PDRectangle.A4);
    knownSizes.put("A5", PDRectangle.A5);
    knownSizes.put("A6", PDRectangle.A6);

    String match = "Custom";
    for (Map.Entry<String, PDRectangle> candidate : knownSizes.entrySet()) {
        PDRectangle reference = candidate.getValue();
        if (!isCloseToSize(width, height, reference.getWidth(), reference.getHeight())) {
            continue;
        }
        match = candidate.getKey();
        break;
    }
    return match;
}
/**
 * Tells whether both dimensions are within a tolerance of 1.0 (inclusive) of the reference
 * dimensions.
 */
private boolean isCloseToSize(
        float width, float height, float standardWidth, float standardHeight) {
    final float tolerance = 1.0f; // You can adjust the tolerance as needed
    float widthGap = Math.abs(width - standardWidth);
    float heightGap = Math.abs(height - standardHeight);
    return widthGap <= tolerance && heightGap <= tolerance;
}
/**
 * Stores the given dimensions in the node as two-decimal strings: the raw values under the
 * "(px)" keys, the values divided by 72 under "(in)", and those multiplied by 2.54 under "(cm)".
 *
 * @param dimensionInfo the node that receives the six entries
 * @param width the page width
 * @param height the page height
 * @return the same {@code dimensionInfo} instance that was passed in
 */
public ObjectNode getDimensionInfo(ObjectNode dimensionInfo, float width, float height) {
    final float pointsPerInch = 72;
    final float cmPerInch = 2.54f;

    float inchesWide = width / pointsPerInch;
    float inchesHigh = height / pointsPerInch;
    float cmWide = inchesWide * cmPerInch;
    float cmHigh = inchesHigh * cmPerInch;

    final String twoDecimals = "%.2f";
    dimensionInfo.put("Width (px)", String.format(twoDecimals, width));
    dimensionInfo.put("Height (px)", String.format(twoDecimals, height));
    dimensionInfo.put("Width (in)", String.format(twoDecimals, inchesWide));
    dimensionInfo.put("Height (in)", String.format(twoDecimals, inchesHigh));
    dimensionInfo.put("Width (cm)", String.format(twoDecimals, cmWide));
    dimensionInfo.put("Height (cm)", String.format(twoDecimals, cmHigh));
    return dimensionInfo;
}
/**
 * Converts a list of structure-tree objects into a JSON array.
 *
 * <p>Each {@code PDStructureNode} yields one object; if it is a {@code PDStructureElement} the
 * object carries its "Type", "Content" and, when non-empty, its recursively built "Children".
 * Objects of any other type are skipped.
 *
 * @param nodes the objects to convert; may be {@code null}
 * @return the resulting array, empty when {@code nodes} is {@code null}
 */
public ArrayNode exploreStructureTree(List<Object> nodes) {
    ArrayNode result = objectMapper.createArrayNode();
    if (nodes == null) {
        return result;
    }

    for (Object candidate : nodes) {
        if (!(candidate instanceof PDStructureNode structureNode)) {
            continue;
        }
        ObjectNode entry = objectMapper.createObjectNode();
        if (structureNode instanceof PDStructureElement element) {
            entry.put("Type", element.getStructureType());
            entry.put("Content", getContent(element));

            // Descend into the element's kids; omit the key when nothing was found.
            ArrayNode children = exploreStructureTree(element.getKids());
            if (!children.isEmpty()) {
                entry.set("Children", children);
            }
        }
        result.add(entry);
    }
    return result;
}
/**
 * Concatenates the text found among a structure element's kids.
 *
 * <p>Only {@code COSString} kids and nested {@code PDStructureElement} kids (handled
 * recursively) contribute; other kid types are ignored.
 *
 * @param structureElement the element whose kids are read
 * @return the collected text, possibly empty
 */
public String getContent(PDStructureElement structureElement) {
    StringBuilder text = new StringBuilder();
    for (Object kid : structureElement.getKids()) {
        if (kid instanceof COSString literal) {
            text.append(literal.getString());
        } else if (kid instanceof PDStructureElement nested) {
            // Only these two kid types are handled; a fuller version would cover more.
            text.append(getContent(nested));
        }
    }
    return text.toString();
}
/**
 * Formats a calendar as {@code yyyy-MM-dd HH:mm:ss} in the system default time zone.
 *
 * @param calendar the value to format; may be {@code null}
 * @return the formatted timestamp, or {@code null} for a {@code null} input
 */
private String formatDate(Calendar calendar) {
    if (calendar == null) {
        return null;
    }
    ZonedDateTime localTime =
            ZonedDateTime.ofInstant(calendar.toInstant(), ZoneId.systemDefault());
    return localTime.format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
}
private String getPageModeDescription(String pageMode) {
return pageMode != null ? pageMode.toString().replaceFirst("/", "") : "Unknown";
/**
 * Produces a display form of a page mode value by removing its first slash.
 *
 * @param pageMode the page mode text; may be {@code null}
 * @return the text with the first match of the page-mode pattern removed, or "Unknown" for a
 *     {@code null} input
 */
private static String getPageModeDescription(String pageMode) {
    return pageMode == null
            ? "Unknown"
            : RegexPatternUtils.getInstance()
                    .getPageModePattern()
                    .matcher(pageMode)
                    .replaceFirst("");
}
}