From 575854952a39809023e7e487ad7916c921e83a4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bal=C3=A1zs=20Sz=C3=BCcs?=
 <127139797+balazs-szucs@users.noreply.github.com>
Date: Sat, 4 Oct 2025 11:22:29 +0200
Subject: [PATCH] feat(get-info-on-pdf): use PDFBox preflight to validate PDF
 compliancy level, and parse in compliancy type (#4595)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# Description of Changes

- Refactored methods for parsing and extracting PDF/A conformance levels
from XMP metadata.
- Implemented PDF/A validation using Preflight from Apache PDFBox.
- Enhanced PDF information generation to include PDF/A conformance level
and validation results.
- Updated compliance checks and JSON output to reflect new PDF/A
capabilities.

### Test files:

[lorem-ipsum_PDFA1b.pdf](https://github.com/user-attachments/files/22687689/lorem-ipsum_PDFA1b.pdf)

[lorem-ipsum_PDFA_2b.pdf](https://github.com/user-attachments/files/22687692/lorem-ipsum_PDFA_2b.pdf)

[lorem-ipsum_PDFA3a.pdf](https://github.com/user-attachments/files/22687693/lorem-ipsum_PD.A3a.pdf)

### New results:
<img width="699" height="257" alt="image"
src="https://github.com/user-attachments/assets/b8cb5510-2908-4e08-97f6-d5799e0e1be7"
/>
<img width="699" height="257" alt="image"
src="https://github.com/user-attachments/assets/d7af3731-ad19-4524-b1c1-32f47776e6af"
/>
<img width="699" height="257" alt="image"
src="https://github.com/user-attachments/assets/6e48e65b-2ebc-402a-a222-bfdbf783e45d"
/>

I also validated with online tools. Should be good now!

I was also thinking about moving this to GeneralUtils; it may be useful for
the PDF/A converter in the future, or for other features. I'm not sure yet,
but I think this is good for now.

Closes #4568

<!--
Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)
-->

---

## Checklist

### General

- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [x] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [x] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.

---------

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
---
 .../common/util/RegexPatternUtils.java        |  15 +-
 .../controller/api/security/GetInfoOnPDF.java | 548 ++++++++++++------
 2 files changed, 375 insertions(+), 188 deletions(-)
diff --git a/app/common/src/main/java/stirling/software/common/util/RegexPatternUtils.java b/app/common/src/main/java/stirling/software/common/util/RegexPatternUtils.java
index 4d119e179..858ad0605 100644
--- a/app/common/src/main/java/stirling/software/common/util/RegexPatternUtils.java
+++ b/app/common/src/main/java/stirling/software/common/util/RegexPatternUtils.java
@@ -447,7 +447,20 @@ public final class RegexPatternUtils {
         return getPattern("@\\s*([^\\s\\(]+(?:\\.[a-zA-Z0-9]+)?)");
     }
 
-    // API doc parsing patterns
+    /** Pattern for pdfaid:part written as an XMP attribute (part="1") or element (part>1) */
+    public Pattern getPdfAidPartPattern() {
+        return getPattern("pdfaid:part[\"'\\s]*[=>][\"'\\s]*([0-9]+)");
+    }
+
+    /** Pattern for pdfaid:conformance written as an XMP attribute (="B") or element (>B) */
+    public Pattern getPdfAidConformancePattern() {
+        return getPattern("pdfaid:conformance[\"'\\s]*[=>][\"'\\s]*([A-Za-z]+)");
+    }
+
+    /** Pattern for matching slash in page mode description */
+    public Pattern getPageModePattern() {
+        return getPattern("/");
+    }
 
     /**
      * Pre-compile commonly used patterns for immediate availability. This eliminates first-call
diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java
index e92cf1dd6..1e9038380 100644
--- a/app/core/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java
+++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java
@@ -7,10 +7,13 @@ import java.time.ZoneId;
 import java.time.ZonedDateTime;
 import java.time.format.DateTimeFormatter;
 import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.pdfbox.cos.COSInputStream;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSString;
+import org.apache.pdfbox.io.RandomAccessReadBuffer;
 import org.apache.pdfbox.pdmodel.*;
 import org.apache.pdfbox.pdmodel.common.PDMetadata;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
@@ -40,8 +43,14 @@ import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlin
 import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineNode;
 import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
 import org.apache.pdfbox.pdmodel.interactive.form.PDField;
+import org.apache.pdfbox.preflight.PreflightDocument;
+import org.apache.pdfbox.preflight.ValidationResult;
+import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
+import org.apache.pdfbox.preflight.exception.ValidationException;
+import org.apache.pdfbox.preflight.parser.PreflightParser;
 import org.apache.pdfbox.text.PDFTextStripper;
 import org.apache.xmpbox.XMPMetadata;
+import org.apache.xmpbox.schema.PDFAIdentificationSchema;
 import org.apache.xmpbox.xml.DomXmpParser;
 import org.apache.xmpbox.xml.XmpParsingException;
 import org.apache.xmpbox.xml.XmpSerializer;
@@ -95,60 +104,147 @@ public class GetInfoOnPDF {
         }
     }
 
-    /**
-     * Generates structured summary data about the PDF highlighting its unique characteristics such
-     * as encryption status, permission restrictions, and standards compliance.
-     *
-     * @param document The PDF document to analyze
-     * @return An ObjectNode containing structured summary data
-     */
-    private ObjectNode generatePDFSummaryData(PDDocument document) {
-        ObjectNode summaryData = objectMapper.createObjectNode();
-
-        // Check if encrypted
-        if (document.isEncrypted()) {
-            summaryData.put("encrypted", true);
+    public static boolean checkForStandard(PDDocument document, String standardKeyword) {
+        if ("PDF/A".equalsIgnoreCase(standardKeyword)) {
+            return getPdfAConformanceLevel(document) != null;
         }
 
-        // Check permissions
-        AccessPermission ap = document.getCurrentAccessPermission();
-        ArrayNode restrictedPermissions = objectMapper.createArrayNode();
-
-        if (!ap.canAssembleDocument()) restrictedPermissions.add("document assembly");
-        if (!ap.canExtractContent()) restrictedPermissions.add("content extraction");
-        if (!ap.canExtractForAccessibility()) restrictedPermissions.add("accessibility extraction");
-        if (!ap.canFillInForm()) restrictedPermissions.add("form filling");
-        if (!ap.canModify()) restrictedPermissions.add("modification");
-        if (!ap.canModifyAnnotations()) restrictedPermissions.add("annotation modification");
-        if (!ap.canPrint()) restrictedPermissions.add("printing");
-
-        if (!restrictedPermissions.isEmpty()) {
-            summaryData.set("restrictedPermissions", restrictedPermissions);
-            summaryData.put("restrictedPermissionsCount", restrictedPermissions.size());
-        }
-
-        // Check standard compliance
-        if (checkForStandard(document, "PDF/A")) {
-            summaryData.put("standardCompliance", "PDF/A");
-            summaryData.put("standardPurpose", "long-term archiving");
-        } else if (checkForStandard(document, "PDF/X")) {
-            summaryData.put("standardCompliance", "PDF/X");
-            summaryData.put("standardPurpose", "graphic exchange");
-        } else if (checkForStandard(document, "PDF/UA")) {
-            summaryData.put("standardCompliance", "PDF/UA");
-            summaryData.put("standardPurpose", "universal accessibility");
-        } else if (checkForStandard(document, "PDF/E")) {
-            summaryData.put("standardCompliance", "PDF/E");
-            summaryData.put("standardPurpose", "engineering workflows");
-        } else if (checkForStandard(document, "PDF/VT")) {
-            summaryData.put("standardCompliance", "PDF/VT");
-            summaryData.put("standardPurpose", "variable and transactional printing");
-        }
-
-        return summaryData;
+        return checkStandardInMetadata(document, standardKeyword);
     }
 
-    public static boolean checkForStandard(PDDocument document, String standardKeyword) {
+    public static String getPdfAConformanceLevel(PDDocument document) {
+        if (document == null || document.isEncrypted()) {
+            return null;
+        }
+
+        return getPdfAVersionFromMetadata(document);
+    }
+
+    private static String getPdfAVersionFromMetadata(PDDocument document) {
+        try {
+            PDMetadata pdMetadata = document.getDocumentCatalog().getMetadata();
+            if (pdMetadata != null) {
+                try (COSInputStream metaStream = pdMetadata.createInputStream()) {
+                    DomXmpParser domXmpParser = new DomXmpParser();
+                    XMPMetadata xmpMeta = domXmpParser.parse(metaStream);
+
+                    PDFAIdentificationSchema pdfId = xmpMeta.getPDFAIdentificationSchema();
+                    if (pdfId != null) {
+                        Integer part = pdfId.getPart();
+                        String conformance = pdfId.getConformance();
+
+                        if (part != null && conformance != null) {
+                            return part + conformance.toUpperCase(Locale.ROOT);
+                        }
+                    } else {
+                        try (COSInputStream rawStream = pdMetadata.createInputStream()) {
+                            byte[] metadataBytes = rawStream.readAllBytes();
+                            String rawMetadata = new String(metadataBytes, StandardCharsets.UTF_8);
+                            String extracted = extractPdfAVersionFromRawXml(rawMetadata);
+                            if (extracted != null) {
+                                return extracted;
+                            }
+                        }
+                    }
+                } catch (XmpParsingException e) {
+                    log.debug("XMP parsing failed, trying raw metadata search: {}", e.getMessage());
+                    try (COSInputStream metaStream = pdMetadata.createInputStream()) {
+                        byte[] metadataBytes = metaStream.readAllBytes();
+                        String rawMetadata = new String(metadataBytes, StandardCharsets.UTF_8);
+                        String extracted = extractPdfAVersionFromRawXml(rawMetadata);
+                        if (extracted != null) {
+                            return extracted;
+                        }
+                    }
+                }
+            }
+        } catch (Exception e) {
+            log.debug("Error reading PDF/A metadata: {}", e.getMessage());
+        }
+
+        return null;
+    }
+
+    private static String extractPdfAVersionFromRawXml(String rawXml) {
+        if (rawXml == null || rawXml.isEmpty()) {
+            return null;
+        }
+
+        try {
+            Pattern partPattern = RegexPatternUtils.getInstance().getPdfAidPartPattern();
+            Pattern confPattern = RegexPatternUtils.getInstance().getPdfAidConformancePattern();
+
+            Matcher partMatcher = partPattern.matcher(rawXml);
+            Matcher confMatcher = confPattern.matcher(rawXml);
+
+            if (partMatcher.find() && confMatcher.find()) {
+                String part = partMatcher.group(1);
+                String conformance = confMatcher.group(1).toUpperCase(Locale.ROOT);
+                return part + conformance;
+            }
+        } catch (Exception e) {
+            log.debug("Error parsing raw XMP for PDF/A version: {}", e.getMessage());
+        }
+
+        return null;
+    }
+
+    private static boolean validatePdfAWithPreflight(PDDocument document, String version) {
+        if (document == null || document.isEncrypted()) {
+            return false;
+        }
+
+        try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
+            document.save(baos);
+
+            try (RandomAccessReadBuffer source = new RandomAccessReadBuffer(baos.toByteArray())) {
+                PreflightParser parser = new PreflightParser(source);
+
+                try (PDDocument parsedDocument = parser.parse()) {
+                    if (!(parsedDocument instanceof PreflightDocument preflightDocument)) {
+                        log.debug(
+                                "Parsed document is not a PreflightDocument; unable to validate claimed PDF/A {}",
+                                version);
+                        return false;
+                    }
+
+                    try {
+                        ValidationResult result = preflightDocument.validate();
+                        if (!result.isValid() && log.isDebugEnabled()) {
+                            log.debug(
+                                    "PDF/A validation found {} errors for claimed version {}",
+                                    result.getErrorsList().size(),
+                                    version);
+                            int logged = 0;
+                            for (ValidationResult.ValidationError error : result.getErrorsList()) {
+                                log.debug(
+                                        "  Error {}: {}", error.getErrorCode(), error.getDetails());
+                                if (++logged >= 5) {
+                                    break;
+                                }
+                            }
+                        }
+                        return result.isValid();
+                    } catch (ValidationException e) {
+                        log.debug(
+                                "Validation exception during PDF/A validation: {}", e.getMessage());
+                    }
+                } catch (SyntaxValidationException e) {
+                    log.debug(
+                            "Syntax validation failed during PDF/A validation: {}", e.getMessage());
+                    return false;
+                }
+            }
+        } catch (IOException e) {
+            log.debug("IOException during PDF/A validation: {}", e.getMessage());
+        } catch (Exception e) {
+            log.debug("Unexpected error during PDF/A validation: {}", e.getMessage());
+        }
+
+        return false;
+    }
+
+    private static boolean checkStandardInMetadata(PDDocument document, String standardKeyword) {
         // Check XMP Metadata
         try {
             PDMetadata pdMetadata = document.getDocumentCatalog().getMetadata();
@@ -191,11 +287,197 @@ public class GetInfoOnPDF {
         return false;
     }
 
+    /**
+     * Generates structured summary data about the PDF highlighting its unique characteristics such
+     * as encryption status, permission restrictions, and standards compliance.
+     *
+     * @param document The PDF document to analyze
+     * @return An ObjectNode containing structured summary data
+     */
+    private ObjectNode generatePDFSummaryData(
+            PDDocument document, String pdfaConformanceLevel, Boolean pdfaValidationPassed) {
+        ObjectNode summaryData = objectMapper.createObjectNode();
+
+        // Check if encrypted
+        if (document.isEncrypted()) {
+            summaryData.put("encrypted", true);
+        }
+
+        // Check permissions
+        AccessPermission ap = document.getCurrentAccessPermission();
+        ArrayNode restrictedPermissions = objectMapper.createArrayNode();
+
+        if (!ap.canAssembleDocument()) restrictedPermissions.add("document assembly");
+        if (!ap.canExtractContent()) restrictedPermissions.add("content extraction");
+        if (!ap.canExtractForAccessibility()) restrictedPermissions.add("accessibility extraction");
+        if (!ap.canFillInForm()) restrictedPermissions.add("form filling");
+        if (!ap.canModify()) restrictedPermissions.add("modification");
+        if (!ap.canModifyAnnotations()) restrictedPermissions.add("annotation modification");
+        if (!ap.canPrint()) restrictedPermissions.add("printing");
+
+        if (!restrictedPermissions.isEmpty()) {
+            summaryData.set("restrictedPermissions", restrictedPermissions);
+            summaryData.put("restrictedPermissionsCount", restrictedPermissions.size());
+        }
+
+        // Check standard compliance
+        if (pdfaConformanceLevel != null) {
+            summaryData.put("standardCompliance", "PDF/A-" + pdfaConformanceLevel);
+            summaryData.put("standardPurpose", "long-term archiving");
+            if (pdfaValidationPassed != null) {
+                summaryData.put("standardValidationPassed", pdfaValidationPassed);
+            }
+        } else if (checkForStandard(document, "PDF/X")) {
+            summaryData.put("standardCompliance", "PDF/X");
+            summaryData.put("standardPurpose", "graphic exchange");
+        } else if (checkForStandard(document, "PDF/UA")) {
+            summaryData.put("standardCompliance", "PDF/UA");
+            summaryData.put("standardPurpose", "universal accessibility");
+        } else if (checkForStandard(document, "PDF/E")) {
+            summaryData.put("standardCompliance", "PDF/E");
+            summaryData.put("standardPurpose", "engineering workflows");
+        } else if (checkForStandard(document, "PDF/VT")) {
+            summaryData.put("standardCompliance", "PDF/VT");
+            summaryData.put("standardPurpose", "variable and transactional printing");
+        }
+
+        return summaryData;
+    }
+
+    private static void setNodePermissions(PDDocument pdfBoxDoc, ObjectNode permissionsNode) {
+        AccessPermission ap = pdfBoxDoc.getCurrentAccessPermission();
+
+        permissionsNode.put("Document Assembly", getPermissionState(ap.canAssembleDocument()));
+        permissionsNode.put("Extracting Content", getPermissionState(ap.canExtractContent()));
+        permissionsNode.put(
+                "Extracting for accessibility",
+                getPermissionState(ap.canExtractForAccessibility()));
+        permissionsNode.put("Form Filling", getPermissionState(ap.canFillInForm()));
+        permissionsNode.put("Modifying", getPermissionState(ap.canModify()));
+        permissionsNode.put("Modifying annotations", getPermissionState(ap.canModifyAnnotations()));
+        permissionsNode.put("Printing", getPermissionState(ap.canPrint()));
+    }
+
+    private static String getPermissionState(boolean state) {
+        return state ? "Allowed" : "Not Allowed";
+    }
+
+    public static String getPageOrientation(double width, double height) {
+        if (width > height) {
+            return "Landscape";
+        } else if (height > width) {
+            return "Portrait";
+        } else {
+            return "Square";
+        }
+    }
+
+    public static String getPageSize(float width, float height) {
+        // Define standard page sizes
+        Map<String, PDRectangle> standardSizes = new HashMap<>();
+        standardSizes.put("Letter", PDRectangle.LETTER);
+        standardSizes.put("LEGAL", PDRectangle.LEGAL);
+        standardSizes.put("A0", PDRectangle.A0);
+        standardSizes.put("A1", PDRectangle.A1);
+        standardSizes.put("A2", PDRectangle.A2);
+        standardSizes.put("A3", PDRectangle.A3);
+        standardSizes.put("A4", PDRectangle.A4);
+        standardSizes.put("A5", PDRectangle.A5);
+        standardSizes.put("A6", PDRectangle.A6);
+
+        for (Map.Entry<String, PDRectangle> entry : standardSizes.entrySet()) {
+            PDRectangle size = entry.getValue();
+            if (isCloseToSize(width, height, size.getWidth(), size.getHeight())) {
+                return entry.getKey();
+            }
+        }
+        return "Custom";
+    }
+
+    private static boolean isCloseToSize(
+            float width, float height, float standardWidth, float standardHeight) {
+        float tolerance = 1.0f; // You can adjust the tolerance as needed
+        return Math.abs(width - standardWidth) <= tolerance
+                && Math.abs(height - standardHeight) <= tolerance;
+    }
+
+    private static void setDimensionInfo(ObjectNode dimensionInfo, float width, float height) {
+        float ppi = 72; // Points Per Inch
+
+        float widthInInches = width / ppi;
+        float heightInInches = height / ppi;
+
+        float widthInCm = widthInInches * 2.54f;
+        float heightInCm = heightInInches * 2.54f;
+
+        dimensionInfo.put("Width (px)", String.format("%.2f", width));
+        dimensionInfo.put("Height (px)", String.format("%.2f", height));
+        dimensionInfo.put("Width (in)", String.format("%.2f", widthInInches));
+        dimensionInfo.put("Height (in)", String.format("%.2f", heightInInches));
+        dimensionInfo.put("Width (cm)", String.format("%.2f", widthInCm));
+        dimensionInfo.put("Height (cm)", String.format("%.2f", heightInCm));
+    }
+
+    private static ArrayNode exploreStructureTree(List<Object> nodes) {
+        ArrayNode elementsArray = objectMapper.createArrayNode();
+        if (nodes != null) {
+            for (Object obj : nodes) {
+                if (obj instanceof PDStructureNode node) {
+                    ObjectNode elementNode = objectMapper.createObjectNode();
+
+                    if (node instanceof PDStructureElement structureElement) {
+                        elementNode.put("Type", structureElement.getStructureType());
+                        elementNode.put("Content", getContent(structureElement));
+
+                        // Recursively explore child elements
+                        ArrayNode childElements = exploreStructureTree(structureElement.getKids());
+                        if (!childElements.isEmpty()) {
+                            elementNode.set("Children", childElements);
+                        }
+                    }
+                    elementsArray.add(elementNode);
+                }
+            }
+        }
+        return elementsArray;
+    }
+
+    private static String getContent(PDStructureElement structureElement) {
+        StringBuilder contentBuilder = new StringBuilder();
+
+        for (Object item : structureElement.getKids()) {
+            if (item instanceof COSString cosString) {
+                contentBuilder.append(cosString.getString());
+            } else if (item instanceof PDStructureElement pdstructureelement) {
+                // For simplicity, we're handling only COSString and PDStructureElement here
+                // but a more comprehensive method would handle other types too
+                contentBuilder.append(getContent(pdstructureelement));
+            }
+        }
+
+        return contentBuilder.toString();
+    }
+
+    private static String formatDate(Calendar calendar) {
+        if (calendar != null) {
+            DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+            ZonedDateTime zonedDateTime =
+                    ZonedDateTime.ofInstant(calendar.toInstant(), ZoneId.systemDefault());
+            return zonedDateTime.format(formatter);
+        } else {
+            return null;
+        }
+    }
+
     @PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/get-info-on-pdf")
     @Operation(summary = "Summary here", description = "desc. Input:PDF Output:JSON Type:SISO")
     public ResponseEntity<byte[]> getPdfInfo(@ModelAttribute PDFFile request) throws IOException {
         MultipartFile inputFile = request.getFileInput();
         boolean readonly = true;
+        final String pagePrefix = "Page ";
+        final int prefixLength = pagePrefix.length();
+        StringBuilder keyBuilder = new StringBuilder(prefixLength + 8);
+        keyBuilder.append(pagePrefix);
         try (PDDocument pdfBoxDoc = pdfDocumentFactory.load(inputFile, readonly)) {
             ObjectMapper objectMapper = new ObjectMapper();
             ObjectNode jsonOutput = objectMapper.createObjectNode();
@@ -267,8 +549,15 @@ public class GetInfoOnPDF {
             }
             jsonOutput.set("FormFields", formFieldsNode);
 
+            String pdfaConformanceLevel = getPdfAConformanceLevel(pdfBoxDoc);
+            Boolean pdfaValidationPassed = null;
+            if (pdfaConformanceLevel != null) {
+                pdfaValidationPassed = validatePdfAWithPreflight(pdfBoxDoc, pdfaConformanceLevel);
+            }
+
             // Generate structured summary data about PDF characteristics
-            ObjectNode summaryData = generatePDFSummaryData(pdfBoxDoc);
+            ObjectNode summaryData =
+                    generatePDFSummaryData(pdfBoxDoc, pdfaConformanceLevel, pdfaValidationPassed);
             if (summaryData != null && !summaryData.isEmpty()) {
                 jsonOutput.set("SummaryData", summaryData);
             }
@@ -373,7 +662,7 @@ public class GetInfoOnPDF {
                 log.error("exception", e);
             }
 
-            boolean isPdfACompliant = checkForStandard(pdfBoxDoc, "PDF/A");
+            boolean isPdfACompliant = pdfaConformanceLevel != null;
             boolean isPdfXCompliant = checkForStandard(pdfBoxDoc, "PDF/X");
             boolean isPdfECompliant = checkForStandard(pdfBoxDoc, "PDF/E");
             boolean isPdfVTCompliant = checkForStandard(pdfBoxDoc, "PDF/VT");
@@ -390,6 +679,12 @@ public class GetInfoOnPDF {
             // development in 2021.
 
             compliancy.put("IsPDF/ACompliant", isPdfACompliant);
+            if (pdfaConformanceLevel != null) {
+                compliancy.put("PDF/AConformanceLevel", pdfaConformanceLevel);
+                if (pdfaValidationPassed != null) {
+                    compliancy.put("IsPDF/AValidated", pdfaValidationPassed);
+                }
+            }
             compliancy.put("IsPDF/XCompliant", isPdfXCompliant);
             compliancy.put("IsPDF/ECompliant", isPdfECompliant);
             compliancy.put("IsPDF/VTCompliant", isPdfVTCompliant);
@@ -466,7 +761,7 @@ public class GetInfoOnPDF {
 
                 ObjectNode sizeInfo = objectMapper.createObjectNode();
 
-                getDimensionInfo(sizeInfo, width, height);
+                setDimensionInfo(sizeInfo, width, height);
 
                 sizeInfo.put("Standard Page", getPageSize(width, height));
                 pageInfo.set("Size", sizeInfo);
@@ -552,11 +847,10 @@ public class GetInfoOnPDF {
                 Set<String> uniqueURIs = new HashSet<>(); // To store unique URIs
 
                 for (PDAnnotation annotation : annotations) {
-                    if (annotation instanceof PDAnnotationLink linkAnnotation) {
-                        if (linkAnnotation.getAction() instanceof PDActionURI uriAction) {
-                            String uri = uriAction.getURI();
-                            uniqueURIs.add(uri); // Add to set to ensure uniqueness
-                        }
+                    if (annotation instanceof PDAnnotationLink linkAnnotation
+                            && linkAnnotation.getAction() instanceof PDActionURI uriAction) {
+                        String uri = uriAction.getURI();
+                        uniqueURIs.add(uri); // Add to set to ensure uniqueness
                     }
                 }
 
@@ -679,8 +973,10 @@ public class GetInfoOnPDF {
                 }
 
                 pageInfo.set("Multimedia", multimediaArray);
+                keyBuilder.setLength(prefixLength);
+                keyBuilder.append(pageNum + 1);
 
-                pageInfoParent.set("Page " + (pageNum + 1), pageInfo);
+                pageInfoParent.set(keyBuilder.toString(), pageInfo);
             }
 
             jsonOutput.set("BasicInfo", basicInfo);
@@ -706,133 +1002,11 @@ public class GetInfoOnPDF {
         return null;
     }
 
-    private void setNodePermissions(PDDocument pdfBoxDoc, ObjectNode permissionsNode) {
-        AccessPermission ap = pdfBoxDoc.getCurrentAccessPermission();
-
-        permissionsNode.put("Document Assembly", getPermissionState(ap.canAssembleDocument()));
-        permissionsNode.put("Extracting Content", getPermissionState(ap.canExtractContent()));
-        permissionsNode.put(
-                "Extracting for accessibility",
-                getPermissionState(ap.canExtractForAccessibility()));
-        permissionsNode.put("Form Filling", getPermissionState(ap.canFillInForm()));
-        permissionsNode.put("Modifying", getPermissionState(ap.canModify()));
-        permissionsNode.put("Modifying annotations", getPermissionState(ap.canModifyAnnotations()));
-        permissionsNode.put("Printing", getPermissionState(ap.canPrint()));
-    }
-
-    private String getPermissionState(boolean state) {
-        return state ? "Allowed" : "Not Allowed";
-    }
-
-    public String getPageOrientation(double width, double height) {
-        if (width > height) {
-            return "Landscape";
-        } else if (height > width) {
-            return "Portrait";
-        } else {
-            return "Square";
-        }
-    }
-
-    public String getPageSize(float width, float height) {
-        // Define standard page sizes
-        Map<String, PDRectangle> standardSizes = new HashMap<>();
-        standardSizes.put("Letter", PDRectangle.LETTER);
-        standardSizes.put("LEGAL", PDRectangle.LEGAL);
-        standardSizes.put("A0", PDRectangle.A0);
-        standardSizes.put("A1", PDRectangle.A1);
-        standardSizes.put("A2", PDRectangle.A2);
-        standardSizes.put("A3", PDRectangle.A3);
-        standardSizes.put("A4", PDRectangle.A4);
-        standardSizes.put("A5", PDRectangle.A5);
-        standardSizes.put("A6", PDRectangle.A6);
-
-        for (Map.Entry<String, PDRectangle> entry : standardSizes.entrySet()) {
-            PDRectangle size = entry.getValue();
-            if (isCloseToSize(width, height, size.getWidth(), size.getHeight())) {
-                return entry.getKey();
-            }
-        }
-        return "Custom";
-    }
-
-    private boolean isCloseToSize(
-            float width, float height, float standardWidth, float standardHeight) {
-        float tolerance = 1.0f; // You can adjust the tolerance as needed
-        return Math.abs(width - standardWidth) <= tolerance
-                && Math.abs(height - standardHeight) <= tolerance;
-    }
-
-    public ObjectNode getDimensionInfo(ObjectNode dimensionInfo, float width, float height) {
-        float ppi = 72; // Points Per Inch
-
-        float widthInInches = width / ppi;
-        float heightInInches = height / ppi;
-
-        float widthInCm = widthInInches * 2.54f;
-        float heightInCm = heightInInches * 2.54f;
-
-        dimensionInfo.put("Width (px)", String.format("%.2f", width));
-        dimensionInfo.put("Height (px)", String.format("%.2f", height));
-        dimensionInfo.put("Width (in)", String.format("%.2f", widthInInches));
-        dimensionInfo.put("Height (in)", String.format("%.2f", heightInInches));
-        dimensionInfo.put("Width (cm)", String.format("%.2f", widthInCm));
-        dimensionInfo.put("Height (cm)", String.format("%.2f", heightInCm));
-        return dimensionInfo;
-    }
-
-    public ArrayNode exploreStructureTree(List<Object> nodes) {
-        ArrayNode elementsArray = objectMapper.createArrayNode();
-        if (nodes != null) {
-            for (Object obj : nodes) {
-                if (obj instanceof PDStructureNode node) {
-                    ObjectNode elementNode = objectMapper.createObjectNode();
-
-                    if (node instanceof PDStructureElement structureElement) {
-                        elementNode.put("Type", structureElement.getStructureType());
-                        elementNode.put("Content", getContent(structureElement));
-
-                        // Recursively explore child elements
-                        ArrayNode childElements = exploreStructureTree(structureElement.getKids());
-                        if (!childElements.isEmpty()) {
-                            elementNode.set("Children", childElements);
-                        }
-                    }
-                    elementsArray.add(elementNode);
-                }
-            }
-        }
-        return elementsArray;
-    }
-
-    public String getContent(PDStructureElement structureElement) {
-        StringBuilder contentBuilder = new StringBuilder();
-
-        for (Object item : structureElement.getKids()) {
-            if (item instanceof COSString cosString) {
-                contentBuilder.append(cosString.getString());
-            } else if (item instanceof PDStructureElement) {
-                // For simplicity, we're handling only COSString and PDStructureElement here
-                // but a more comprehensive method would handle other types too
-                contentBuilder.append(getContent((PDStructureElement) item));
-            }
-        }
-
-        return contentBuilder.toString();
-    }
-
-    private String formatDate(Calendar calendar) {
-        if (calendar != null) {
-            DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
-            ZonedDateTime zonedDateTime =
-                    ZonedDateTime.ofInstant(calendar.toInstant(), ZoneId.systemDefault());
-            return zonedDateTime.format(formatter);
-        } else {
-            return null;
-        }
-    }
-
-    private String getPageModeDescription(String pageMode) {
-        return pageMode != null ? pageMode.toString().replaceFirst("/", "") : "Unknown";
+    private static String getPageModeDescription(String pageMode) {
+        if (pageMode == null) return "Unknown";
+        return RegexPatternUtils.getInstance()
+                .getPageModePattern()
+                .matcher(pageMode)
+                .replaceFirst("");
     }
 }