From 575854952a39809023e7e487ad7916c921e83a4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Sz=C3=BCcs?= <127139797+balazs-szucs@users.noreply.github.com> Date: Sat, 4 Oct 2025 11:22:29 +0200 Subject: [PATCH] feat(get-info-on-pdf): use PDFBox preflight to validate PDF compliancy level, and parse in compliancy type (#4595) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description of Changes - Refactored methods for parsing and extracting PDF/A conformance levels from XMP metadata. - Implemented PDF/A validation using Preflight from Apache PDFBox. - Enhanced PDF information generation to include PDF/A conformance level and validation results. - Updated compliance checks and JSON output to reflect new PDF/A capabilities. ### Test files: [lorem-ipsum_PDFA1b.pdf](https://github.com/user-attachments/files/22687689/lorem-ipsum_PDFA1b.pdf) [lorem-ipsum_PDFA_2b.pdf](https://github.com/user-attachments/files/22687692/lorem-ipsum_PDFA_2b.pdf) [lorem-ipsum_PD⁄A3a.pdf](https://github.com/user-attachments/files/22687693/lorem-ipsum_PD.A3a.pdf) ### New results: image image image I also validated with online tools. Should be good now! I was also thinking of moving this to GeneralUtils; it may be useful for the PDF/A converter in the future, or for other features. Not sure yet; I think this is good for now. 
Closes #4568 --- ## Checklist ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [x] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [x] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [x] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details. 
--------- Signed-off-by: Balázs Szücs --- .../common/util/RegexPatternUtils.java | 15 +- .../controller/api/security/GetInfoOnPDF.java | 548 ++++++++++++------ 2 files changed, 375 insertions(+), 188 deletions(-) diff --git a/app/common/src/main/java/stirling/software/common/util/RegexPatternUtils.java b/app/common/src/main/java/stirling/software/common/util/RegexPatternUtils.java index 4d119e179..858ad0605 100644 --- a/app/common/src/main/java/stirling/software/common/util/RegexPatternUtils.java +++ b/app/common/src/main/java/stirling/software/common/util/RegexPatternUtils.java @@ -447,7 +447,20 @@ public final class RegexPatternUtils { return getPattern("@\\s*([^\\s\\(]+(?:\\.[a-zA-Z0-9]+)?)"); } - // API doc parsing patterns + /** Pattern for matching pdfaid:part attribute in XMP metadata */ + public Pattern getPdfAidPartPattern() { + return getPattern("pdfaid:part[\"\\s]*=[\"\\s]*([0-9]+)"); + } + + /** Pattern for matching pdfaid:conformance attribute in XMP metadata */ + public Pattern getPdfAidConformancePattern() { + return getPattern("pdfaid:conformance[\"\\s]*=[\"\\s]*([A-Za-z]+)"); + } + + /** Pattern for matching slash in page mode description */ + public Pattern getPageModePattern() { + return getPattern("/"); + } /** * Pre-compile commonly used patterns for immediate availability. 
This eliminates first-call diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java index e92cf1dd6..1e9038380 100644 --- a/app/core/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java +++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java @@ -7,10 +7,13 @@ import java.time.ZoneId; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.pdfbox.cos.COSInputStream; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSString; +import org.apache.pdfbox.io.RandomAccessReadBuffer; import org.apache.pdfbox.pdmodel.*; import org.apache.pdfbox.pdmodel.common.PDMetadata; import org.apache.pdfbox.pdmodel.common.PDRectangle; @@ -40,8 +43,14 @@ import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlin import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineNode; import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; import org.apache.pdfbox.pdmodel.interactive.form.PDField; +import org.apache.pdfbox.preflight.PreflightDocument; +import org.apache.pdfbox.preflight.ValidationResult; +import org.apache.pdfbox.preflight.exception.SyntaxValidationException; +import org.apache.pdfbox.preflight.exception.ValidationException; +import org.apache.pdfbox.preflight.parser.PreflightParser; import org.apache.pdfbox.text.PDFTextStripper; import org.apache.xmpbox.XMPMetadata; +import org.apache.xmpbox.schema.PDFAIdentificationSchema; import org.apache.xmpbox.xml.DomXmpParser; import org.apache.xmpbox.xml.XmpParsingException; import org.apache.xmpbox.xml.XmpSerializer; @@ -95,60 +104,147 @@ public class GetInfoOnPDF { } } - /** - * Generates structured summary data about the PDF highlighting its unique 
characteristics such - * as encryption status, permission restrictions, and standards compliance. - * - * @param document The PDF document to analyze - * @return An ObjectNode containing structured summary data - */ - private ObjectNode generatePDFSummaryData(PDDocument document) { - ObjectNode summaryData = objectMapper.createObjectNode(); - - // Check if encrypted - if (document.isEncrypted()) { - summaryData.put("encrypted", true); + public static boolean checkForStandard(PDDocument document, String standardKeyword) { + if ("PDF/A".equalsIgnoreCase(standardKeyword)) { + return getPdfAConformanceLevel(document) != null; } - // Check permissions - AccessPermission ap = document.getCurrentAccessPermission(); - ArrayNode restrictedPermissions = objectMapper.createArrayNode(); - - if (!ap.canAssembleDocument()) restrictedPermissions.add("document assembly"); - if (!ap.canExtractContent()) restrictedPermissions.add("content extraction"); - if (!ap.canExtractForAccessibility()) restrictedPermissions.add("accessibility extraction"); - if (!ap.canFillInForm()) restrictedPermissions.add("form filling"); - if (!ap.canModify()) restrictedPermissions.add("modification"); - if (!ap.canModifyAnnotations()) restrictedPermissions.add("annotation modification"); - if (!ap.canPrint()) restrictedPermissions.add("printing"); - - if (!restrictedPermissions.isEmpty()) { - summaryData.set("restrictedPermissions", restrictedPermissions); - summaryData.put("restrictedPermissionsCount", restrictedPermissions.size()); - } - - // Check standard compliance - if (checkForStandard(document, "PDF/A")) { - summaryData.put("standardCompliance", "PDF/A"); - summaryData.put("standardPurpose", "long-term archiving"); - } else if (checkForStandard(document, "PDF/X")) { - summaryData.put("standardCompliance", "PDF/X"); - summaryData.put("standardPurpose", "graphic exchange"); - } else if (checkForStandard(document, "PDF/UA")) { - summaryData.put("standardCompliance", "PDF/UA"); - 
summaryData.put("standardPurpose", "universal accessibility"); - } else if (checkForStandard(document, "PDF/E")) { - summaryData.put("standardCompliance", "PDF/E"); - summaryData.put("standardPurpose", "engineering workflows"); - } else if (checkForStandard(document, "PDF/VT")) { - summaryData.put("standardCompliance", "PDF/VT"); - summaryData.put("standardPurpose", "variable and transactional printing"); - } - - return summaryData; + return checkStandardInMetadata(document, standardKeyword); } - public static boolean checkForStandard(PDDocument document, String standardKeyword) { + public static String getPdfAConformanceLevel(PDDocument document) { + if (document == null || document.isEncrypted()) { + return null; + } + + return getPdfAVersionFromMetadata(document); + } + + private static String getPdfAVersionFromMetadata(PDDocument document) { + try { + PDMetadata pdMetadata = document.getDocumentCatalog().getMetadata(); + if (pdMetadata != null) { + try (COSInputStream metaStream = pdMetadata.createInputStream()) { + DomXmpParser domXmpParser = new DomXmpParser(); + XMPMetadata xmpMeta = domXmpParser.parse(metaStream); + + PDFAIdentificationSchema pdfId = xmpMeta.getPDFAIdentificationSchema(); + if (pdfId != null) { + Integer part = pdfId.getPart(); + String conformance = pdfId.getConformance(); + + if (part != null && conformance != null) { + return part + conformance.toUpperCase(Locale.ROOT); + } + } else { + try (COSInputStream rawStream = pdMetadata.createInputStream()) { + byte[] metadataBytes = rawStream.readAllBytes(); + String rawMetadata = new String(metadataBytes, StandardCharsets.UTF_8); + String extracted = extractPdfAVersionFromRawXml(rawMetadata); + if (extracted != null) { + return extracted; + } + } + } + } catch (XmpParsingException e) { + log.debug("XMP parsing failed, trying raw metadata search: {}", e.getMessage()); + try (COSInputStream metaStream = pdMetadata.createInputStream()) { + byte[] metadataBytes = metaStream.readAllBytes(); + 
String rawMetadata = new String(metadataBytes, StandardCharsets.UTF_8); + String extracted = extractPdfAVersionFromRawXml(rawMetadata); + if (extracted != null) { + return extracted; + } + } + } + } + } catch (Exception e) { + log.debug("Error reading PDF/A metadata: {}", e.getMessage()); + } + + return null; + } + + private static String extractPdfAVersionFromRawXml(String rawXml) { + if (rawXml == null || rawXml.isEmpty()) { + return null; + } + + try { + Pattern partPattern = RegexPatternUtils.getInstance().getPdfAidPartPattern(); + Pattern confPattern = RegexPatternUtils.getInstance().getPdfAidConformancePattern(); + + Matcher partMatcher = partPattern.matcher(rawXml); + Matcher confMatcher = confPattern.matcher(rawXml); + + if (partMatcher.find() && confMatcher.find()) { + String part = partMatcher.group(1); + String conformance = confMatcher.group(1).toUpperCase(Locale.ROOT); + return part + conformance; + } + } catch (Exception e) { + log.debug("Error parsing raw XMP for PDF/A version: {}", e.getMessage()); + } + + return null; + } + + private static boolean validatePdfAWithPreflight(PDDocument document, String version) { + if (document == null || document.isEncrypted()) { + return false; + } + + try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + document.save(baos); + + try (RandomAccessReadBuffer source = new RandomAccessReadBuffer(baos.toByteArray())) { + PreflightParser parser = new PreflightParser(source); + + try (PDDocument parsedDocument = parser.parse()) { + if (!(parsedDocument instanceof PreflightDocument preflightDocument)) { + log.debug( + "Parsed document is not a PreflightDocument; unable to validate claimed PDF/A {}", + version); + return false; + } + + try { + ValidationResult result = preflightDocument.validate(); + if (!result.isValid() && log.isDebugEnabled()) { + log.debug( + "PDF/A validation found {} errors for claimed version {}", + result.getErrorsList().size(), + version); + int logged = 0; + for 
(ValidationResult.ValidationError error : result.getErrorsList()) { + log.debug( + " Error {}: {}", error.getErrorCode(), error.getDetails()); + if (++logged >= 5) { + break; + } + } + } + return result.isValid(); + } catch (ValidationException e) { + log.debug( + "Validation exception during PDF/A validation: {}", e.getMessage()); + } + } catch (SyntaxValidationException e) { + log.debug( + "Syntax validation failed during PDF/A validation: {}", e.getMessage()); + return false; + } + } + } catch (IOException e) { + log.debug("IOException during PDF/A validation: {}", e.getMessage()); + } catch (Exception e) { + log.debug("Unexpected error during PDF/A validation: {}", e.getMessage()); + } + + return false; + } + + private static boolean checkStandardInMetadata(PDDocument document, String standardKeyword) { // Check XMP Metadata try { PDMetadata pdMetadata = document.getDocumentCatalog().getMetadata(); @@ -191,11 +287,197 @@ public class GetInfoOnPDF { return false; } + /** + * Generates structured summary data about the PDF highlighting its unique characteristics such + * as encryption status, permission restrictions, and standards compliance. 
+ * + * @param document The PDF document to analyze + * @return An ObjectNode containing structured summary data + */ + private ObjectNode generatePDFSummaryData( + PDDocument document, String pdfaConformanceLevel, Boolean pdfaValidationPassed) { + ObjectNode summaryData = objectMapper.createObjectNode(); + + // Check if encrypted + if (document.isEncrypted()) { + summaryData.put("encrypted", true); + } + + // Check permissions + AccessPermission ap = document.getCurrentAccessPermission(); + ArrayNode restrictedPermissions = objectMapper.createArrayNode(); + + if (!ap.canAssembleDocument()) restrictedPermissions.add("document assembly"); + if (!ap.canExtractContent()) restrictedPermissions.add("content extraction"); + if (!ap.canExtractForAccessibility()) restrictedPermissions.add("accessibility extraction"); + if (!ap.canFillInForm()) restrictedPermissions.add("form filling"); + if (!ap.canModify()) restrictedPermissions.add("modification"); + if (!ap.canModifyAnnotations()) restrictedPermissions.add("annotation modification"); + if (!ap.canPrint()) restrictedPermissions.add("printing"); + + if (!restrictedPermissions.isEmpty()) { + summaryData.set("restrictedPermissions", restrictedPermissions); + summaryData.put("restrictedPermissionsCount", restrictedPermissions.size()); + } + + // Check standard compliance + if (pdfaConformanceLevel != null) { + summaryData.put("standardCompliance", "PDF/A-" + pdfaConformanceLevel); + summaryData.put("standardPurpose", "long-term archiving"); + if (pdfaValidationPassed != null) { + summaryData.put("standardValidationPassed", pdfaValidationPassed); + } + } else if (checkForStandard(document, "PDF/X")) { + summaryData.put("standardCompliance", "PDF/X"); + summaryData.put("standardPurpose", "graphic exchange"); + } else if (checkForStandard(document, "PDF/UA")) { + summaryData.put("standardCompliance", "PDF/UA"); + summaryData.put("standardPurpose", "universal accessibility"); + } else if (checkForStandard(document, "PDF/E")) { + 
summaryData.put("standardCompliance", "PDF/E"); + summaryData.put("standardPurpose", "engineering workflows"); + } else if (checkForStandard(document, "PDF/VT")) { + summaryData.put("standardCompliance", "PDF/VT"); + summaryData.put("standardPurpose", "variable and transactional printing"); + } + + return summaryData; + } + + private static void setNodePermissions(PDDocument pdfBoxDoc, ObjectNode permissionsNode) { + AccessPermission ap = pdfBoxDoc.getCurrentAccessPermission(); + + permissionsNode.put("Document Assembly", getPermissionState(ap.canAssembleDocument())); + permissionsNode.put("Extracting Content", getPermissionState(ap.canExtractContent())); + permissionsNode.put( + "Extracting for accessibility", + getPermissionState(ap.canExtractForAccessibility())); + permissionsNode.put("Form Filling", getPermissionState(ap.canFillInForm())); + permissionsNode.put("Modifying", getPermissionState(ap.canModify())); + permissionsNode.put("Modifying annotations", getPermissionState(ap.canModifyAnnotations())); + permissionsNode.put("Printing", getPermissionState(ap.canPrint())); + } + + private static String getPermissionState(boolean state) { + return state ? 
"Allowed" : "Not Allowed"; + } + + public static String getPageOrientation(double width, double height) { + if (width > height) { + return "Landscape"; + } else if (height > width) { + return "Portrait"; + } else { + return "Square"; + } + } + + public static String getPageSize(float width, float height) { + // Define standard page sizes + Map standardSizes = new HashMap<>(); + standardSizes.put("Letter", PDRectangle.LETTER); + standardSizes.put("LEGAL", PDRectangle.LEGAL); + standardSizes.put("A0", PDRectangle.A0); + standardSizes.put("A1", PDRectangle.A1); + standardSizes.put("A2", PDRectangle.A2); + standardSizes.put("A3", PDRectangle.A3); + standardSizes.put("A4", PDRectangle.A4); + standardSizes.put("A5", PDRectangle.A5); + standardSizes.put("A6", PDRectangle.A6); + + for (Map.Entry entry : standardSizes.entrySet()) { + PDRectangle size = entry.getValue(); + if (isCloseToSize(width, height, size.getWidth(), size.getHeight())) { + return entry.getKey(); + } + } + return "Custom"; + } + + private static boolean isCloseToSize( + float width, float height, float standardWidth, float standardHeight) { + float tolerance = 1.0f; // You can adjust the tolerance as needed + return Math.abs(width - standardWidth) <= tolerance + && Math.abs(height - standardHeight) <= tolerance; + } + + private static void setDimensionInfo(ObjectNode dimensionInfo, float width, float height) { + float ppi = 72; // Points Per Inch + + float widthInInches = width / ppi; + float heightInInches = height / ppi; + + float widthInCm = widthInInches * 2.54f; + float heightInCm = heightInInches * 2.54f; + + dimensionInfo.put("Width (px)", String.format("%.2f", width)); + dimensionInfo.put("Height (px)", String.format("%.2f", height)); + dimensionInfo.put("Width (in)", String.format("%.2f", widthInInches)); + dimensionInfo.put("Height (in)", String.format("%.2f", heightInInches)); + dimensionInfo.put("Width (cm)", String.format("%.2f", widthInCm)); + dimensionInfo.put("Height (cm)", 
String.format("%.2f", heightInCm)); + } + + private static ArrayNode exploreStructureTree(List nodes) { + ArrayNode elementsArray = objectMapper.createArrayNode(); + if (nodes != null) { + for (Object obj : nodes) { + if (obj instanceof PDStructureNode node) { + ObjectNode elementNode = objectMapper.createObjectNode(); + + if (node instanceof PDStructureElement structureElement) { + elementNode.put("Type", structureElement.getStructureType()); + elementNode.put("Content", getContent(structureElement)); + + // Recursively explore child elements + ArrayNode childElements = exploreStructureTree(structureElement.getKids()); + if (!childElements.isEmpty()) { + elementNode.set("Children", childElements); + } + } + elementsArray.add(elementNode); + } + } + } + return elementsArray; + } + + private static String getContent(PDStructureElement structureElement) { + StringBuilder contentBuilder = new StringBuilder(); + + for (Object item : structureElement.getKids()) { + if (item instanceof COSString cosString) { + contentBuilder.append(cosString.getString()); + } else if (item instanceof PDStructureElement pdstructureelement) { + // For simplicity, we're handling only COSString and PDStructureElement here + // but a more comprehensive method would handle other types too + contentBuilder.append(getContent(pdstructureelement)); + } + } + + return contentBuilder.toString(); + } + + private static String formatDate(Calendar calendar) { + if (calendar != null) { + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + ZonedDateTime zonedDateTime = + ZonedDateTime.ofInstant(calendar.toInstant(), ZoneId.systemDefault()); + return zonedDateTime.format(formatter); + } else { + return null; + } + } + @PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/get-info-on-pdf") @Operation(summary = "Summary here", description = "desc. 
Input:PDF Output:JSON Type:SISO") public ResponseEntity getPdfInfo(@ModelAttribute PDFFile request) throws IOException { MultipartFile inputFile = request.getFileInput(); boolean readonly = true; + final String pagePrefix = "Page "; + final int prefixLength = pagePrefix.length(); + StringBuilder keyBuilder = new StringBuilder(prefixLength + 8); + keyBuilder.append(pagePrefix); try (PDDocument pdfBoxDoc = pdfDocumentFactory.load(inputFile, readonly)) { ObjectMapper objectMapper = new ObjectMapper(); ObjectNode jsonOutput = objectMapper.createObjectNode(); @@ -267,8 +549,15 @@ public class GetInfoOnPDF { } jsonOutput.set("FormFields", formFieldsNode); + String pdfaConformanceLevel = getPdfAConformanceLevel(pdfBoxDoc); + Boolean pdfaValidationPassed = null; + if (pdfaConformanceLevel != null) { + pdfaValidationPassed = validatePdfAWithPreflight(pdfBoxDoc, pdfaConformanceLevel); + } + // Generate structured summary data about PDF characteristics - ObjectNode summaryData = generatePDFSummaryData(pdfBoxDoc); + ObjectNode summaryData = + generatePDFSummaryData(pdfBoxDoc, pdfaConformanceLevel, pdfaValidationPassed); if (summaryData != null && !summaryData.isEmpty()) { jsonOutput.set("SummaryData", summaryData); } @@ -373,7 +662,7 @@ public class GetInfoOnPDF { log.error("exception", e); } - boolean isPdfACompliant = checkForStandard(pdfBoxDoc, "PDF/A"); + boolean isPdfACompliant = pdfaConformanceLevel != null; boolean isPdfXCompliant = checkForStandard(pdfBoxDoc, "PDF/X"); boolean isPdfECompliant = checkForStandard(pdfBoxDoc, "PDF/E"); boolean isPdfVTCompliant = checkForStandard(pdfBoxDoc, "PDF/VT"); @@ -390,6 +679,12 @@ public class GetInfoOnPDF { // development in 2021. 
compliancy.put("IsPDF/ACompliant", isPdfACompliant); + if (pdfaConformanceLevel != null) { + compliancy.put("PDF/AConformanceLevel", pdfaConformanceLevel); + if (pdfaValidationPassed != null) { + compliancy.put("IsPDF/AValidated", pdfaValidationPassed); + } + } compliancy.put("IsPDF/XCompliant", isPdfXCompliant); compliancy.put("IsPDF/ECompliant", isPdfECompliant); compliancy.put("IsPDF/VTCompliant", isPdfVTCompliant); @@ -466,7 +761,7 @@ public class GetInfoOnPDF { ObjectNode sizeInfo = objectMapper.createObjectNode(); - getDimensionInfo(sizeInfo, width, height); + setDimensionInfo(sizeInfo, width, height); sizeInfo.put("Standard Page", getPageSize(width, height)); pageInfo.set("Size", sizeInfo); @@ -552,11 +847,10 @@ public class GetInfoOnPDF { Set uniqueURIs = new HashSet<>(); // To store unique URIs for (PDAnnotation annotation : annotations) { - if (annotation instanceof PDAnnotationLink linkAnnotation) { - if (linkAnnotation.getAction() instanceof PDActionURI uriAction) { - String uri = uriAction.getURI(); - uniqueURIs.add(uri); // Add to set to ensure uniqueness - } + if (annotation instanceof PDAnnotationLink linkAnnotation + && linkAnnotation.getAction() instanceof PDActionURI uriAction) { + String uri = uriAction.getURI(); + uniqueURIs.add(uri); // Add to set to ensure uniqueness } } @@ -679,8 +973,10 @@ public class GetInfoOnPDF { } pageInfo.set("Multimedia", multimediaArray); + keyBuilder.setLength(prefixLength); + keyBuilder.append(pageNum + 1); - pageInfoParent.set("Page " + (pageNum + 1), pageInfo); + pageInfoParent.set(keyBuilder.toString(), pageInfo); } jsonOutput.set("BasicInfo", basicInfo); @@ -706,133 +1002,11 @@ public class GetInfoOnPDF { return null; } - private void setNodePermissions(PDDocument pdfBoxDoc, ObjectNode permissionsNode) { - AccessPermission ap = pdfBoxDoc.getCurrentAccessPermission(); - - permissionsNode.put("Document Assembly", getPermissionState(ap.canAssembleDocument())); - permissionsNode.put("Extracting Content", 
getPermissionState(ap.canExtractContent())); - permissionsNode.put( - "Extracting for accessibility", - getPermissionState(ap.canExtractForAccessibility())); - permissionsNode.put("Form Filling", getPermissionState(ap.canFillInForm())); - permissionsNode.put("Modifying", getPermissionState(ap.canModify())); - permissionsNode.put("Modifying annotations", getPermissionState(ap.canModifyAnnotations())); - permissionsNode.put("Printing", getPermissionState(ap.canPrint())); - } - - private String getPermissionState(boolean state) { - return state ? "Allowed" : "Not Allowed"; - } - - public String getPageOrientation(double width, double height) { - if (width > height) { - return "Landscape"; - } else if (height > width) { - return "Portrait"; - } else { - return "Square"; - } - } - - public String getPageSize(float width, float height) { - // Define standard page sizes - Map standardSizes = new HashMap<>(); - standardSizes.put("Letter", PDRectangle.LETTER); - standardSizes.put("LEGAL", PDRectangle.LEGAL); - standardSizes.put("A0", PDRectangle.A0); - standardSizes.put("A1", PDRectangle.A1); - standardSizes.put("A2", PDRectangle.A2); - standardSizes.put("A3", PDRectangle.A3); - standardSizes.put("A4", PDRectangle.A4); - standardSizes.put("A5", PDRectangle.A5); - standardSizes.put("A6", PDRectangle.A6); - - for (Map.Entry entry : standardSizes.entrySet()) { - PDRectangle size = entry.getValue(); - if (isCloseToSize(width, height, size.getWidth(), size.getHeight())) { - return entry.getKey(); - } - } - return "Custom"; - } - - private boolean isCloseToSize( - float width, float height, float standardWidth, float standardHeight) { - float tolerance = 1.0f; // You can adjust the tolerance as needed - return Math.abs(width - standardWidth) <= tolerance - && Math.abs(height - standardHeight) <= tolerance; - } - - public ObjectNode getDimensionInfo(ObjectNode dimensionInfo, float width, float height) { - float ppi = 72; // Points Per Inch - - float widthInInches = width / ppi; - 
float heightInInches = height / ppi; - - float widthInCm = widthInInches * 2.54f; - float heightInCm = heightInInches * 2.54f; - - dimensionInfo.put("Width (px)", String.format("%.2f", width)); - dimensionInfo.put("Height (px)", String.format("%.2f", height)); - dimensionInfo.put("Width (in)", String.format("%.2f", widthInInches)); - dimensionInfo.put("Height (in)", String.format("%.2f", heightInInches)); - dimensionInfo.put("Width (cm)", String.format("%.2f", widthInCm)); - dimensionInfo.put("Height (cm)", String.format("%.2f", heightInCm)); - return dimensionInfo; - } - - public ArrayNode exploreStructureTree(List nodes) { - ArrayNode elementsArray = objectMapper.createArrayNode(); - if (nodes != null) { - for (Object obj : nodes) { - if (obj instanceof PDStructureNode node) { - ObjectNode elementNode = objectMapper.createObjectNode(); - - if (node instanceof PDStructureElement structureElement) { - elementNode.put("Type", structureElement.getStructureType()); - elementNode.put("Content", getContent(structureElement)); - - // Recursively explore child elements - ArrayNode childElements = exploreStructureTree(structureElement.getKids()); - if (!childElements.isEmpty()) { - elementNode.set("Children", childElements); - } - } - elementsArray.add(elementNode); - } - } - } - return elementsArray; - } - - public String getContent(PDStructureElement structureElement) { - StringBuilder contentBuilder = new StringBuilder(); - - for (Object item : structureElement.getKids()) { - if (item instanceof COSString cosString) { - contentBuilder.append(cosString.getString()); - } else if (item instanceof PDStructureElement) { - // For simplicity, we're handling only COSString and PDStructureElement here - // but a more comprehensive method would handle other types too - contentBuilder.append(getContent((PDStructureElement) item)); - } - } - - return contentBuilder.toString(); - } - - private String formatDate(Calendar calendar) { - if (calendar != null) { - DateTimeFormatter 
formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); - ZonedDateTime zonedDateTime = - ZonedDateTime.ofInstant(calendar.toInstant(), ZoneId.systemDefault()); - return zonedDateTime.format(formatter); - } else { - return null; - } - } - - private String getPageModeDescription(String pageMode) { - return pageMode != null ? pageMode.toString().replaceFirst("/", "") : "Unknown"; + private static String getPageModeDescription(String pageMode) { + if (pageMode == null) return "Unknown"; + return RegexPatternUtils.getInstance() + .getPageModePattern() + .matcher(pageMode) + .replaceFirst(""); } }