mirror of
				https://github.com/Frooodle/Stirling-PDF.git
				synced 2025-11-01 01:21:18 +01:00 
			
		
		
		
	all info
This commit is contained in:
		
							parent
							
								
									52a7885f3c
								
							
						
					
					
						commit
						0da9c62ef8
					
				@ -1,8 +1,23 @@
 | 
				
			|||||||
package stirling.software.SPDF.controller.api.security;
 | 
					package stirling.software.SPDF.controller.api.security;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import org.apache.pdfbox.cos.COSArray;
 | 
				
			||||||
 | 
					import org.apache.pdfbox.cos.COSBase;
 | 
				
			||||||
 | 
					import org.apache.pdfbox.cos.COSDictionary;
 | 
				
			||||||
 | 
					import org.apache.pdfbox.cos.COSName;
 | 
				
			||||||
 | 
					import org.apache.pdfbox.cos.COSString;
 | 
				
			||||||
import org.apache.pdfbox.pdmodel.PDDocument;
 | 
					import org.apache.pdfbox.pdmodel.PDDocument;
 | 
				
			||||||
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
 | 
					import org.apache.pdfbox.pdmodel.PDDocumentInformation;
 | 
				
			||||||
 | 
					import org.apache.pdfbox.pdmodel.PDPage;
 | 
				
			||||||
 | 
					import org.apache.pdfbox.pdmodel.common.PDRectangle;
 | 
				
			||||||
 | 
					import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureElement;
 | 
				
			||||||
 | 
					import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureNode;
 | 
				
			||||||
 | 
					import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot;
 | 
				
			||||||
 | 
					import org.apache.pdfbox.pdmodel.encryption.PDEncryption;
 | 
				
			||||||
 | 
					import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
 | 
				
			||||||
 | 
					import org.apache.pdfbox.pdmodel.interactive.form.PDField;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import com.itextpdf.kernel.pdf.PdfObject;
 | 
					import com.itextpdf.kernel.pdf.PdfObject;
 | 
				
			||||||
 | 
					import com.itextpdf.kernel.pdf.PdfOutline;
 | 
				
			||||||
import com.itextpdf.forms.PdfAcroForm;
 | 
					import com.itextpdf.forms.PdfAcroForm;
 | 
				
			||||||
import com.itextpdf.forms.fields.PdfFormField;
 | 
					import com.itextpdf.forms.fields.PdfFormField;
 | 
				
			||||||
import com.itextpdf.kernel.geom.Rectangle;
 | 
					import com.itextpdf.kernel.geom.Rectangle;
 | 
				
			||||||
@ -15,29 +30,64 @@ import com.itextpdf.kernel.pdf.PdfEncryption;
 | 
				
			|||||||
import com.itextpdf.kernel.pdf.PdfReader;
 | 
					import com.itextpdf.kernel.pdf.PdfReader;
 | 
				
			||||||
import com.itextpdf.kernel.pdf.PdfResources;
 | 
					import com.itextpdf.kernel.pdf.PdfResources;
 | 
				
			||||||
import com.itextpdf.kernel.pdf.PdfStream;
 | 
					import com.itextpdf.kernel.pdf.PdfStream;
 | 
				
			||||||
 | 
					import com.itextpdf.kernel.pdf.PdfString;
 | 
				
			||||||
import com.itextpdf.kernel.pdf.PdfName;
 | 
					import com.itextpdf.kernel.pdf.PdfName;
 | 
				
			||||||
import com.itextpdf.kernel.pdf.PdfViewerPreferences;
 | 
					import com.itextpdf.kernel.pdf.PdfViewerPreferences;
 | 
				
			||||||
import com.itextpdf.kernel.pdf.PdfWriter;
 | 
					import com.itextpdf.kernel.pdf.PdfWriter;
 | 
				
			||||||
import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
 | 
					import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
 | 
				
			||||||
import com.itextpdf.kernel.pdf.annot.PdfFileAttachmentAnnotation;
 | 
					import com.itextpdf.kernel.pdf.annot.PdfFileAttachmentAnnotation;
 | 
				
			||||||
import com.itextpdf.kernel.pdf.annot.PdfLinkAnnotation;
 | 
					import com.itextpdf.kernel.pdf.annot.PdfLinkAnnotation;
 | 
				
			||||||
 | 
					import com.itextpdf.kernel.pdf.annot.PdfWidgetAnnotation;
 | 
				
			||||||
import com.itextpdf.kernel.pdf.layer.PdfLayer;
 | 
					import com.itextpdf.kernel.pdf.layer.PdfLayer;
 | 
				
			||||||
import com.itextpdf.kernel.pdf.layer.PdfOCProperties;
 | 
					import com.itextpdf.kernel.pdf.layer.PdfOCProperties;
 | 
				
			||||||
 | 
					import com.itextpdf.kernel.xmp.XMPException;
 | 
				
			||||||
 | 
					import com.itextpdf.kernel.xmp.XMPMeta;
 | 
				
			||||||
 | 
					import com.itextpdf.kernel.xmp.XMPMetaFactory;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import io.swagger.v3.oas.annotations.Operation;
 | 
				
			||||||
 | 
					import io.swagger.v3.oas.annotations.Parameter;
 | 
				
			||||||
 | 
					import io.swagger.v3.oas.annotations.tags.Tag;
 | 
				
			||||||
 | 
					import stirling.software.SPDF.utils.WebResponseUtils;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import com.fasterxml.jackson.databind.ObjectMapper;
 | 
					import com.fasterxml.jackson.databind.ObjectMapper;
 | 
				
			||||||
import com.fasterxml.jackson.databind.node.ArrayNode;
 | 
					import com.fasterxml.jackson.databind.node.ArrayNode;
 | 
				
			||||||
import com.fasterxml.jackson.databind.node.ObjectNode;
 | 
					import com.fasterxml.jackson.databind.node.ObjectNode;
 | 
				
			||||||
import org.apache.pdfbox.text.PDFTextStripper;
 | 
					import org.apache.pdfbox.text.PDFTextStripper;
 | 
				
			||||||
 | 
					import org.springframework.http.MediaType;
 | 
				
			||||||
 | 
					import org.springframework.http.ResponseEntity;
 | 
				
			||||||
 | 
					import org.springframework.web.bind.annotation.PostMapping;
 | 
				
			||||||
 | 
					import org.springframework.web.bind.annotation.RequestPart;
 | 
				
			||||||
 | 
					import org.springframework.web.bind.annotation.RestController;
 | 
				
			||||||
 | 
					import org.springframework.web.multipart.MultipartFile;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import java.io.File;
 | 
					import java.io.File;
 | 
				
			||||||
 | 
					import java.util.HashMap;
 | 
				
			||||||
import java.io.FileWriter;
 | 
					import java.io.FileWriter;
 | 
				
			||||||
 | 
					import java.io.IOException;
 | 
				
			||||||
 | 
					import java.nio.charset.StandardCharsets;
 | 
				
			||||||
import java.text.SimpleDateFormat;
 | 
					import java.text.SimpleDateFormat;
 | 
				
			||||||
import java.util.Calendar;
 | 
					import java.util.Calendar;
 | 
				
			||||||
import java.util.List;
 | 
					import java.util.List;
 | 
				
			||||||
import java.util.Map;
 | 
					import java.util.Map;
 | 
				
			||||||
 | 
					import java.util.Set;
 | 
				
			||||||
 | 
					import java.util.HashSet;
 | 
				
			||||||
 | 
					@RestController
 | 
				
			||||||
 | 
					@Tag(name = "Security", description = "Security APIs")
 | 
				
			||||||
public class PDFExtractor {
 | 
					public class PDFExtractor {
 | 
				
			||||||
    public static void main(String[] args) {
 | 
						
 | 
				
			||||||
        try {
 | 
						static ObjectMapper objectMapper = new ObjectMapper();
 | 
				
			||||||
            PDDocument pdfBoxDoc = PDDocument.load(new File("path_to_pdf.pdf"));
 | 
					
 | 
				
			||||||
 | 
						@PostMapping(consumes = "multipart/form-data", value = "/get-info-on-pdf")
 | 
				
			||||||
 | 
					    @Operation(summary = "Summary here", description = "desc. Input:PDF Output:JSON Type:SISO")
 | 
				
			||||||
 | 
					    public ResponseEntity<byte[]> getPdfInfo(
 | 
				
			||||||
 | 
					            @RequestPart(required = true, value = "fileInput") 
 | 
				
			||||||
 | 
					            @Parameter(description = "The input PDF file to get info on", required = true) MultipartFile inputFile)
 | 
				
			||||||
 | 
					            throws IOException {
 | 
				
			||||||
 | 
							
 | 
				
			||||||
 | 
							try (
 | 
				
			||||||
 | 
								    PDDocument pdfBoxDoc = PDDocument.load(inputFile.getInputStream());
 | 
				
			||||||
 | 
								    PdfDocument itextDoc = new PdfDocument(new PdfReader(inputFile.getInputStream()))
 | 
				
			||||||
 | 
								) {
 | 
				
			||||||
            ObjectMapper objectMapper = new ObjectMapper();
 | 
					            ObjectMapper objectMapper = new ObjectMapper();
 | 
				
			||||||
            ObjectNode jsonOutput = objectMapper.createObjectNode();
 | 
					            ObjectNode jsonOutput = objectMapper.createObjectNode();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -55,22 +105,256 @@ public class PDFExtractor {
 | 
				
			|||||||
            metadata.put("Trapped", info.getTrapped());
 | 
					            metadata.put("Trapped", info.getTrapped());
 | 
				
			||||||
            jsonOutput.set("Metadata", metadata);
 | 
					            jsonOutput.set("Metadata", metadata);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            // Total file size of the PDF
 | 
				
			||||||
 | 
					            long fileSizeInBytes = inputFile.getSize();
 | 
				
			||||||
 | 
					            jsonOutput.put("FileSizeInBytes", fileSizeInBytes);
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            // Number of words, paragraphs, and images in the entire document
 | 
				
			||||||
 | 
					            String fullText = new PDFTextStripper().getText(pdfBoxDoc);
 | 
				
			||||||
 | 
					            String[] words = fullText.split("\\s+");
 | 
				
			||||||
 | 
					            int wordCount = words.length;
 | 
				
			||||||
 | 
					            int paragraphCount = fullText.split("\r\n|\r|\n").length;
 | 
				
			||||||
 | 
					            jsonOutput.put("WordCount", wordCount);
 | 
				
			||||||
 | 
					            jsonOutput.put("ParagraphCount", paragraphCount);
 | 
				
			||||||
 | 
					            // Number of characters in the entire document (including spaces and special characters)
 | 
				
			||||||
 | 
					            int charCount = fullText.length();
 | 
				
			||||||
 | 
					            jsonOutput.put("CharacterCount", charCount);
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            // Initialize the flags and types
 | 
				
			||||||
 | 
					            boolean hasCompression = false;
 | 
				
			||||||
 | 
					            String compressionType = "None";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            // Check for object streams
 | 
				
			||||||
 | 
					            for (int i = 1; i <= itextDoc.getNumberOfPdfObjects(); i++) {
 | 
				
			||||||
 | 
					                PdfObject obj = itextDoc.getPdfObject(i);
 | 
				
			||||||
 | 
					                if (obj != null && obj.isStream() && ((PdfStream) obj).get(PdfName.Type) == PdfName.ObjStm) {
 | 
				
			||||||
 | 
					                    hasCompression = true;
 | 
				
			||||||
 | 
					                    compressionType = "Object Streams";
 | 
				
			||||||
 | 
					                    break;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            // If not compressed using object streams, check for compressed Xref tables
 | 
				
			||||||
 | 
					            if (!hasCompression && itextDoc.getReader().hasRebuiltXref()) {
 | 
				
			||||||
 | 
					                hasCompression = true;
 | 
				
			||||||
 | 
					                compressionType = "Compressed Xref or Rebuilt Xref";
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            jsonOutput.put("Compression", hasCompression);
 | 
				
			||||||
 | 
					            if(hasCompression)
 | 
				
			||||||
 | 
					            	jsonOutput.put("CompressionType", compressionType);
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            String language = pdfBoxDoc.getDocumentCatalog().getLanguage();
 | 
				
			||||||
 | 
					            jsonOutput.put("Language", language);
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
            // Document Information using PDFBox
 | 
					            // Document Information using PDFBox
 | 
				
			||||||
            ObjectNode docInfoNode = objectMapper.createObjectNode();
 | 
					            ObjectNode docInfoNode = objectMapper.createObjectNode();
 | 
				
			||||||
            docInfoNode.put("Number of pages", pdfBoxDoc.getNumberOfPages());
 | 
					            docInfoNode.put("Number of pages", pdfBoxDoc.getNumberOfPages());
 | 
				
			||||||
            docInfoNode.put("PDF version", pdfBoxDoc.getVersion());
 | 
					            docInfoNode.put("PDF version", pdfBoxDoc.getVersion());
 | 
				
			||||||
            ;
 | 
					            
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            // Page Mode using iText7
 | 
					            // Page Mode using iText7
 | 
				
			||||||
            PdfDocument itextDoc = new PdfDocument(new PdfReader("path_to_pdf.pdf"));
 | 
					 | 
				
			||||||
            PdfCatalog catalog = itextDoc.getCatalog();
 | 
					            PdfCatalog catalog = itextDoc.getCatalog();
 | 
				
			||||||
            PdfName pageMode = catalog.getPdfObject().getAsName(PdfName.PageMode);
 | 
					            PdfName pageMode = catalog.getPdfObject().getAsName(PdfName.PageMode);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            ObjectNode itextDocInfo = objectMapper.createObjectNode();
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            PdfAcroForm acroForm = PdfAcroForm.getAcroForm(itextDoc, false);
 | 
				
			||||||
 | 
					            ObjectNode formFieldsNode = objectMapper.createObjectNode();
 | 
				
			||||||
 | 
					            if (acroForm != null) {
 | 
				
			||||||
 | 
					                for (Map.Entry<String, PdfFormField> entry : acroForm.getFormFields().entrySet()) {
 | 
				
			||||||
 | 
					                    formFieldsNode.put(entry.getKey(), entry.getValue().getValueAsString());
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            jsonOutput.set("FormFields", formFieldsNode);
 | 
				
			||||||
 | 
					           
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            //embeed files TODO size
 | 
				
			||||||
 | 
					            ArrayNode embeddedFilesArray = objectMapper.createArrayNode();
 | 
				
			||||||
 | 
					            if(itextDoc.getCatalog().getPdfObject().getAsDictionary(PdfName.Names) != null)
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					            PdfDictionary embeddedFiles = itextDoc.getCatalog().getPdfObject().getAsDictionary(PdfName.Names)
 | 
				
			||||||
 | 
					                    .getAsDictionary(PdfName.EmbeddedFiles);
 | 
				
			||||||
 | 
					            if (embeddedFiles != null) {
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					                PdfArray namesArray = embeddedFiles.getAsArray(PdfName.Names);
 | 
				
			||||||
 | 
					                for (int i = 0; i < namesArray.size(); i += 2) {
 | 
				
			||||||
 | 
					                    ObjectNode embeddedFileNode = objectMapper.createObjectNode();
 | 
				
			||||||
 | 
					                    embeddedFileNode.put("Name", namesArray.getAsString(i).toString());
 | 
				
			||||||
 | 
					                    // Add other details if required
 | 
				
			||||||
 | 
					                    embeddedFilesArray.add(embeddedFileNode);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            jsonOutput.set("EmbeddedFiles", embeddedFilesArray);
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            //attachments TODO size
 | 
				
			||||||
 | 
					            ArrayNode attachmentsArray = objectMapper.createArrayNode();
 | 
				
			||||||
 | 
					            for (int pageNum = 1; pageNum <= itextDoc.getNumberOfPages(); pageNum++) {
 | 
				
			||||||
 | 
					                for (PdfAnnotation annotation : itextDoc.getPage(pageNum).getAnnotations()) {
 | 
				
			||||||
 | 
					                    if (annotation instanceof PdfFileAttachmentAnnotation) {
 | 
				
			||||||
 | 
					                        ObjectNode attachmentNode = objectMapper.createObjectNode();
 | 
				
			||||||
 | 
					                        attachmentNode.put("Name", ((PdfFileAttachmentAnnotation) annotation).getName().toString());
 | 
				
			||||||
 | 
					                        attachmentNode.put("Description", annotation.getContents().getValue());
 | 
				
			||||||
 | 
					                        attachmentsArray.add(attachmentNode);
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            jsonOutput.set("Attachments", attachmentsArray);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            //Javascript
 | 
				
			||||||
 | 
					            PdfDictionary namesDict = itextDoc.getCatalog().getPdfObject().getAsDictionary(PdfName.Names);
 | 
				
			||||||
 | 
					            ArrayNode javascriptArray = objectMapper.createArrayNode();
 | 
				
			||||||
 | 
					            if (namesDict != null) {
 | 
				
			||||||
 | 
					                PdfDictionary javascriptDict = namesDict.getAsDictionary(PdfName.JavaScript);
 | 
				
			||||||
 | 
					                if (javascriptDict != null) {
 | 
				
			||||||
 | 
					                    
 | 
				
			||||||
 | 
					                    PdfArray namesArray = javascriptDict.getAsArray(PdfName.Names);
 | 
				
			||||||
 | 
					                    for (int i = 0; i < namesArray.size(); i += 2) {
 | 
				
			||||||
 | 
					                        ObjectNode jsNode = objectMapper.createObjectNode();
 | 
				
			||||||
 | 
					                        jsNode.put("JS Name", namesArray.getAsString(i).toString());
 | 
				
			||||||
 | 
					                        jsNode.put("JS Code", namesArray.getAsString(i + 1).toString());
 | 
				
			||||||
 | 
					                        javascriptArray.add(jsNode);
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            jsonOutput.set("JavaScript", javascriptArray);
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            //TODO size
 | 
				
			||||||
 | 
					            PdfOCProperties ocProperties = itextDoc.getCatalog().getOCProperties(false);
 | 
				
			||||||
 | 
					            ArrayNode layersArray = objectMapper.createArrayNode();
 | 
				
			||||||
 | 
					            if (ocProperties != null) {
 | 
				
			||||||
 | 
					               
 | 
				
			||||||
 | 
					                for (PdfLayer layer : ocProperties.getLayers()) {
 | 
				
			||||||
 | 
					                    ObjectNode layerNode = objectMapper.createObjectNode();
 | 
				
			||||||
 | 
					                    layerNode.put("Name", layer.getPdfObject().getAsString(PdfName.Name).toString());
 | 
				
			||||||
 | 
					                    layersArray.add(layerNode);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            jsonOutput.set("Layers", layersArray);
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            //TODO Security
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            // Digital Signatures using iText7 TODO
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            PDAcroForm pdAcroForm = pdfBoxDoc.getDocumentCatalog().getAcroForm();
 | 
				
			||||||
 | 
					            ArrayNode formFieldsArray2 = objectMapper.createArrayNode();
 | 
				
			||||||
 | 
					            if (pdAcroForm != null) {
 | 
				
			||||||
 | 
					               
 | 
				
			||||||
 | 
					                for (PDField field : pdAcroForm.getFields()) {
 | 
				
			||||||
 | 
					                    ObjectNode fieldNode = objectMapper.createObjectNode();
 | 
				
			||||||
 | 
					                    fieldNode.put("FieldName", field.getFullyQualifiedName());
 | 
				
			||||||
 | 
					                    fieldNode.put("FieldType", field.getFieldType());
 | 
				
			||||||
 | 
					                    // Add more attributes as needed...
 | 
				
			||||||
 | 
					                    formFieldsArray2.add(fieldNode);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            jsonOutput.set("FormFields", formFieldsArray2);
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            PDStructureTreeRoot structureTreeRoot = pdfBoxDoc.getDocumentCatalog().getStructureTreeRoot();
 | 
				
			||||||
 | 
					            ArrayNode structureTreeArray;
 | 
				
			||||||
 | 
								try {
 | 
				
			||||||
 | 
									if(structureTreeRoot != null) {
 | 
				
			||||||
 | 
										structureTreeArray = exploreStructureTree(structureTreeRoot.getKids());
 | 
				
			||||||
 | 
										jsonOutput.set("StructureTree", structureTreeArray);
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
								} catch (Exception e) {
 | 
				
			||||||
 | 
									// TODO Auto-generated catch block
 | 
				
			||||||
 | 
									e.printStackTrace();
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            boolean isPdfACompliant = checkOutputIntent(itextDoc, "PDF/A");
 | 
				
			||||||
 | 
					            boolean isPdfXCompliant = checkOutputIntent(itextDoc, "PDF/X");
 | 
				
			||||||
 | 
					            boolean isPdfECompliant = checkForStandard(itextDoc, "PDF/E");
 | 
				
			||||||
 | 
					            boolean isPdfVTCompliant = checkForStandard(itextDoc, "PDF/VT");
 | 
				
			||||||
 | 
					            boolean isPdfUACompliant = checkForStandard(itextDoc, "PDF/UA");
 | 
				
			||||||
 | 
					            boolean isPdfBCompliant = checkForStandard(itextDoc, "PDF/B"); // If you want to check for PDF/Broadcast, though this isn't an official ISO standard.
 | 
				
			||||||
 | 
					            boolean isPdfSECCompliant = checkForStandard(itextDoc, "PDF/SEC"); // This might not be effective since PDF/SEC was under development in 2021.
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            ObjectNode compliancy = objectMapper.createObjectNode();
 | 
				
			||||||
 | 
					            compliancy.put("IsPDF/ACompliant", isPdfACompliant);
 | 
				
			||||||
 | 
					            compliancy.put("IsPDF/XCompliant", isPdfXCompliant);
 | 
				
			||||||
 | 
					            compliancy.put("IsPDF/ECompliant", isPdfECompliant);
 | 
				
			||||||
 | 
					            compliancy.put("IsPDF/VTCompliant", isPdfVTCompliant);
 | 
				
			||||||
 | 
					            compliancy.put("IsPDF/UACompliant", isPdfUACompliant);
 | 
				
			||||||
 | 
					            compliancy.put("IsPDF/BCompliant", isPdfBCompliant);
 | 
				
			||||||
 | 
					            compliancy.put("IsPDF/SECCompliant", isPdfSECCompliant);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            jsonOutput.set("Compliancy", compliancy);
 | 
				
			||||||
 | 
					     
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					           
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            ArrayNode bookmarksArray = objectMapper.createArrayNode();
 | 
				
			||||||
 | 
					            PdfOutline root = itextDoc.getOutlines(false);
 | 
				
			||||||
 | 
					            if (root != null) {
 | 
				
			||||||
 | 
					                for (PdfOutline child : root.getAllChildren()) {
 | 
				
			||||||
 | 
					                    addOutlinesToArray(child, bookmarksArray);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            jsonOutput.set("Bookmarks/Outline/TOC", bookmarksArray);
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            String xmpString = null;
 | 
				
			||||||
 | 
					            try {
 | 
				
			||||||
 | 
					                byte[] xmpBytes = itextDoc.getXmpMetadata();
 | 
				
			||||||
 | 
					                if (xmpBytes != null) {
 | 
				
			||||||
 | 
					                    XMPMeta xmpMeta = XMPMetaFactory.parseFromBuffer(xmpBytes);
 | 
				
			||||||
 | 
					                    xmpString = xmpMeta.dumpObject();
 | 
				
			||||||
 | 
					                    
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            } catch (XMPException e) {
 | 
				
			||||||
 | 
					                e.printStackTrace();
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            jsonOutput.put("XMPMetadata", xmpString);
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            ObjectNode encryptionNode = objectMapper.createObjectNode();
 | 
				
			||||||
 | 
					            if (pdfBoxDoc.isEncrypted()) {
 | 
				
			||||||
 | 
					                encryptionNode.put("IsEncrypted", true);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                // Retrieve encryption details using getEncryption()
 | 
				
			||||||
 | 
					                PDEncryption encryption = pdfBoxDoc.getEncryption();
 | 
				
			||||||
 | 
					                encryptionNode.put("EncryptionAlgorithm", encryption.getFilter());
 | 
				
			||||||
 | 
					                encryptionNode.put("KeyLength", encryption.getLength());
 | 
				
			||||||
 | 
					                encryptionNode.put("Permissions", pdfBoxDoc.getCurrentAccessPermission().toString());
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					                // Add other encryption-related properties as needed
 | 
				
			||||||
 | 
					            } else {
 | 
				
			||||||
 | 
					                encryptionNode.put("IsEncrypted", false);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            jsonOutput.set("Encryption", encryptionNode);
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
            docInfoNode.put("Page Mode", getPageModeDescription(pageMode));;
 | 
					            docInfoNode.put("Page Mode", getPageModeDescription(pageMode));;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            jsonOutput.set("Document Information", docInfoNode);
 | 
					            jsonOutput.set("Document Information", docInfoNode);
 | 
				
			||||||
            
 | 
					            ObjectNode pageInfoParent = objectMapper.createObjectNode();
 | 
				
			||||||
            for (int pageNum = 1; pageNum <= itextDoc.getNumberOfPages(); pageNum++) {
 | 
					            for (int pageNum = 1; pageNum <= itextDoc.getNumberOfPages(); pageNum++) {
 | 
				
			||||||
                ObjectNode pageInfo = objectMapper.createObjectNode();
 | 
					                ObjectNode pageInfo = objectMapper.createObjectNode();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -79,6 +363,8 @@ public class PDFExtractor {
 | 
				
			|||||||
                pageInfo.put("Width", pageSize.getWidth());
 | 
					                pageInfo.put("Width", pageSize.getWidth());
 | 
				
			||||||
                pageInfo.put("Height", pageSize.getHeight());
 | 
					                pageInfo.put("Height", pageSize.getHeight());
 | 
				
			||||||
                pageInfo.put("Rotation", itextDoc.getPage(pageNum).getRotation());
 | 
					                pageInfo.put("Rotation", itextDoc.getPage(pageNum).getRotation());
 | 
				
			||||||
 | 
					                pageInfo.put("Page Orientation", getPageOrientation(pageSize.getWidth(),pageSize.getHeight())); 
 | 
				
			||||||
 | 
					                pageInfo.put("Standard Size", getPageSize(pageSize.getWidth(),pageSize.getHeight())); 
 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
                // Boxes
 | 
					                // Boxes
 | 
				
			||||||
                pageInfo.put("MediaBox", itextDoc.getPage(pageNum).getMediaBox().toString());
 | 
					                pageInfo.put("MediaBox", itextDoc.getPage(pageNum).getMediaBox().toString());
 | 
				
			||||||
@ -98,13 +384,24 @@ public class PDFExtractor {
 | 
				
			|||||||
             // Annotations
 | 
					             // Annotations
 | 
				
			||||||
                ArrayNode annotationsArray = objectMapper.createArrayNode();
 | 
					                ArrayNode annotationsArray = objectMapper.createArrayNode();
 | 
				
			||||||
                List<PdfAnnotation> annotations = itextDoc.getPage(pageNum).getAnnotations();
 | 
					                List<PdfAnnotation> annotations = itextDoc.getPage(pageNum).getAnnotations();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                int subtypeCount = 0;
 | 
				
			||||||
 | 
					                int contentsCount = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                for (PdfAnnotation annotation : annotations) {
 | 
					                for (PdfAnnotation annotation : annotations) {
 | 
				
			||||||
                    ObjectNode annotationNode = objectMapper.createObjectNode();
 | 
					                    if(annotation.getSubtype() != null) {
 | 
				
			||||||
                    annotationNode.put("Subtype", annotation.getSubtype().toString());
 | 
					                        subtypeCount++;  // Increase subtype count
 | 
				
			||||||
                    annotationNode.put("Contents", annotation.getContents().getValue());
 | 
					 | 
				
			||||||
                    annotationsArray.add(annotationNode);
 | 
					 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                pageInfo.set("Annotations", annotationsArray);
 | 
					                    if(annotation.getContents() != null) {
 | 
				
			||||||
 | 
					                        contentsCount++;  // Increase contents count
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                ObjectNode annotationsObject = objectMapper.createObjectNode();
 | 
				
			||||||
 | 
					                annotationsObject.put("AnnotationsCount", annotations.size());
 | 
				
			||||||
 | 
					                annotationsObject.put("SubtypeCount", subtypeCount);
 | 
				
			||||||
 | 
					                annotationsObject.put("ContentsCount", contentsCount);
 | 
				
			||||||
 | 
					                pageInfo.set("Annotations", annotationsObject);
 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
                // Images (simplified)
 | 
					                // Images (simplified)
 | 
				
			||||||
                // This part is non-trivial as images can be embedded in multiple ways in a PDF.
 | 
					                // This part is non-trivial as images can be embedded in multiple ways in a PDF.
 | 
				
			||||||
@ -129,31 +426,61 @@ public class PDFExtractor {
 | 
				
			|||||||
                }
 | 
					                }
 | 
				
			||||||
                pageInfo.set("Images", imagesArray);
 | 
					                pageInfo.set("Images", imagesArray);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
                // Links
 | 
					                // Links
 | 
				
			||||||
                ArrayNode linksArray = objectMapper.createArrayNode();
 | 
					                ArrayNode linksArray = objectMapper.createArrayNode();
 | 
				
			||||||
 | 
					                Set<String> uniqueURIs = new HashSet<>();  // To store unique URIs
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                for (PdfAnnotation annotation : annotations) {
 | 
					                for (PdfAnnotation annotation : annotations) {
 | 
				
			||||||
                    if (annotation instanceof PdfLinkAnnotation) {
 | 
					                    if (annotation instanceof PdfLinkAnnotation) {
 | 
				
			||||||
                        PdfLinkAnnotation linkAnnotation = (PdfLinkAnnotation) annotation;
 | 
					                        PdfLinkAnnotation linkAnnotation = (PdfLinkAnnotation) annotation;
 | 
				
			||||||
                        ObjectNode linkNode = objectMapper.createObjectNode();
 | 
					                        String uri = linkAnnotation.getAction().toString();
 | 
				
			||||||
                        linkNode.put("URI", linkAnnotation.getAction().toString()); // Basic, might not work for all links
 | 
					                        uniqueURIs.add(uri);  // Add to set to ensure uniqueness
 | 
				
			||||||
                        linksArray.add(linkNode);
 | 
					 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                // Add unique URIs to linksArray
 | 
				
			||||||
 | 
					                for (String uri : uniqueURIs) {
 | 
				
			||||||
 | 
					                    ObjectNode linkNode = objectMapper.createObjectNode();
 | 
				
			||||||
 | 
					                    linkNode.put("URI", uri);
 | 
				
			||||||
 | 
					                    linksArray.add(linkNode);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
                pageInfo.set("Links", linksArray);
 | 
					                pageInfo.set("Links", linksArray);
 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
                //Fonts
 | 
					                //Fonts
 | 
				
			||||||
                ArrayNode fontsArray = objectMapper.createArrayNode();
 | 
					                ArrayNode fontsArray = objectMapper.createArrayNode();
 | 
				
			||||||
                PdfDictionary fontDicts = resources.getResource(PdfName.Font);
 | 
					                PdfDictionary fontDicts = resources.getResource(PdfName.Font);
 | 
				
			||||||
 | 
					                Set<String> uniqueSubtypes = new HashSet<>();  // To store unique subtypes
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
                if (fontDicts != null) {
 | 
					                if (fontDicts != null) {
 | 
				
			||||||
                    for (PdfName key : fontDicts.keySet()) {
 | 
					                    for (PdfName key : fontDicts.keySet()) {
 | 
				
			||||||
 | 
					                        ObjectNode fontNode = objectMapper.createObjectNode();  // Create a new font node for each font
 | 
				
			||||||
                        PdfDictionary font = fontDicts.getAsDictionary(key);
 | 
					                        PdfDictionary font = fontDicts.getAsDictionary(key);
 | 
				
			||||||
                        ObjectNode fontNode = objectMapper.createObjectNode();
 | 
					                        
 | 
				
			||||||
 | 
					                        boolean isEmbedded = font.containsKey(PdfName.FontFile) || 
 | 
				
			||||||
 | 
					                                font.containsKey(PdfName.FontFile2) || 
 | 
				
			||||||
 | 
					                                font.containsKey(PdfName.FontFile3);
 | 
				
			||||||
 | 
					                    	fontNode.put("IsEmbedded", isEmbedded);
 | 
				
			||||||
 | 
					                    	
 | 
				
			||||||
 | 
					                    	
 | 
				
			||||||
 | 
					                    	if (font.containsKey(PdfName.Encoding)) {
 | 
				
			||||||
 | 
					                    	    String encoding = font.getAsName(PdfName.Encoding).toString();
 | 
				
			||||||
 | 
					                    	    fontNode.put("Encoding", encoding);
 | 
				
			||||||
 | 
					                    	}
 | 
				
			||||||
 | 
					                    	
 | 
				
			||||||
 | 
					                    	
 | 
				
			||||||
 | 
					                    	
 | 
				
			||||||
 | 
					                        if(font.getAsString(PdfName.BaseFont) != null)
 | 
				
			||||||
                            fontNode.put("Name", font.getAsString(PdfName.BaseFont).toString());
 | 
					                            fontNode.put("Name", font.getAsString(PdfName.BaseFont).toString());
 | 
				
			||||||
                        
 | 
					                        
 | 
				
			||||||
 | 
					                        String subtype = null;
 | 
				
			||||||
                        // Font Subtype (e.g., Type1, TrueType)
 | 
					                        // Font Subtype (e.g., Type1, TrueType)
 | 
				
			||||||
                        if (font.containsKey(PdfName.Subtype)) {
 | 
					                        if (font.containsKey(PdfName.Subtype)) {
 | 
				
			||||||
                            fontNode.put("Subtype", font.getAsName(PdfName.Subtype).toString());
 | 
					                            subtype = font.getAsName(PdfName.Subtype).toString();
 | 
				
			||||||
 | 
					                            uniqueSubtypes.add(subtype);  // Add to set to ensure uniqueness
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
 | 
					                        fontNode.put("Subtype", subtype);
 | 
				
			||||||
                        
 | 
					                        
 | 
				
			||||||
                        // Font Descriptor
 | 
					                        // Font Descriptor
 | 
				
			||||||
                        PdfDictionary fontDescriptor = font.getAsDictionary(PdfName.FontDescriptor);
 | 
					                        PdfDictionary fontDescriptor = font.getAsDictionary(PdfName.FontDescriptor);
 | 
				
			||||||
@ -166,14 +493,53 @@ public class PDFExtractor {
 | 
				
			|||||||
                            // Flags (e.g., italic, bold)
 | 
					                            // Flags (e.g., italic, bold)
 | 
				
			||||||
                            if (fontDescriptor.containsKey(PdfName.Flags)) {
 | 
					                            if (fontDescriptor.containsKey(PdfName.Flags)) {
 | 
				
			||||||
                                int flags = fontDescriptor.getAsNumber(PdfName.Flags).intValue();
 | 
					                                int flags = fontDescriptor.getAsNumber(PdfName.Flags).intValue();
 | 
				
			||||||
                                fontNode.put("IsItalic", (flags & 64) != 0);
 | 
					                                fontNode.put("IsItalic", (flags & 64) != 0);  // Existing italic flag
 | 
				
			||||||
                                fontNode.put("IsBold", (flags & 1) != 0);
 | 
					                                fontNode.put("IsBold", (flags & 1 << 16) != 0);  // Existing bold flag
 | 
				
			||||||
 | 
					                                fontNode.put("IsFixedPitch", (flags & 1) != 0);
 | 
				
			||||||
 | 
					                                fontNode.put("IsSerif", (flags & 2) != 0);
 | 
				
			||||||
 | 
					                                fontNode.put("IsSymbolic", (flags & 4) != 0);
 | 
				
			||||||
 | 
					                                fontNode.put("IsScript", (flags & 8) != 0);
 | 
				
			||||||
 | 
					                                fontNode.put("IsNonsymbolic", (flags & 16) != 0);
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                            
 | 
				
			||||||
 | 
					                            if (fontDescriptor.containsKey(PdfName.FontFamily)) {
 | 
				
			||||||
 | 
					                                String fontFamily = fontDescriptor.getAsString(PdfName.FontFamily).toString();
 | 
				
			||||||
 | 
					                                fontNode.put("FontFamily", fontFamily);
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                            if (fontDescriptor.containsKey(PdfName.FontStretch)) {
 | 
				
			||||||
 | 
					                                String fontStretch = fontDescriptor.getAsName(PdfName.FontStretch).toString();
 | 
				
			||||||
 | 
					                                fontNode.put("FontStretch", fontStretch);
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                            
 | 
				
			||||||
 | 
					                            if (fontDescriptor != null && fontDescriptor.containsKey(PdfName.FontBBox)) {
 | 
				
			||||||
 | 
					                                PdfArray bbox = fontDescriptor.getAsArray(PdfName.FontBBox);
 | 
				
			||||||
 | 
					                                fontNode.put("FontBoundingBox", bbox.toString());
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                            if (fontDescriptor != null && fontDescriptor.containsKey(PdfName.FontWeight)) {
 | 
				
			||||||
 | 
					                                float fontWeight = fontDescriptor.getAsNumber(PdfName.FontWeight).floatValue();
 | 
				
			||||||
 | 
					                                fontNode.put("FontWeight", fontWeight);
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                            
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        if (font.containsKey(PdfName.ToUnicode)) {
 | 
				
			||||||
 | 
					                            PdfStream toUnicodeStream = font.getAsStream(PdfName.ToUnicode);
 | 
				
			||||||
 | 
					                            // Handle the stream as needed, maybe extract some details or just note its existence
 | 
				
			||||||
 | 
					                            fontNode.put("HasToUnicodeMap", true);
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        if (fontNode.size() > 0) {
 | 
				
			||||||
 | 
					                        	fontsArray.add(fontNode);  // Add each font node to fontsArray
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                        fontsArray.add(fontNode);
 | 
					                // Add unique subtypes to fontsArray
 | 
				
			||||||
                    }
 | 
					                ArrayNode subtypesArray = objectMapper.createArrayNode();
 | 
				
			||||||
 | 
					                for (String subtype : uniqueSubtypes) {
 | 
				
			||||||
 | 
					                    subtypesArray.add(subtype);
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					                pageInfo.set("FontSubtypes", subtypesArray); // Changed from Fonts to FontSubtypes
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                pageInfo.set("Fonts", fontsArray);
 | 
					                pageInfo.set("Fonts", fontsArray);
 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
@ -204,123 +570,186 @@ public class PDFExtractor {
 | 
				
			|||||||
                pageInfo.set("Color Spaces & ICC Profiles", colorSpacesArray);
 | 
					                pageInfo.set("Color Spaces & ICC Profiles", colorSpacesArray);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                // Other XObjects
 | 
					                // Other XObjects
 | 
				
			||||||
                ArrayNode xObjectsArray = objectMapper.createArrayNode();
 | 
					                Map<String, Integer> xObjectCountMap = new HashMap<>();  // To store the count for each type
 | 
				
			||||||
                PdfDictionary xObjects = resourcesDict.getAsDictionary(PdfName.XObject);
 | 
					                PdfDictionary xObjects = resourcesDict.getAsDictionary(PdfName.XObject);
 | 
				
			||||||
                if (xObjects != null) {
 | 
					                if (xObjects != null) {
 | 
				
			||||||
                    for (PdfName name : xObjects.keySet()) {
 | 
					                    for (PdfName name : xObjects.keySet()) {
 | 
				
			||||||
                        PdfStream xObjectStream = xObjects.getAsStream(name);
 | 
					                        PdfStream xObjectStream = xObjects.getAsStream(name);
 | 
				
			||||||
                        ObjectNode xObjectNode = objectMapper.createObjectNode();
 | 
					                        String xObjectType = xObjectStream.getAsName(PdfName.Subtype).toString();
 | 
				
			||||||
                        xObjectNode.put("Type", xObjectStream.getAsName(PdfName.Subtype).toString());
 | 
					 | 
				
			||||||
                        // TODO: Extract further details depending on the XObject type
 | 
					 | 
				
			||||||
                        xObjectsArray.add(xObjectNode);
 | 
					 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
                pageInfo.set("XObjects", xObjectsArray);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                jsonOutput.set("Page " + pageNum, pageInfo);
 | 
					                        // Increment the count for this type in the map
 | 
				
			||||||
            }
 | 
					                        xObjectCountMap.put(xObjectType, xObjectCountMap.getOrDefault(xObjectType, 0) + 1);
 | 
				
			||||||
            
 | 
					 | 
				
			||||||
            PdfAcroForm acroForm = PdfAcroForm.getAcroForm(itextDoc, false);
 | 
					 | 
				
			||||||
            if (acroForm != null) {
 | 
					 | 
				
			||||||
                ObjectNode formFieldsNode = objectMapper.createObjectNode();
 | 
					 | 
				
			||||||
                for (Map.Entry<String, PdfFormField> entry : acroForm.getFormFields().entrySet()) {
 | 
					 | 
				
			||||||
                    formFieldsNode.put(entry.getKey(), entry.getValue().getValueAsString());
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
                jsonOutput.set("FormFields", formFieldsNode);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            
 | 
					 | 
				
			||||||
            
 | 
					 | 
				
			||||||
           //TODO bookmarks here
 | 
					 | 
				
			||||||
            
 | 
					 | 
				
			||||||
            
 | 
					 | 
				
			||||||
            
 | 
					 | 
				
			||||||
            
 | 
					 | 
				
			||||||
            //embeed files TODO size
 | 
					 | 
				
			||||||
            PdfDictionary embeddedFiles = itextDoc.getCatalog().getPdfObject().getAsDictionary(PdfName.Names)
 | 
					 | 
				
			||||||
                    .getAsDictionary(PdfName.EmbeddedFiles);
 | 
					 | 
				
			||||||
            if (embeddedFiles != null) {
 | 
					 | 
				
			||||||
                ArrayNode embeddedFilesArray = objectMapper.createArrayNode();
 | 
					 | 
				
			||||||
                PdfArray namesArray = embeddedFiles.getAsArray(PdfName.Names);
 | 
					 | 
				
			||||||
                for (int i = 0; i < namesArray.size(); i += 2) {
 | 
					 | 
				
			||||||
                    ObjectNode embeddedFileNode = objectMapper.createObjectNode();
 | 
					 | 
				
			||||||
                    embeddedFileNode.put("Name", namesArray.getAsString(i).toString());
 | 
					 | 
				
			||||||
                    // Add other details if required
 | 
					 | 
				
			||||||
                    embeddedFilesArray.add(embeddedFileNode);
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
                jsonOutput.set("EmbeddedFiles", embeddedFilesArray);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            
 | 
					 | 
				
			||||||
            //attachments TODO size
 | 
					 | 
				
			||||||
            ArrayNode attachmentsArray = objectMapper.createArrayNode();
 | 
					 | 
				
			||||||
            for (int pageNum = 1; pageNum <= itextDoc.getNumberOfPages(); pageNum++) {
 | 
					 | 
				
			||||||
                for (PdfAnnotation annotation : itextDoc.getPage(pageNum).getAnnotations()) {
 | 
					 | 
				
			||||||
                    if (annotation instanceof PdfFileAttachmentAnnotation) {
 | 
					 | 
				
			||||||
                        ObjectNode attachmentNode = objectMapper.createObjectNode();
 | 
					 | 
				
			||||||
                        attachmentNode.put("Name", ((PdfFileAttachmentAnnotation) annotation).getName().toString());
 | 
					 | 
				
			||||||
                        attachmentNode.put("Description", annotation.getContents().getValue());
 | 
					 | 
				
			||||||
                        attachmentsArray.add(attachmentNode);
 | 
					 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            jsonOutput.set("Attachments", attachmentsArray);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            //Javascript
 | 
					 | 
				
			||||||
            PdfDictionary namesDict = itextDoc.getCatalog().getPdfObject().getAsDictionary(PdfName.Names);
 | 
					 | 
				
			||||||
            if (namesDict != null) {
 | 
					 | 
				
			||||||
                PdfDictionary javascriptDict = namesDict.getAsDictionary(PdfName.JavaScript);
 | 
					 | 
				
			||||||
                if (javascriptDict != null) {
 | 
					 | 
				
			||||||
                    ArrayNode javascriptArray = objectMapper.createArrayNode();
 | 
					 | 
				
			||||||
                    PdfArray namesArray = javascriptDict.getAsArray(PdfName.Names);
 | 
					 | 
				
			||||||
                    for (int i = 0; i < namesArray.size(); i += 2) {
 | 
					 | 
				
			||||||
                        ObjectNode jsNode = objectMapper.createObjectNode();
 | 
					 | 
				
			||||||
                        jsNode.put("JS Name", namesArray.getAsString(i).toString());
 | 
					 | 
				
			||||||
                        jsNode.put("JS Code", namesArray.getAsString(i + 1).toString());
 | 
					 | 
				
			||||||
                        javascriptArray.add(jsNode);
 | 
					 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
                    jsonOutput.set("JavaScripts", javascriptArray);
 | 
					 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            
 | 
					                // Add the count map to pageInfo (or wherever you want to store it)
 | 
				
			||||||
            //TODO size
 | 
					                ObjectNode xObjectCountNode = objectMapper.createObjectNode();
 | 
				
			||||||
            PdfOCProperties ocProperties = itextDoc.getCatalog().getOCProperties(false);
 | 
					                for (Map.Entry<String, Integer> entry : xObjectCountMap.entrySet()) {
 | 
				
			||||||
            if (ocProperties != null) {
 | 
					                    xObjectCountNode.put(entry.getKey(), entry.getValue());
 | 
				
			||||||
                ArrayNode layersArray = objectMapper.createArrayNode();
 | 
					 | 
				
			||||||
                for (PdfLayer layer : ocProperties.getLayers()) {
 | 
					 | 
				
			||||||
                    ObjectNode layerNode = objectMapper.createObjectNode();
 | 
					 | 
				
			||||||
                    layerNode.put("Name", layer.getPdfObject().getAsString(PdfName.Name).toString());
 | 
					 | 
				
			||||||
                    layersArray.add(layerNode);
 | 
					 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                jsonOutput.set("Layers", layersArray);
 | 
					                pageInfo.set("XObjectCounts", xObjectCountNode);
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					         
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                ArrayNode multimediaArray = objectMapper.createArrayNode();
 | 
				
			||||||
 | 
					                for (PdfAnnotation annotation : annotations) {
 | 
				
			||||||
 | 
					                    if (PdfName.RichMedia.equals(annotation.getSubtype())) {
 | 
				
			||||||
 | 
					                        ObjectNode multimediaNode = objectMapper.createObjectNode();
 | 
				
			||||||
 | 
					                        // Extract details from the dictionary as needed
 | 
				
			||||||
 | 
					                        multimediaArray.add(multimediaNode);
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                pageInfo.set("Multimedia", multimediaArray);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                pageInfoParent.set("Page " + pageNum, pageInfo);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
 | 
					            jsonOutput.set("Per Page Info", pageInfoParent);
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            //TODO Security
 | 
					 | 
				
			||||||
            
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            
 | 
					 | 
				
			||||||
            
 | 
					 | 
				
			||||||
            
 | 
					 | 
				
			||||||
            
 | 
					 | 
				
			||||||
         // Digital Signatures using iText7 TODO
 | 
					 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            // Save JSON to file
 | 
					            // Save JSON to file
 | 
				
			||||||
            try (FileWriter file = new FileWriter("output.json")) {
 | 
					            String jsonString = objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(jsonOutput);
 | 
				
			||||||
                file.write(objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(jsonOutput)); 
 | 
					            
 | 
				
			||||||
                file.flush();
 | 
					            
 | 
				
			||||||
            }
 | 
					            
 | 
				
			||||||
 | 
					            return WebResponseUtils.bytesToWebResponse(jsonString.getBytes(StandardCharsets.UTF_8), "response.json", MediaType.APPLICATION_JSON);
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            pdfBoxDoc.close();
 | 
					 | 
				
			||||||
            itextDoc.close();
 | 
					 | 
				
			||||||
        } catch (Exception e) {
 | 
					        } catch (Exception e) {
 | 
				
			||||||
            e.printStackTrace();
 | 
					            e.printStackTrace();
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
							return null;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    private static String formatDate(Calendar calendar) {
 | 
					    private static void addOutlinesToArray(PdfOutline outline, ArrayNode arrayNode) {
 | 
				
			||||||
 | 
					        if (outline == null) return;
 | 
				
			||||||
 | 
					        ObjectNode outlineNode = objectMapper.createObjectNode();
 | 
				
			||||||
 | 
					        outlineNode.put("Title", outline.getTitle());
 | 
				
			||||||
 | 
					        // You can add other properties if needed
 | 
				
			||||||
 | 
					        arrayNode.add(outlineNode);
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        for (PdfOutline child : outline.getAllChildren()) {
 | 
				
			||||||
 | 
					            addOutlinesToArray(child, arrayNode);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    public String getPageOrientation(double width, double height) {        
 | 
				
			||||||
 | 
					        if (width > height) {
 | 
				
			||||||
 | 
					            return "Landscape";
 | 
				
			||||||
 | 
					        } else if (height > width) {
 | 
				
			||||||
 | 
					            return "Portrait";
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            return "Square";
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    public String getPageSize(double width, double height) {
 | 
				
			||||||
 | 
					        // Common aspect ratios used for standard paper sizes
 | 
				
			||||||
 | 
					        double[] aspectRatios = {4.0 / 3.0, 3.0 / 2.0, Math.sqrt(2.0), 16.0 / 9.0};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Check if the page matches any common aspect ratio
 | 
				
			||||||
 | 
					        for (double aspectRatio : aspectRatios) {
 | 
				
			||||||
 | 
					            if (isCloseToAspectRatio(width, height, aspectRatio)) {
 | 
				
			||||||
 | 
					                return "Standard";
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // If not a standard aspect ratio, consider it as a custom size
 | 
				
			||||||
 | 
					        return "Custom";
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    private boolean isCloseToAspectRatio(double width, double height, double aspectRatio) {
 | 
				
			||||||
 | 
					        // Calculate the aspect ratio of the page
 | 
				
			||||||
 | 
					        double pageAspectRatio = width / height;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Compare the page aspect ratio with the common aspect ratio within a threshold
 | 
				
			||||||
 | 
					        return Math.abs(pageAspectRatio - aspectRatio) <= 0.05;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    public boolean checkForStandard(PdfDocument document, String standardKeyword) {
 | 
				
			||||||
 | 
					        // Check Output Intents
 | 
				
			||||||
 | 
					        boolean foundInOutputIntents = checkOutputIntent(document, standardKeyword);
 | 
				
			||||||
 | 
					        if (foundInOutputIntents) return true;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Check XMP Metadata (rudimentary)
 | 
				
			||||||
 | 
					        try {
 | 
				
			||||||
 | 
					            byte[] metadataBytes = document.getXmpMetadata();
 | 
				
			||||||
 | 
					            if (metadataBytes != null) {
 | 
				
			||||||
 | 
					                XMPMeta xmpMeta = XMPMetaFactory.parseFromBuffer(metadataBytes);
 | 
				
			||||||
 | 
					                String xmpString = xmpMeta.dumpObject();
 | 
				
			||||||
 | 
					                if (xmpString.contains(standardKeyword)) {
 | 
				
			||||||
 | 
					                    return true;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        } catch (XMPException e) {
 | 
				
			||||||
 | 
					            e.printStackTrace();
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return false;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    public boolean checkOutputIntent(PdfDocument document, String standard) {
 | 
				
			||||||
 | 
					        PdfArray outputIntents = document.getCatalog().getPdfObject().getAsArray(PdfName.OutputIntents);
 | 
				
			||||||
 | 
					        if (outputIntents != null && !outputIntents.isEmpty()) {
 | 
				
			||||||
 | 
					            for (int i = 0; i < outputIntents.size(); i++) {
 | 
				
			||||||
 | 
					                PdfDictionary outputIntentDict = outputIntents.getAsDictionary(i);
 | 
				
			||||||
 | 
					                if (outputIntentDict != null) {
 | 
				
			||||||
 | 
					                    PdfString s = outputIntentDict.getAsString(PdfName.S);
 | 
				
			||||||
 | 
					                    if (s != null && s.toString().contains(standard)) {
 | 
				
			||||||
 | 
					                        return true;
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        return false;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    public ArrayNode exploreStructureTree(List<Object> nodes) {
 | 
				
			||||||
 | 
					        ArrayNode elementsArray = objectMapper.createArrayNode();
 | 
				
			||||||
 | 
					        if (nodes != null) {
 | 
				
			||||||
 | 
					            for (Object obj : nodes) {
 | 
				
			||||||
 | 
					                if (obj instanceof PDStructureNode) {
 | 
				
			||||||
 | 
					                    PDStructureNode node = (PDStructureNode) obj;
 | 
				
			||||||
 | 
					                    ObjectNode elementNode = objectMapper.createObjectNode();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    if (node instanceof PDStructureElement) {
 | 
				
			||||||
 | 
					                        PDStructureElement structureElement = (PDStructureElement) node;
 | 
				
			||||||
 | 
					                        elementNode.put("Type", structureElement.getStructureType());
 | 
				
			||||||
 | 
					                        elementNode.put("Content", getContent(structureElement));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        // Recursively explore child elements
 | 
				
			||||||
 | 
					                        ArrayNode childElements = exploreStructureTree(structureElement.getKids());
 | 
				
			||||||
 | 
					                        if (childElements.size() > 0) {
 | 
				
			||||||
 | 
					                            elementNode.set("Children", childElements);
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    elementsArray.add(elementNode);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        return elementsArray;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    public String getContent(PDStructureElement structureElement) {
 | 
				
			||||||
 | 
					        StringBuilder contentBuilder = new StringBuilder();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for (Object item : structureElement.getKids()) {
 | 
				
			||||||
 | 
					            if (item instanceof COSString) {
 | 
				
			||||||
 | 
					                COSString cosString = (COSString) item;
 | 
				
			||||||
 | 
					                contentBuilder.append(cosString.getString());
 | 
				
			||||||
 | 
					            } else if (item instanceof PDStructureElement) {
 | 
				
			||||||
 | 
					                // For simplicity, we're handling only COSString and PDStructureElement here
 | 
				
			||||||
 | 
					                // but a more comprehensive method would handle other types too
 | 
				
			||||||
 | 
					                contentBuilder.append(getContent((PDStructureElement) item));
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return contentBuilder.toString();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    private String formatDate(Calendar calendar) {
 | 
				
			||||||
        if (calendar != null) {
 | 
					        if (calendar != null) {
 | 
				
			||||||
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
 | 
					            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
 | 
				
			||||||
            return sdf.format(calendar.getTime());
 | 
					            return sdf.format(calendar.getTime());
 | 
				
			||||||
@ -329,7 +758,7 @@ public class PDFExtractor {
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    private static String getPageModeDescription(PdfName pageMode) {
 | 
					    private String getPageModeDescription(PdfName pageMode) {
 | 
				
			||||||
        return pageMode != null ? pageMode.toString().replaceFirst("/", "") : "Unknown";
 | 
					        return pageMode != null ? pageMode.toString().replaceFirst("/", "") : "Unknown";
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
@ -52,4 +52,11 @@ public class SecurityWebController {
 | 
				
			|||||||
        model.addAttribute("currentPage", "sanitize-pdf");
 | 
					        model.addAttribute("currentPage", "sanitize-pdf");
 | 
				
			||||||
        return "security/sanitize-pdf";
 | 
					        return "security/sanitize-pdf";
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    @GetMapping("/get-info-on-pdf")
 | 
				
			||||||
 | 
					    @Hidden
 | 
				
			||||||
 | 
					    public String getInfo(Model model) {
 | 
				
			||||||
 | 
					        model.addAttribute("currentPage", "get-info-on-pdf");
 | 
				
			||||||
 | 
					        return "security/get-info-on-pdf";
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										33
									
								
								src/main/resources/templates/security/get-info-on-pdf.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								src/main/resources/templates/security/get-info-on-pdf.html
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,33 @@
 | 
				
			|||||||
 | 
					<!DOCTYPE html>
					<html th:lang="${#locale.toString()}" th:lang-direction="#{language.direction}" xmlns:th="http://www.thymeleaf.org">

					<th:block th:insert="~{fragments/common :: head(title=#{getPdfInfo.title})}"></th:block>

					<body>
					    <th:block th:insert="~{fragments/common :: game}"></th:block>
					    <div id="page-container">
					        <div id="content-wrap">
					            <div th:insert="~{fragments/navbar.html :: navbar}"></div>
					            <br> <br>
					            <div class="container">
					                <div class="row justify-content-center">
					                    <div class="col-md-6">
					                        <h2 th:text="#{getPdfInfo.header}"></h2>
					                        <p th:text="#{processTimeWarning}"></p>
					                        <!--/* Upload form: posts the single selected file as multipart/form-data to get-info-on-pdf. */-->
					                        <form method="post" enctype="multipart/form-data" th:action="@{get-info-on-pdf}">
					                            <div th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false)}"></div>
					                            <br>
					                            <button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{getPdfInfo.submit}"></button>

					                        </form>

					                    </div>
					                </div>
					            </div>

					        </div>
					        <div th:insert="~{fragments/footer.html :: footer}"></div>
					    </div>
					</body>
					</html>
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user