diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java index 5c388b504..f08675f7f 100644 --- a/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java +++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java @@ -11,6 +11,7 @@ import java.time.Instant; import java.time.ZoneId; import java.time.ZonedDateTime; import java.util.*; +import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -26,22 +27,33 @@ import org.apache.pdfbox.pdfwriter.compress.CompressParameters; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDDocumentInformation; +import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; +import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.PDMetadata; +import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.common.PDStream; +import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification; +import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDFontDescriptor; import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont; import org.apache.pdfbox.pdmodel.font.PDType0Font; +import org.apache.pdfbox.pdmodel.font.PDType1CFont; +import org.apache.pdfbox.pdmodel.font.PDType1Font; import org.apache.pdfbox.pdmodel.graphics.PDXObject; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; import org.apache.pdfbox.pdmodel.graphics.color.PDOutputIntent; import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; +import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentProperties; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary; import org.apache.pdfbox.pdmodel.interactive.viewerpreferences.PDViewerPreferences; import org.apache.pdfbox.preflight.Format; import org.apache.pdfbox.preflight.PreflightConfiguration; @@ -88,11 +100,93 @@ import stirling.software.common.util.WebResponseUtils; @RequiredArgsConstructor public class ConvertPDFToPDFA { + private static final Pattern NON_PRINTABLE_ASCII = Pattern.compile("[^\\x20-\\x7E]"); private final RuntimePathConfig runtimePathConfig; private static final String ICC_RESOURCE_PATH = "/icc/sRGB2014.icc"; private static final int PDFA_COMPATIBILITY_POLICY = 1; + private static final String ANNOTATION_HIGHLIGHT = "Highlight"; + private static final String ANNOTATION_POPUP = "Popup"; + private static final String ANNOTATION_LINK = "Link"; + + private static final COSName COS_AF_RELATIONSHIP = COSName.getPDFName("AFRelationship"); + private static final COSName COS_AF = COSName.getPDFName("AF"); // The Associated Files Array + private static final COSName COS_UF = COSName.getPDFName("UF"); + private static final String AF_RELATIONSHIP_UNSPECIFIED = "Unspecified"; + + private static final Map MIME_TYPE_MAP = + Map.ofEntries( + Map.entry(".xml", "application/xml"), + Map.entry(".json", "application/json"), + Map.entry(".txt", "text/plain"), + Map.entry(".csv", "text/csv"), + Map.entry(".pdf", "application/pdf"), + Map.entry(".png", "image/png"), + Map.entry(".jpg", "image/jpeg"), + Map.entry(".jpeg", "image/jpeg"), + Map.entry(".gif", "image/gif"), + Map.entry(".html", "text/html"), + Map.entry(".htm", "text/html"), + Map.entry(".zip", "application/zip"), + Map.entry(".doc", "application/msword"), + Map.entry( + ".docx", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document"), + Map.entry(".xls", "application/vnd.ms-excel"), + Map.entry( + ".xlsx", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), + Map.entry(".ppt", "application/vnd.ms-powerpoint"), + Map.entry( + ".pptx", + "application/vnd.openxmlformats-officedocument.presentationml.presentation"), + Map.entry(".svg", "image/svg+xml"), + Map.entry(".webp", "image/webp"), + Map.entry(".mp3", "audio/mpeg"), + Map.entry(".mp4", "video/mp4"), + Map.entry(".wav", "audio/wav"), + Map.entry(".avi", "video/x-msvideo"), + Map.entry(".tar", "application/x-tar"), + Map.entry(".gz", "application/gzip"), + Map.entry(".rar", "application/vnd.rar"), + Map.entry(".7z", "application/x-7z-compressed")); + + private static final String DEFAULT_MIME_TYPE = "application/octet-stream"; + + private static void fixCidSetIssues(PDDocument document) { + for (PDPage page : document.getPages()) { + PDResources resources = page.getResources(); + if (resources == null) continue; + + for (COSName fontName : resources.getFontNames()) { + try { + PDFont font = resources.getFont(fontName); + if (font == null) continue; + + PDFontDescriptor descriptor = font.getFontDescriptor(); + if (descriptor == null) continue; + + COSDictionary fontDict = descriptor.getCOSObject(); + + // Remove invalid or incomplete CIDSet entries for PDF/A-1 compliance + // PDF/A-1 requires CIDSet to be present and complete for subsetted CIDFonts + // For PDF/A-2+, CIDSet is optional but must be complete if present + COSBase cidSet = fontDict.getDictionaryObject(COSName.getPDFName("CIDSet")); + if (cidSet != null) { + // If CIDSet exists but may be invalid, remove it to avoid validation errors + // This is safer than trying to fix incomplete CIDSet streams + fontDict.removeItem(COSName.getPDFName("CIDSet")); + log.debug( + "Removed potentially invalid CIDSet from font {}", font.getName()); + } + } catch (Exception e) { + log.debug("Error processing CIDSet for font: {}", e.getMessage()); + } + } + } + } + private static void validateAndWarnPdfA(byte[] pdfBytes, PdfaProfile profile, String method) { Path tempPdfPath = null; try { @@ -562,7 +656,7 @@ public class ConvertPDFToPDFA { } } - private static void fixCidSetIssues(PDDocument document) throws IOException { + public static void fixType1FontCharSet(PDDocument document) throws IOException { for (PDPage page : document.getPages()) { PDResources resources = page.getResources(); if (resources == null) continue; @@ -572,24 +666,39 @@ public class ConvertPDFToPDFA { PDFont font = resources.getFont(fontName); if (font == null) continue; + String fontNameStr = font.getName(); + if (fontNameStr == null) continue; + PDFontDescriptor descriptor = font.getFontDescriptor(); if (descriptor == null) continue; - COSDictionary fontDict = descriptor.getCOSObject(); + // Check if this is a Type1 font + if (fontNameStr.contains("Type1") + || descriptor.getFontFile() != null + || (descriptor.getFontFile2() == null + && descriptor.getFontFile3() == null)) { - // Remove invalid or incomplete CIDSet entries for PDF/A-1 compliance - // PDF/A-1 requires CIDSet to be present and complete for subsetted CIDFonts - // For PDF/A-2+, CIDSet is optional but must be complete if present - COSBase cidSet = fontDict.getDictionaryObject(COSName.getPDFName("CIDSet")); - if (cidSet != null) { - // If CIDSet exists but may be invalid, remove it to avoid validation errors - // This is safer than trying to fix incomplete CIDSet streams - fontDict.removeItem(COSName.getPDFName("CIDSet")); - log.debug( - "Removed potentially invalid CIDSet from font {}", font.getName()); + String existingCharSet = + descriptor.getCOSObject().getString(COSName.CHAR_SET); + + String glyphSet = buildStandardType1GlyphSet(); + if (!glyphSet.isEmpty()) { + if (existingCharSet == null + || existingCharSet.trim().isEmpty() + || countGlyphs(existingCharSet) < countGlyphs(glyphSet)) { + descriptor.getCOSObject().setString(COSName.CHAR_SET, glyphSet); + log.debug( + "Fixed CharSet for Type1 font {} with {} glyphs (was: {})", + fontNameStr, + countGlyphs(glyphSet), + existingCharSet != null ? countGlyphs(existingCharSet) : 0); + } + } } } catch (Exception e) { - log.debug("Error processing CIDSet for font: {}", e.getMessage()); + log.warn( + "Error processing font descriptor for page resource: {}", + e.getMessage()); } } } @@ -694,108 +803,105 @@ public class ConvertPDFToPDFA { } } - private byte[] convertWithGhostscript(Path inputPdf, Path workingDir, PdfaProfile profile) - throws IOException, InterruptedException { - Path outputPdf = workingDir.resolve("gs_output.pdf"); - ColorProfiles colorProfiles = prepareColorProfiles(workingDir); - Path pdfaDefFile = createPdfaDefFile(workingDir, colorProfiles, profile); + private static int countGlyphs(String charSet) { + if (charSet == null || charSet.isEmpty()) return 0; + // CharSet format: /glyph1/glyph2/glyph3... + return (int) charSet.chars().filter(c -> c == '/').count(); + } - // Preprocess PDF for PDF/A compliance - Path preprocessedPdf = inputPdf; - - // For PDF/A-1, clean CIDSet issues that may cause validation failures - if (profile.getPart() == 1) { - Path cidSetCleaned = cleanCidSetWithQpdf(inputPdf); - if (cidSetCleaned != null) { - preprocessedPdf = cidSetCleaned; - } - } - - // Normalize PDF with qpdf before Ghostscript conversion to ensure proper font program - // handling - Path normalizedInputPdf = normalizePdfWithQpdf(preprocessedPdf); - Path inputForGs = (normalizedInputPdf != null) ? normalizedInputPdf : preprocessedPdf; - - try { - List command = - buildGhostscriptCommand( - inputForGs, outputPdf, colorProfiles, workingDir, profile, pdfaDefFile); - - ProcessExecutorResult result = - ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT) - .runCommandWithOutputHandling(command); - - if (result.getRc() != 0) { - throw new IOException("Ghostscript exited with code " + result.getRc()); - } - - if (!Files.exists(outputPdf)) { - throw new IOException("Ghostscript did not produce an output file"); - } - - return Files.readAllBytes(outputPdf); - } finally { - // Clean up temporary files - if (normalizedInputPdf != null && !normalizedInputPdf.equals(preprocessedPdf)) { - try { - Files.deleteIfExists(normalizedInputPdf); - } catch (IOException e) { - log.debug("Failed to delete temporary normalized file", e); + private static void sanitizePdfA(COSBase base, int pdfaPart) { + if (base instanceof COSDictionary dict) { + if (pdfaPart == 3) { + COSName type = dict.getCOSName(COSName.TYPE); + if (COSName.FILESPEC.equals(type) || dict.containsKey(COSName.EF)) { + return; // Don't sanitize embedded file structures } } - if (preprocessedPdf != null && !preprocessedPdf.equals(inputPdf)) { - try { - Files.deleteIfExists(preprocessedPdf); - } catch (IOException e) { - log.debug("Failed to delete temporary CIDSet cleaned file", e); + + if (pdfaPart == 1) { + COSBase group = dict.getDictionaryObject(COSName.GROUP); + if (group instanceof COSDictionary gDict + && COSName.TRANSPARENCY.equals(gDict.getCOSName(COSName.S))) { + dict.removeItem(COSName.GROUP); } + + dict.removeItem(COSName.SMASK); + dict.removeItem(COSName.CA); + dict.removeItem(COSName.getPDFName("ca")); + } + + if (dict.containsKey(COSName.INTERPOLATE) + && dict.getBoolean(COSName.INTERPOLATE, true)) { + dict.setBoolean(COSName.INTERPOLATE, false); + } + + dict.removeItem(COSName.JAVA_SCRIPT); + dict.removeItem(COSName.getPDFName("JS")); + dict.removeItem(COSName.getPDFName("RichMedia")); + dict.removeItem(COSName.getPDFName("Movie")); + dict.removeItem(COSName.getPDFName("Sound")); + dict.removeItem(COSName.getPDFName("Launch")); + + if (pdfaPart != 3) { + dict.removeItem(COSName.URI); + } + dict.removeItem(COSName.getPDFName("GoToR")); + + if (pdfaPart != 3) { + dict.removeItem(COSName.EMBEDDED_FILES); + dict.removeItem(COSName.FILESPEC); + } + + for (Map.Entry entry : dict.entrySet()) { + if (pdfaPart == 3) { + COSName key = entry.getKey(); + if (COSName.EF.equals(key) + || COSName.EMBEDDED_FILES.equals(key) + || COSName.FILESPEC.equals(key) + || COSName.F.equals(key) + || COSName.UF.equals(key)) { + continue; // Don't recurse into embedded file content + } + } + sanitizePdfA(entry.getValue(), pdfaPart); + } + + } else if (base instanceof COSArray arr) { + for (COSBase item : arr) { + sanitizePdfA(item, pdfaPart); } } } - private static void fixType1FontCharSet(PDDocument document) throws IOException { - for (PDPage page : document.getPages()) { - PDResources resources = page.getResources(); - if (resources == null) continue; + private static void removeElementsForPdfA(PDDocument doc, int pdfaPart) { - for (COSName fontName : resources.getFontNames()) { - try { - PDFont font = resources.getFont(fontName); - if (font == null) continue; + if (pdfaPart == 1) { + doc.getDocumentCatalog().getCOSObject().removeItem(COSName.getPDFName("OCProperties")); + } - String fontNameStr = font.getName(); - if (fontNameStr == null) continue; + if (pdfaPart == 3) { + ensureEmbeddedFilesAFRelationship(doc); + } - PDFontDescriptor descriptor = font.getFontDescriptor(); - if (descriptor == null) continue; + for (PDPage page : doc.getPages()) { + if (pdfaPart == 1) { + page.setAnnotations(Collections.emptyList()); + } + PDResources res = page.getResources(); + sanitizePdfA(page.getCOSObject(), pdfaPart); - // Check if this is a Type1 font - if (fontNameStr.contains("Type1") - || descriptor.getFontFile() != null - || (descriptor.getFontFile2() == null - && descriptor.getFontFile3() == null)) { - - // Check if CharSet is missing or suspicious - String existingCharSet = - descriptor.getCOSObject().getString(COSName.CHAR_SET); - if (existingCharSet == null || existingCharSet.trim().isEmpty()) { - - // Build a CharSet from commonly used glyphs - // For Type1 fonts, include standard PDF glyphs - String glyphSet = buildStandardType1GlyphSet(); - if (!glyphSet.isEmpty()) { - descriptor.getCOSObject().setString(COSName.CHAR_SET, glyphSet); - log.debug( - "Fixed CharSet for Type1 font {} with {} glyphs", - fontNameStr, - glyphSet.split(" ").length); - } + if (res != null) { + for (COSName name : res.getXObjectNames()) { + try { + PDXObject xo = res.getXObject(name); + if (xo instanceof PDFormXObject form) { + sanitizePdfA(form.getCOSObject(), pdfaPart); + } else if (xo instanceof PDImageXObject img) { + sanitizePdfA(img.getCOSObject(), pdfaPart); } + } catch (IOException ioe) { + log.error("Cannot load XObject {}: {}", name.getName(), ioe.getMessage()); } - } catch (Exception e) { - log.warn( - "Error processing font descriptor for page resource: {}", - e.getMessage()); } } } @@ -1038,74 +1144,32 @@ public class ConvertPDFToPDFA { return baos.toByteArray(); } - private Path runLibreOfficeConversion(Path tempInputFile, int pdfaPart) throws Exception { - // Create temp output directory - Path tempOutputDir = Files.createTempDirectory("output_"); + private static void ensureEmbeddedFilesAFRelationship(PDDocument doc) { + PDDocumentCatalog catalog = doc.getDocumentCatalog(); + PDDocumentNameDictionary names = catalog.getNames(); + if (names == null) return; - // Determine PDF/A filter based on requested format - String pdfFilter = - pdfaPart == 2 - ? "pdf:writer_pdf_Export:{\"SelectPdfVersion\":{\"type\":\"long\",\"value\":\"2\"}}" - : "pdf:writer_pdf_Export:{\"SelectPdfVersion\":{\"type\":\"long\",\"value\":\"1\"}}"; + PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles(); + if (embeddedFiles == null) return; - Path libreOfficeProfile = Files.createTempDirectory("libreoffice_profile_"); try { - // Prepare LibreOffice command - List command = - new ArrayList<>( - Arrays.asList( - runtimePathConfig.getSOfficePath(), - "-env:UserInstallation=" - + libreOfficeProfile.toUri().toString(), - "--headless", - "--nologo", - "--convert-to", - pdfFilter, - "--outdir", - tempOutputDir.toString(), - tempInputFile.toString())); - - ProcessExecutorResult returnCode = - ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE) - .runCommandWithOutputHandling(command); - - if (returnCode.getRc() != 0) { - log.error("PDF/A conversion failed with return code: {}", returnCode.getRc()); - throw ExceptionUtils.createPdfaConversionFailedException(); - } - } finally { - FileUtils.deleteQuietly(libreOfficeProfile.toFile()); + processEmbeddedFilesForAFRelationship(embeddedFiles); + } catch (IOException e) { + log.warn("Could not process embedded files AFRelationship: {}", e.getMessage()); } - - // Get the output file - File[] outputFiles = tempOutputDir.toFile().listFiles(); - if (outputFiles == null || outputFiles.length != 1) { - throw ExceptionUtils.createPdfaConversionFailedException(); - } - return outputFiles[0].toPath(); } - private byte[] convertWithGhostscriptX(Path inputPdf, Path workingDir, PdfXProfile profile) - throws IOException, InterruptedException { - Path outputPdf = workingDir.resolve("gs_output_pdfx.pdf"); - ColorProfiles colorProfiles = prepareColorProfiles(workingDir); + private static void processEmbeddedFilesForAFRelationship( + PDEmbeddedFilesNameTreeNode embeddedFiles) throws IOException { + Map fileSpecs = embeddedFiles.getNames(); + if (fileSpecs == null) return; - List command = - buildGhostscriptCommandX(inputPdf, outputPdf, colorProfiles, workingDir, profile); - - ProcessExecutorResult result = - ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT) - .runCommandWithOutputHandling(command); - - if (result.getRc() != 0) { - throw new IOException("Ghostscript exited with code " + result.getRc()); + for (PDComplexFileSpecification fileSpec : fileSpecs.values()) { + COSDictionary fileSpecDict = fileSpec.getCOSObject(); + if (!fileSpecDict.containsKey(COS_AF_RELATIONSHIP)) { + fileSpecDict.setName(COS_AF_RELATIONSHIP, AF_RELATIONSHIP_UNSPECIFIED); + } } - - if (!Files.exists(outputPdf)) { - throw new IOException("Ghostscript did not produce an output file"); - } - - return Files.readAllBytes(outputPdf); } private static boolean isTransparencyGroup(COSDictionary dict) { @@ -1139,71 +1203,156 @@ public class ConvertPDFToPDFA { return false; } - private static void sanitizePdfA(COSBase base, int pdfaPart) { - if (base instanceof COSDictionary dict) { - if (pdfaPart == 1) { - COSBase group = dict.getDictionaryObject(COSName.GROUP); - if (group instanceof COSDictionary gDict - && COSName.TRANSPARENCY.equals(gDict.getCOSName(COSName.S))) { - dict.removeItem(COSName.GROUP); + private static File preProcessHighlights(File inputPdf) throws Exception { + + try (PDDocument document = Loader.loadPDF(inputPdf)) { + + for (PDPage page : document.getPages()) { + List annotations = page.getAnnotations(); + for (PDAnnotation annot : annotations) { + if (ANNOTATION_HIGHLIGHT.equals(annot.getSubtype()) + && annot instanceof PDAnnotationTextMarkup highlight) { + float[] colorComponents = + highlight.getColor() != null + ? highlight.getColor().getComponents() + : new float[] {1f, 1f, 0f}; + Color highlightColor = + new Color( + colorComponents[0], colorComponents[1], colorComponents[2]); + + float[] quadPoints = highlight.getQuadPoints(); + if (quadPoints != null) { + try (PDPageContentStream cs = + new PDPageContentStream( + document, + page, + PDPageContentStream.AppendMode.PREPEND, + true, + true)) { + + cs.setStrokingColor(highlightColor); + cs.setLineWidth(0.05f); + float spacing = 2f; + for (int i = 0; i < quadPoints.length; i += 8) { + float minX = + Math.min( + Math.min(quadPoints[i], quadPoints[i + 2]), + Math.min(quadPoints[i + 4], quadPoints[i + 6])); + float maxX = + Math.max( + Math.max(quadPoints[i], quadPoints[i + 2]), + Math.max(quadPoints[i + 4], quadPoints[i + 6])); + float minY = + Math.min( + Math.min(quadPoints[i + 1], quadPoints[i + 3]), + Math.min(quadPoints[i + 5], quadPoints[i + 7])); + float maxY = + Math.max( + Math.max(quadPoints[i + 1], quadPoints[i + 3]), + Math.max(quadPoints[i + 5], quadPoints[i + 7])); + + float width = maxX - minX; + float height = maxY - minY; + + for (float y = minY; y <= maxY; y += spacing) { + float len = Math.min(width, maxY - y); + cs.moveTo(minX, y); + cs.lineTo(minX + len, y + len); + } + for (float x = minX + spacing; x <= maxX; x += spacing) { + float len = Math.min(maxX - x, height); + cs.moveTo(x, minY); + cs.lineTo(x + len, minY + len); + } + } + + cs.stroke(); + } + } + + page.getAnnotations().remove(highlight); + COSDictionary pageDict = page.getCOSObject(); + + if (pageDict.containsKey(COSName.GROUP)) { + COSDictionary groupDict = + (COSDictionary) pageDict.getDictionaryObject(COSName.GROUP); + + if (groupDict != null + && COSName.TRANSPARENCY + .getName() + .equalsIgnoreCase( + groupDict.getNameAsString(COSName.S))) { + pageDict.removeItem(COSName.GROUP); + } + } + } + } + } + // Save the modified document to a temporary file. + File preProcessedFile = Files.createTempFile("preprocessed_", ".pdf").toFile(); + document.save(preProcessedFile); + return preProcessedFile; + } + } + + private static void sanitizeFontResources(PDDocument doc) throws IOException { + for (PDPage page : doc.getPages()) { + PDResources res = page.getResources(); + if (res == null) continue; + + for (COSName fontName : res.getFontNames()) { + PDFont font = res.getFont(fontName); + if (font == null) continue; + + PDFontDescriptor desc = font.getFontDescriptor(); + if (desc == null) continue; + + COSDictionary descDict = desc.getCOSObject(); + + if (descDict.containsKey(COSName.getPDFName("CIDSet"))) { + descDict.removeItem(COSName.getPDFName("CIDSet")); } - dict.removeItem(COSName.SMASK); - dict.removeItem(COSName.CA); - dict.removeItem(COSName.getPDFName("ca")); - } - - if (dict.containsKey(COSName.INTERPOLATE) - && dict.getBoolean(COSName.INTERPOLATE, true)) { - dict.setBoolean(COSName.INTERPOLATE, false); - } - - dict.removeItem(COSName.JAVA_SCRIPT); - dict.removeItem(COSName.getPDFName("JS")); - dict.removeItem(COSName.getPDFName("RichMedia")); - dict.removeItem(COSName.getPDFName("Movie")); - dict.removeItem(COSName.getPDFName("Sound")); - dict.removeItem(COSName.getPDFName("Launch")); - dict.removeItem(COSName.URI); - dict.removeItem(COSName.getPDFName("GoToR")); - dict.removeItem(COSName.EMBEDDED_FILES); - dict.removeItem(COSName.FILESPEC); - - for (Map.Entry entry : dict.entrySet()) { - sanitizePdfA(entry.getValue(), pdfaPart); - } - - } else if (base instanceof COSArray arr) { - for (COSBase item : arr) { - sanitizePdfA(item, pdfaPart); + if (isType1Font(font)) { + if (descDict.containsKey(COSName.CHAR_SET)) { + String existingCharSet = descDict.getString(COSName.CHAR_SET); + if (existingCharSet == null + || existingCharSet.trim().isEmpty() + || "/.notdef".equals(existingCharSet)) { + descDict.removeItem(COSName.CHAR_SET); + log.debug( + "Removed invalid CharSet from Type 1 font: {}", font.getName()); + } + } + } } } } - private static void removeElementsForPdfA(PDDocument doc, int pdfaPart) { + private static boolean isType1Font(PDFont font) { + return font instanceof PDType1Font || font instanceof PDType1CFont; + } - if (pdfaPart == 1) { - doc.getDocumentCatalog().getCOSObject().removeItem(COSName.getPDFName("OCProperties")); - } + private static void fixOptionalContentGroups(PDDocument doc) { + PDDocumentCatalog catalog = doc.getDocumentCatalog(); + PDOptionalContentProperties ocProps = catalog.getOCProperties(); - for (PDPage page : doc.getPages()) { - if (pdfaPart == 1) { - page.setAnnotations(Collections.emptyList()); - } - PDResources res = page.getResources(); - sanitizePdfA(page.getCOSObject(), pdfaPart); + if (ocProps == null) return; - if (res != null) { - for (COSName name : res.getXObjectNames()) { - try { - PDXObject xo = res.getXObject(name); - if (xo instanceof PDFormXObject form) { - sanitizePdfA(form.getCOSObject(), pdfaPart); - } else if (xo instanceof PDImageXObject img) { - sanitizePdfA(img.getCOSObject(), pdfaPart); - } - } catch (IOException ioe) { - log.error("Cannot load XObject {}: {}", name.getName(), ioe.getMessage()); + COSBase ocPropsBase = + catalog.getCOSObject().getDictionaryObject(COSName.getPDFName("OCProperties")); + if (!(ocPropsBase instanceof COSDictionary ocPropsDict)) return; + COSBase ocgs = ocPropsDict.getDictionaryObject(COSName.OCGS); + + if (ocgs instanceof COSArray ocgArray) { + int unnamedCount = 1; + + for (COSBase base : ocgArray) { + if (base instanceof COSDictionary ocgDict) { + if (!ocgDict.containsKey(COSName.NAME)) { + String newName = "Layer " + unnamedCount++; + ocgDict.setString(COSName.NAME, newName); + log.debug("Fixed OCG missing name, set to: {}", newName); } } } @@ -1323,151 +1472,117 @@ public class ConvertPDFToPDFA { document.getDocumentCatalog().setMetadata(newMetadata); } - private static File preProcessHighlights(File inputPdf) throws Exception { + private byte[] convertWithGhostscript(Path inputPdf, Path workingDir, PdfaProfile profile) + throws IOException, InterruptedException { + Path outputPdf = workingDir.resolve("gs_output.pdf"); + ColorProfiles colorProfiles = prepareColorProfiles(workingDir); + Path pdfaDefFile = createPdfaDefFile(workingDir, colorProfiles, profile); - try (PDDocument document = Loader.loadPDF(inputPdf)) { + // Preprocess PDF for PDF/A compliance using the sanitizer + Path sanitizedInputPdf = sanitizePdfWithPdfBox(inputPdf); + Path preprocessedPdf = sanitizedInputPdf != null ? sanitizedInputPdf : inputPdf; - for (PDPage page : document.getPages()) { - List annotations = page.getAnnotations(); - for (PDAnnotation annot : annotations) { - if ("Highlight".equals(annot.getSubtype()) - && annot instanceof PDAnnotationTextMarkup highlight) { - float[] colorComponents = - highlight.getColor() != null - ? highlight.getColor().getComponents() - : new float[] {1f, 1f, 0f}; - Color highlightColor = - new Color( - colorComponents[0], colorComponents[1], colorComponents[2]); + // For PDF/A-1, clean CIDSet issues that may cause validation failures + if (profile.getPart() == 1) { + Path cidSetCleaned = cleanCidSetWithQpdf(preprocessedPdf); + if (cidSetCleaned != null) { + preprocessedPdf = cidSetCleaned; + } + } - float[] quadPoints = highlight.getQuadPoints(); - if (quadPoints != null) { - try (PDPageContentStream cs = - new PDPageContentStream( - document, - page, - PDPageContentStream.AppendMode.PREPEND, - true, - true)) { + // Normalize PDF with qpdf before Ghostscript conversion to ensure proper font program + // handling + Path normalizedInputPdf = normalizePdfWithQpdf(preprocessedPdf); + Path inputForGs = (normalizedInputPdf != null) ? normalizedInputPdf : preprocessedPdf; - cs.setStrokingColor(highlightColor); - cs.setLineWidth(0.05f); - float spacing = 2f; - for (int i = 0; i < quadPoints.length; i += 8) { - float minX = - Math.min( - Math.min(quadPoints[i], quadPoints[i + 2]), - Math.min(quadPoints[i + 4], quadPoints[i + 6])); - float maxX = - Math.max( - Math.max(quadPoints[i], quadPoints[i + 2]), - Math.max(quadPoints[i + 4], quadPoints[i + 6])); - float minY = - Math.min( - Math.min(quadPoints[i + 1], quadPoints[i + 3]), - Math.min(quadPoints[i + 5], quadPoints[i + 7])); - float maxY = - Math.max( - Math.max(quadPoints[i + 1], quadPoints[i + 3]), - Math.max(quadPoints[i + 5], quadPoints[i + 7])); + try { + List command = + buildGhostscriptCommand( + inputForGs, outputPdf, colorProfiles, workingDir, profile, pdfaDefFile); - float width = maxX - minX; - float height = maxY - minY; + ProcessExecutorResult result = + ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT) + .runCommandWithOutputHandling(command); - for (float y = minY; y <= maxY; y += spacing) { - float len = Math.min(width, maxY - y); - cs.moveTo(minX, y); - cs.lineTo(minX + len, y + len); - } - for (float x = minX + spacing; x <= maxX; x += spacing) { - float len = Math.min(maxX - x, height); - cs.moveTo(x, minY); - cs.lineTo(x + len, minY + len); - } - } + if (result.getRc() != 0) { + throw new IOException("Ghostscript exited with code " + result.getRc()); + } - cs.stroke(); - } - } + if (!Files.exists(outputPdf)) { + throw new IOException("Ghostscript did not produce an output file"); + } - page.getAnnotations().remove(highlight); - COSDictionary pageDict = page.getCOSObject(); - - if (pageDict.containsKey(COSName.GROUP)) { - COSDictionary groupDict = - (COSDictionary) pageDict.getDictionaryObject(COSName.GROUP); - - if (groupDict != null - && COSName.TRANSPARENCY - .getName() - .equalsIgnoreCase( - groupDict.getNameAsString(COSName.S))) { - pageDict.removeItem(COSName.GROUP); - } - } - } + return Files.readAllBytes(outputPdf); + } finally { + // Clean up temporary files + if (normalizedInputPdf != null && !normalizedInputPdf.equals(preprocessedPdf)) { + try { + Files.deleteIfExists(normalizedInputPdf); + } catch (IOException e) { + log.debug("Failed to delete temporary normalized file", e); + } + } + if (preprocessedPdf != null && !preprocessedPdf.equals(inputPdf)) { + try { + Files.deleteIfExists(preprocessedPdf); + } catch (IOException e) { + log.debug("Failed to delete temporary sanitized or CIDSet cleaned file", e); + } + } + if (sanitizedInputPdf != null && !sanitizedInputPdf.equals(inputPdf)) { + try { + Files.deleteIfExists(sanitizedInputPdf); + } catch (IOException e) { + log.debug("Failed to delete temporary sanitized file", e); } } - // Save the modified document to a temporary file. - File preProcessedFile = Files.createTempFile("preprocessed_", ".pdf").toFile(); - document.save(preProcessedFile); - return preProcessedFile; } } - private ResponseEntity handlePdfAConversion( - MultipartFile inputFile, String outputFormat) throws Exception { - PdfaProfile profile = PdfaProfile.fromRequest(outputFormat); + private Path runLibreOfficeConversion(Path tempInputFile, int pdfaPart) throws Exception { + // Create temp output directory + Path tempOutputDir = Files.createTempDirectory("output_"); - // Get the original filename without extension - String originalFileName = Filenames.toSimpleFileName(inputFile.getOriginalFilename()); - if (originalFileName == null || originalFileName.trim().isEmpty()) { - originalFileName = "output.pdf"; - } - String baseFileName = - originalFileName.contains(".") - ? originalFileName.substring(0, originalFileName.lastIndexOf('.')) - : originalFileName; - - Path workingDir = Files.createTempDirectory("pdfa_conversion_"); - Path inputPath = workingDir.resolve("input.pdf"); - inputFile.transferTo(inputPath); + // Determine PDF/A filter based on requested format + String pdfFilter = + pdfaPart == 2 + ? "pdf:writer_pdf_Export:{\"SelectPdfVersion\":{\"type\":\"long\",\"value\":\"2\"}}" + : "pdf:writer_pdf_Export:{\"SelectPdfVersion\":{\"type\":\"long\",\"value\":\"1\"}}"; + Path libreOfficeProfile = Files.createTempDirectory("libreoffice_profile_"); try { - byte[] converted; + // Prepare LibreOffice command + List command = + new ArrayList<>( + Arrays.asList( + runtimePathConfig.getSOfficePath(), + "-env:UserInstallation=" + libreOfficeProfile.toUri(), + "--headless", + "--nologo", + "--convert-to", + pdfFilter, + "--outdir", + tempOutputDir.toString(), + tempInputFile.toString())); - // Try Ghostscript first (preferred method) - if (isGhostscriptAvailable()) { - log.info("Using Ghostscript for PDF/A conversion to {}", profile.getDisplayName()); - try { - converted = convertWithGhostscript(inputPath, workingDir, profile); - String outputFilename = baseFileName + profile.outputSuffix(); + ProcessExecutorResult returnCode = + ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE) + .runCommandWithOutputHandling(command); - validateAndWarnPdfA(converted, profile, "Ghostscript"); - - return WebResponseUtils.bytesToWebResponse( - converted, outputFilename, MediaType.APPLICATION_PDF); - } catch (Exception e) { - log.warn( - "Ghostscript conversion failed, falling back to PDFBox/LibreOffice method", - e); - } - } else { - log.info("Ghostscript not available, using PDFBox/LibreOffice fallback method"); + if (returnCode.getRc() != 0) { + log.error("PDF/A conversion failed with return code: {}", returnCode.getRc()); + throw ExceptionUtils.createPdfaConversionFailedException(); } - - converted = convertWithPdfBoxMethod(inputPath, profile); - String outputFilename = baseFileName + profile.outputSuffix(); - - // Validate with PDFBox preflight and warn if issues found - validateAndWarnPdfA(converted, profile, "PDFBox/LibreOffice"); - - return WebResponseUtils.bytesToWebResponse( - converted, outputFilename, MediaType.APPLICATION_PDF); - } finally { - deleteQuietly(workingDir); + FileUtils.deleteQuietly(libreOfficeProfile.toFile()); } + + // Get the output file + File[] outputFiles = tempOutputDir.toFile().listFiles(); + if (outputFiles == null || outputFiles.length != 1) { + throw ExceptionUtils.createPdfaConversionFailedException(); + } + return outputFiles[0].toPath(); } private Path normalizePdfWithQpdf(Path inputPdf) { @@ -1599,6 +1714,415 @@ public class ConvertPDFToPDFA { } } + private byte[] convertWithGhostscriptX(Path inputPdf, Path workingDir, PdfXProfile profile) + throws IOException, InterruptedException { + Path outputPdf = workingDir.resolve("gs_output_pdfx.pdf"); + ColorProfiles colorProfiles = prepareColorProfiles(workingDir); + + // Sanitize the PDF before PDF/X conversion for better Ghostscript compatibility + Path sanitizedInputPdf = sanitizePdfWithPdfBox(inputPdf); + Path inputForGs = sanitizedInputPdf != null ? sanitizedInputPdf : inputPdf; + + List command = + buildGhostscriptCommandX(inputForGs, outputPdf, colorProfiles, workingDir, profile); + + ProcessExecutorResult result = + ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT) + .runCommandWithOutputHandling(command); + + if (result.getRc() != 0) { + throw new IOException("Ghostscript exited with code " + result.getRc()); + } + + if (!Files.exists(outputPdf)) { + throw new IOException("Ghostscript did not produce an output file"); + } + + return Files.readAllBytes(outputPdf); + } + + private ResponseEntity handlePdfAConversion( + MultipartFile inputFile, String outputFormat) throws Exception { + PdfaProfile profile = PdfaProfile.fromRequest(outputFormat); + + // Get the original filename without extension + String originalFileName = Filenames.toSimpleFileName(inputFile.getOriginalFilename()); + if (originalFileName == null || originalFileName.trim().isEmpty()) { + originalFileName = "output.pdf"; + } + String baseFileName = + originalFileName.contains(".") + ? originalFileName.substring(0, originalFileName.lastIndexOf('.')) + : originalFileName; + + Path workingDir = Files.createTempDirectory("pdfa_conversion_"); + Path inputPath = workingDir.resolve("input.pdf"); + inputFile.transferTo(inputPath); + + try { + byte[] converted; + + // Try Ghostscript first (preferred method) + if (isGhostscriptAvailable()) { + log.info("Using Ghostscript for PDF/A conversion to {}", profile.getDisplayName()); + try { + converted = convertWithGhostscript(inputPath, workingDir, profile); + String outputFilename = baseFileName + profile.outputSuffix(); + + validateAndWarnPdfA(converted, profile, "Ghostscript"); + + return WebResponseUtils.bytesToWebResponse( + converted, outputFilename, MediaType.APPLICATION_PDF); + } catch (IOException | InterruptedException e) { + log.warn( + "Ghostscript conversion failed, falling back to PDFBox/LibreOffice method", + e); + } + } else { + log.info("Ghostscript not available, using PDFBox/LibreOffice fallback method"); + } + + converted = convertWithPdfBoxMethod(inputPath, profile); + String outputFilename = baseFileName + profile.outputSuffix(); + + // Validate with PDFBox preflight and warn if issues found + validateAndWarnPdfA(converted, profile, "PDFBox/LibreOffice"); + + return WebResponseUtils.bytesToWebResponse( + converted, outputFilename, MediaType.APPLICATION_PDF); + + } finally { + deleteQuietly(workingDir); + } + } + + private Path sanitizePdfWithPdfBox(Path inputPdf) { + try { + Path sanitizedPath = + inputPdf.getParent().resolve("sanitized_" + inputPdf.getFileName().toString()); + + sanitizeDocument(inputPdf, sanitizedPath); + + log.info("PDF sanitized with PDFBox for better Ghostscript compatibility"); + return sanitizedPath; + } catch (IOException e) { + log.warn( + "PDF sanitization I/O error, proceeding with original file: {}", + e.getMessage()); + return null; + } + } + + private void sanitizeDocument(Path inputPath, Path outputPath) throws IOException { + try (PDDocument doc = Loader.loadPDF(inputPath.toFile())) { + Map sanitizers = new LinkedHashMap<>(); + sanitizers.put("Flatten highlight annotations", this::flattenHighlightsToContent); + sanitizers.put("Sanitize font resources", ConvertPDFToPDFA::sanitizeFontResources); + sanitizers.put("Clean metadata", this::sanitizeMetadata); + sanitizers.put("Remove forbidden actions", this::removeForbiddenActions); + sanitizers.put("Ensure annotation appearances", this::ensureAnnotationAppearances); + sanitizers.put("Ensure embedded file compliance", this::ensureEmbeddedFileCompliance); + sanitizers.put( + "Fix optional content groups", ConvertPDFToPDFA::fixOptionalContentGroups); + + for (Map.Entry entry : sanitizers.entrySet()) { + try { + entry.getValue().sanitize(doc); + log.debug("Sanitization step completed: {}", entry.getKey()); + } catch (Exception e) { + log.warn( + "Sanitization step '{}' failed, continuing: {}", + entry.getKey(), + e.getMessage()); + } + } + + doc.save(outputPath.toFile()); + } + } + + private void flattenHighlightsToContent(PDDocument doc) throws IOException { + for (PDPage page : doc.getPages()) { + List annotations = new ArrayList<>(page.getAnnotations()); + List toRemove = new ArrayList<>(); + + try (PDPageContentStream cs = + new PDPageContentStream( + doc, page, PDPageContentStream.AppendMode.PREPEND, true, true)) { + + for (PDAnnotation annot : annotations) { + if (annot instanceof PDAnnotationTextMarkup highlight + && ANNOTATION_HIGHLIGHT.equals(annot.getSubtype())) { + + PDColor color = highlight.getColor(); + if (color != null) { + cs.setNonStrokingColor(color); + } else { + cs.setNonStrokingColor(Color.YELLOW); + } + + float[] quads = highlight.getQuadPoints(); + if (!isValidQuadPoints(quads)) { + log.warn( + "Invalid quad points array for highlight annotation: {}", + quads != null ? "length=" + quads.length : "null"); + continue; + } + + for (int i = 0; i <= quads.length - 8; i += 8) { + float minX = Float.MAX_VALUE, minY = Float.MAX_VALUE; + float maxX = -Float.MAX_VALUE, maxY = -Float.MAX_VALUE; + + for (int j = 0; j < 8; j += 2) { + float x = quads[i + j]; + float y = quads[i + j + 1]; + minX = Math.min(minX, x); + maxX = Math.max(maxX, x); + minY = Math.min(minY, y); + maxY = Math.max(maxY, y); + } + + // Only draw if we have a valid rectangle + float width = maxX - minX; + float height = maxY - minY; + if (width > 0 && height > 0) { + cs.addRect(minX, minY, width, height); + cs.fill(); + } + } + toRemove.add(annot); + } + } + } + page.getAnnotations().removeAll(toRemove); + } + } + + private boolean isValidQuadPoints(float[] quads) { + return quads != null && quads.length >= 8 && quads.length % 8 == 0; + } + + private void sanitizeMetadata(PDDocument doc) { + PDDocumentInformation info = doc.getDocumentInformation(); + if (info == null) { + info = new PDDocumentInformation(); + doc.setDocumentInformation(info); + } + + Set keys = info.getMetadataKeys(); + if (keys != null) { // Add null check + for (String key : + new HashSet<>(keys)) { // Copy to avoid ConcurrentModificationException + String value = info.getCustomMetadataValue(key); + if (value != null) { + String clean = NON_PRINTABLE_ASCII.matcher(value).replaceAll(""); + info.setCustomMetadataValue(key, clean); + } + } + } + + info.setProducer("Stirling-PDF Sanitizer"); + } + + private void removeForbiddenActions(PDDocument doc) { + doc.getDocumentCatalog().setOpenAction(null); + doc.getDocumentCatalog().getCOSObject().removeItem(COSName.JAVA_SCRIPT); + } + + private void ensureAnnotationAppearances(PDDocument doc) throws IOException { + for (PDPage page : doc.getPages()) { + List annotations = page.getAnnotations(); + List toRemove = new ArrayList<>(); + + for (PDAnnotation annot : annotations) { + String subtype = annot.getSubtype(); + + if (ANNOTATION_POPUP.equals(subtype) || ANNOTATION_LINK.equals(subtype)) { + continue; + } + + PDRectangle rect = annot.getRectangle(); + if (rect != null && isZeroSizeRect(rect)) { + continue; + } + + PDAppearanceDictionary appearanceDict = annot.getAppearance(); + if (appearanceDict == null || appearanceDict.getNormalAppearance() == null) { + if (!tryGenerateAppearance(doc, page, annot)) { + log.warn("Removing annotation without appearance: {} on page", subtype); + toRemove.add(annot); + } + } + } + + if (!toRemove.isEmpty()) { + annotations.removeAll(toRemove); + } + } + } + + private boolean isZeroSizeRect(PDRectangle rect) { + return Float.compare(rect.getLowerLeftX(), rect.getUpperRightX()) == 0 + && Float.compare(rect.getLowerLeftY(), rect.getUpperRightY()) == 0; + } + + private boolean tryGenerateAppearance(PDDocument doc, PDPage page, PDAnnotation annot) { + try { + if (annot instanceof PDAnnotationWidget) { + annot.constructAppearances(); + return annot.getAppearance() != null; + } + + if (annot instanceof PDAnnotationTextMarkup) { + return false; // Will be handled by flattening + } + + annot.constructAppearances(); + return annot.getAppearance() != null; + + } catch (Exception e) { + log.debug("Could not generate appearance for annotation: {}", e.getMessage()); + return false; + } + } + + public void ensureEmbeddedFileCompliance(PDDocument doc) { + PDDocumentCatalog catalog = doc.getDocumentCatalog(); + PDDocumentNameDictionary names = catalog.getNames(); + if (names == null) return; + + PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles(); + if (embeddedFiles == null) return; + + try { + Map fileSpecs = embeddedFiles.getNames(); + if (fileSpecs == null || fileSpecs.isEmpty()) return; + + COSArray afArray = new COSArray(); + if (catalog.getCOSObject().containsKey(COS_AF)) { + try { + afArray = (COSArray) catalog.getCOSObject().getDictionaryObject(COS_AF); + } catch (Exception e) { + afArray = new COSArray(); + } + } + + boolean afArrayModified = false; + + for (Map.Entry entry : fileSpecs.entrySet()) { + String fileName = entry.getKey(); + PDComplexFileSpecification fileSpec = entry.getValue(); + COSDictionary fileSpecDict = fileSpec.getCOSObject(); + + if (!fileSpecDict.containsKey(COS_AF_RELATIONSHIP)) { + fileSpecDict.setName(COS_AF_RELATIONSHIP, AF_RELATIONSHIP_UNSPECIFIED); + log.debug("Added AFRelationship 'Unspecified' to embedded file: {}", fileName); + } + + if (fileSpec.getFile() == null || fileSpec.getFile().isEmpty()) { + fileSpec.setFile(fileName); + } + if (!fileSpecDict.containsKey(COS_UF)) { + fileSpecDict.setString(COS_UF, fileName); + } + + ensureEmbeddedFileMimeType(fileSpec, fileName); + + boolean alreadyInAf = false; + for (int i = 0; i < afArray.size(); i++) { + if (afArray.getObject(i) == fileSpecDict) { + alreadyInAf = true; + break; + } + } + + if (!alreadyInAf) { + afArray.add(fileSpecDict); + afArrayModified = true; + } + } + + if (afArrayModified) { + catalog.getCOSObject().setItem(COS_AF, afArray); + log.debug( + "Updated Document Catalog 'AF' array with {} associated files", + afArray.size()); + } + + } catch (IOException e) { + log.warn("Could not process embedded files for PDF/A-3 compliance: {}", e.getMessage()); + } + } + + private void ensureEmbeddedFileMimeType(PDComplexFileSpecification fileSpec, String fileName) { + PDEmbeddedFile embeddedFile = fileSpec.getEmbeddedFileUnicode(); + if (embeddedFile == null) { + embeddedFile = fileSpec.getEmbeddedFile(); + } + + if (embeddedFile != null) { + String currentSubtype = embeddedFile.getSubtype(); + if (currentSubtype == null || currentSubtype.isEmpty()) { + String mimeType = detectMimeTypeFromFilename(fileName); + embeddedFile.setSubtype(mimeType); + log.debug("Set MIME type '{}' for embedded file: {}", mimeType, fileName); + } + } + } + + private String detectMimeTypeFromFilename(String fileName) { + if (fileName == null || fileName.isEmpty()) { + return DEFAULT_MIME_TYPE; + } + + String lowerName = fileName.toLowerCase(Locale.ROOT); + + return MIME_TYPE_MAP.entrySet().stream() + .filter(entry -> lowerName.endsWith(entry.getKey())) + .map(Map.Entry::getValue) + .findFirst() + .orElse(DEFAULT_MIME_TYPE); + } + + public byte[] convertPDDocumentToPDFA(PDDocument document, String outputFormat) + throws IOException { + PdfaProfile profile = PdfaProfile.fromRequest(outputFormat); + + Path workingDir = Files.createTempDirectory("pdfa_conversion_"); + Path inputPath = workingDir.resolve("input.pdf"); + + try { + document.save(inputPath.toFile()); + + if (isGhostscriptAvailable()) { + log.info("Using Ghostscript for PDF/A conversion to {}", profile.getDisplayName()); + try { + byte[] converted = convertWithGhostscript(inputPath, workingDir, profile); + validateAndWarnPdfA(converted, profile, "Ghostscript"); + return converted; + } catch (IOException | InterruptedException e) { + log.warn( + "Ghostscript conversion failed, falling back to PDFBox/LibreOffice method", + e); + } + } else { + log.info("Ghostscript not available, using PDFBox/LibreOffice fallback method"); + } + + byte[] converted; + try { + converted = convertWithPdfBoxMethod(inputPath, profile); + } catch (Exception e) { + throw new IOException("PDF/A conversion failed", e); + } + validateAndWarnPdfA(converted, profile, "PDFBox/LibreOffice"); + return converted; + + } finally { + deleteQuietly(workingDir); + } + } + private void copyResourceIcc(Path target) throws IOException { try (InputStream in = getClass().getResourceAsStream(ICC_RESOURCE_PATH)) { if (in == null) { @@ -1730,5 +2254,10 @@ public class ConvertPDFToPDFA { } } + @FunctionalInterface + private interface DocumentSanitizer { + void sanitize(PDDocument doc) throws IOException; + } + private record ColorProfiles(Path rgb, Path gray) {} } diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/AttachmentController.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/AttachmentController.java index a356582c3..197a9f52d 100644 --- a/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/AttachmentController.java +++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/AttachmentController.java @@ -1,5 +1,6 @@ package stirling.software.SPDF.controller.api.misc; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.List; import java.util.Optional; @@ -17,8 +18,12 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import stirling.software.SPDF.config.swagger.StandardPdfResponse; +import stirling.software.SPDF.controller.api.converters.ConvertPDFToPDFA; import stirling.software.SPDF.model.api.misc.AddAttachmentRequest; +import stirling.software.SPDF.model.api.misc.DeleteAttachmentRequest; import stirling.software.SPDF.model.api.misc.ExtractAttachmentsRequest; +import stirling.software.SPDF.model.api.misc.ListAttachmentsRequest; +import stirling.software.SPDF.model.api.misc.RenameAttachmentRequest; import stirling.software.SPDF.service.AttachmentServiceInterface; import stirling.software.common.annotations.AutoJobPostMapping; import stirling.software.common.annotations.api.MiscApi; @@ -36,6 +41,8 @@ public class AttachmentController { private final AttachmentServiceInterface pdfAttachmentService; + private final ConvertPDFToPDFA convertPDFToPDFA; + @AutoJobPostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/add-attachments") @StandardPdfResponse @Operation( @@ -43,19 +50,87 @@ public class AttachmentController { description = "This endpoint adds attachments to a PDF. Input:PDF, Output:PDF Type:MISO") public ResponseEntity addAttachments(@ModelAttribute AddAttachmentRequest request) - throws IOException { + throws Exception { MultipartFile fileInput = request.getFileInput(); List attachments = request.getAttachments(); + boolean convertToPdfA3b = request.isConvertToPdfA3b(); - PDDocument document = - pdfAttachmentService.addAttachment( - pdfDocumentFactory.load(fileInput, false), attachments); + validateAttachmentRequest(attachments); - return WebResponseUtils.pdfDocToWebResponse( - document, - GeneralUtils.generateFilename( - Filenames.toSimpleFileName(fileInput.getOriginalFilename()), - "_with_attachments.pdf")); + String originalFileName = Filenames.toSimpleFileName(fileInput.getOriginalFilename()); + if (originalFileName == null || originalFileName.isEmpty()) { + originalFileName = "document"; + } + String baseFileName = + originalFileName.contains(".") + ? originalFileName.substring(0, originalFileName.lastIndexOf('.')) + : originalFileName; + + if (convertToPdfA3b) { + byte[] pdfaBytes; + try (PDDocument document = pdfDocumentFactory.load(request, false)) { + pdfaBytes = convertPDFToPDFA.convertPDDocumentToPDFA(document, "pdfa-3b"); + } + + try (PDDocument pdfaDocument = org.apache.pdfbox.Loader.loadPDF(pdfaBytes)) { + pdfAttachmentService.addAttachment(pdfaDocument, attachments); + + convertPDFToPDFA.ensureEmbeddedFileCompliance(pdfaDocument); + + ConvertPDFToPDFA.fixType1FontCharSet(pdfaDocument); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + pdfaDocument.save(baos); + byte[] resultBytes = baos.toByteArray(); + + String outputFilename = baseFileName + "_with_attachments_PDFA-3b.pdf"; + return WebResponseUtils.bytesToWebResponse( + resultBytes, outputFilename, MediaType.APPLICATION_PDF); + } + } else { + try (PDDocument document = pdfDocumentFactory.load(request, false)) { + pdfAttachmentService.addAttachment(document, attachments); + return WebResponseUtils.pdfDocToWebResponse( + document, + GeneralUtils.generateFilename( + Filenames.toSimpleFileName(fileInput.getOriginalFilename()), + "_with_attachments.pdf")); + } + } + } + + private void validateAttachmentRequest(List attachments) { + if (attachments == null || attachments.isEmpty()) { + throw ExceptionUtils.createIllegalArgumentException( + "error.attachmentsRequired", "At least one attachment is required"); + } + + final long maxAttachmentSize = 50L * 1024 * 1024; // 50 MB per attachment + final long maxTotalSize = 200L * 1024 * 1024; // 200 MB total + + long totalSize = 0; + for (MultipartFile attachment : attachments) { + if (attachment == null || attachment.isEmpty()) { + throw ExceptionUtils.createIllegalArgumentException( + "error.attachmentEmpty", "Attachment files cannot be null or empty"); + } + if (attachment.getSize() > maxAttachmentSize) { + throw ExceptionUtils.createIllegalArgumentException( + "error.attachmentTooLarge", + "Attachment ''{0}'' exceeds maximum size of {1} bytes", + attachment.getOriginalFilename(), + maxAttachmentSize); + } + totalSize += attachment.getSize(); + } + + if (totalSize > maxTotalSize) { + throw ExceptionUtils.createIllegalArgumentException( + "error.totalAttachmentsTooLarge", + "Total attachment size {0} exceeds maximum of {1} bytes", + totalSize, + maxTotalSize); + } } @AutoJobPostMapping( @@ -88,4 +163,82 @@ public class AttachmentController { extracted.get(), outputName, MediaType.APPLICATION_OCTET_STREAM); } } + + @AutoJobPostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/list-attachments") + @Operation( + summary = "List attachments in PDF", + description = + "This endpoint lists all embedded attachments in a PDF. Input:PDF Output:JSON Type:SISO") + public ResponseEntity> + listAttachments(@ModelAttribute ListAttachmentsRequest request) throws IOException { + try (PDDocument document = pdfDocumentFactory.load(request, true)) { + List attachments = + pdfAttachmentService.listAttachments(document); + + return ResponseEntity.ok(attachments); + } + } + + @AutoJobPostMapping( + consumes = MediaType.MULTIPART_FORM_DATA_VALUE, + value = "/rename-attachment") + @StandardPdfResponse + @Operation( + summary = "Rename attachment in PDF", + description = + "This endpoint renames an embedded attachment in a PDF. Input:PDF Output:PDF Type:MISO") + public ResponseEntity renameAttachment(@ModelAttribute RenameAttachmentRequest request) + throws Exception { + MultipartFile fileInput = request.getFileInput(); + String attachmentName = request.getAttachmentName(); + String newName = request.getNewName(); + + if (attachmentName == null || attachmentName.isBlank()) { + throw ExceptionUtils.createIllegalArgumentException( + "error.attachmentNameRequired", "Attachment name cannot be null or empty"); + } + if (newName == null || newName.isBlank()) { + throw ExceptionUtils.createIllegalArgumentException( + "error.newNameRequired", "New attachment name cannot be null or empty"); + } + + try (PDDocument document = pdfDocumentFactory.load(request, false)) { + pdfAttachmentService.renameAttachment(document, attachmentName, newName); + + return WebResponseUtils.pdfDocToWebResponse( + document, + GeneralUtils.generateFilename( + Filenames.toSimpleFileName(fileInput.getOriginalFilename()), + "_attachment_renamed.pdf")); + } + } + + @AutoJobPostMapping( + consumes = MediaType.MULTIPART_FORM_DATA_VALUE, + value = "/delete-attachment") + @StandardPdfResponse + @Operation( + summary = "Delete attachment from PDF", + description = + "This endpoint deletes an embedded attachment from a PDF. Input:PDF Output:PDF Type:MISO") + public ResponseEntity deleteAttachment(@ModelAttribute DeleteAttachmentRequest request) + throws Exception { + MultipartFile fileInput = request.getFileInput(); + String attachmentName = request.getAttachmentName(); + + if (attachmentName == null || attachmentName.isBlank()) { + throw ExceptionUtils.createIllegalArgumentException( + "error.attachmentNameRequired", "Attachment name cannot be null or empty"); + } + + try (PDDocument document = pdfDocumentFactory.load(request, false)) { + pdfAttachmentService.deleteAttachment(document, attachmentName); + + return WebResponseUtils.pdfDocToWebResponse( + document, + GeneralUtils.generateFilename( + Filenames.toSimpleFileName(fileInput.getOriginalFilename()), + "_attachment_deleted.pdf")); + } + } } diff --git a/app/core/src/main/java/stirling/software/SPDF/exception/GlobalExceptionHandler.java b/app/core/src/main/java/stirling/software/SPDF/exception/GlobalExceptionHandler.java index fd9224a41..82d1b2bb9 100644 --- a/app/core/src/main/java/stirling/software/SPDF/exception/GlobalExceptionHandler.java +++ b/app/core/src/main/java/stirling/software/SPDF/exception/GlobalExceptionHandler.java @@ -12,6 +12,7 @@ import org.springframework.http.HttpStatus; import org.springframework.http.ProblemDetail; import org.springframework.http.ResponseEntity; import org.springframework.http.converter.HttpMessageNotReadableException; +import org.springframework.web.HttpMediaTypeNotAcceptableException; import org.springframework.web.HttpMediaTypeNotSupportedException; import org.springframework.web.HttpRequestMethodNotSupportedException; import org.springframework.web.bind.MethodArgumentNotValidException; @@ -23,6 +24,7 @@ import org.springframework.web.multipart.support.MissingServletRequestPartExcept import org.springframework.web.servlet.NoHandlerFoundException; import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -808,6 +810,56 @@ public class GlobalExceptionHandler { .body(problemDetail); } + /** + * Handle 406 Not Acceptable errors when error responses cannot match client Accept header. + * + *

When thrown: When the client sends Accept: application/pdf but the server needs to return + * a JSON error response (e.g., when an attachment is not found). + * + *

This handler writes directly to HttpServletResponse to bypass Spring's content negotiation + * and ensure error responses are always delivered as JSON. + * + * @param ex the HttpMediaTypeNotAcceptableException + * @param request the HTTP servlet request + * @param response the HTTP servlet response + */ + @ExceptionHandler(HttpMediaTypeNotAcceptableException.class) + public void handleMediaTypeNotAcceptable( + HttpMediaTypeNotAcceptableException ex, + HttpServletRequest request, + HttpServletResponse response) + throws IOException { + + log.warn( + "Media type not acceptable at {}: client accepts {}, server supports {}", + request.getRequestURI(), + request.getHeader("Accept"), + ex.getSupportedMediaTypes()); + + // Write JSON error response directly, bypassing content negotiation + response.setStatus(HttpStatus.NOT_ACCEPTABLE.value()); + response.setContentType("application/problem+json"); + response.setCharacterEncoding("UTF-8"); + + String errorJson = + String.format( + """ + { + "type": "about:blank", + "title": "Not Acceptable", + "status": 406, + "detail": "The requested resource could not be returned in an acceptable format. Error responses are returned as JSON.", + "instance": "%s", + "timestamp": "%s", + "hints": ["Error responses are always returned as application/json or application/problem+json", "Set Accept header to include application/json for proper error handling"] + } + """, + request.getRequestURI(), Instant.now().toString()); + + response.getWriter().write(errorJson); + response.getWriter().flush(); + } + // =========================================================================================== // JAVA STANDARD EXCEPTIONS // =========================================================================================== @@ -963,9 +1015,8 @@ public class GlobalExceptionHandler { // Check if this RuntimeException wraps a typed exception from job execution Throwable cause = ex.getCause(); - if (cause instanceof BaseAppException) { + if (cause instanceof BaseAppException appEx) { // Delegate to specific BaseAppException handlers - BaseAppException appEx = (BaseAppException) cause; if (appEx instanceof PdfPasswordException) { return handlePdfPassword((PdfPasswordException) appEx, request); } else if (appEx instanceof PdfCorruptedException @@ -979,9 +1030,8 @@ public class GlobalExceptionHandler { } else { return handleBaseApp(appEx, request); } - } else if (cause instanceof BaseValidationException) { + } else if (cause instanceof BaseValidationException valEx) { // Delegate to validation exception handlers - BaseValidationException valEx = (BaseValidationException) cause; if (valEx instanceof CbrFormatException || valEx instanceof CbzFormatException || valEx instanceof EmlFormatException) { @@ -992,6 +1042,9 @@ public class GlobalExceptionHandler { } else if (cause instanceof IOException) { // Unwrap and handle IOException (may contain PDF-specific errors) return handleIOException((IOException) cause, request); + } else if (cause instanceof IllegalArgumentException) { + // Unwrap and handle IllegalArgumentException (business logic validation errors) + return handleIllegalArgument((IllegalArgumentException) cause, request); } // Not a wrapped exception - treat as unexpected error diff --git a/app/core/src/main/java/stirling/software/SPDF/model/api/misc/AddAttachmentRequest.java b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/AddAttachmentRequest.java index cf85451f4..48a749098 100644 --- a/app/core/src/main/java/stirling/software/SPDF/model/api/misc/AddAttachmentRequest.java +++ b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/AddAttachmentRequest.java @@ -20,4 +20,10 @@ public class AddAttachmentRequest extends PDFFile { requiredMode = Schema.RequiredMode.REQUIRED, format = "binary") private List attachments; + + @Schema( + description = "Convert the resulting PDF to PDF/A-3b format after adding attachments", + requiredMode = Schema.RequiredMode.NOT_REQUIRED, + defaultValue = "false") + private boolean convertToPdfA3b = false; } diff --git a/app/core/src/main/java/stirling/software/SPDF/model/api/misc/AttachmentInfo.java b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/AttachmentInfo.java new file mode 100644 index 000000000..b80139be4 --- /dev/null +++ b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/AttachmentInfo.java @@ -0,0 +1,17 @@ +package stirling.software.SPDF.model.api.misc; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@NoArgsConstructor +@AllArgsConstructor +public class AttachmentInfo { + private String filename; + private Long size; + private String contentType; + private String description; + private String creationDate; + private String modificationDate; +} diff --git a/app/core/src/main/java/stirling/software/SPDF/model/api/misc/DeleteAttachmentRequest.java b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/DeleteAttachmentRequest.java new file mode 100644 index 000000000..22b7d017b --- /dev/null +++ b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/DeleteAttachmentRequest.java @@ -0,0 +1,18 @@ +package stirling.software.SPDF.model.api.misc; + +import io.swagger.v3.oas.annotations.media.Schema; + +import lombok.Data; +import lombok.EqualsAndHashCode; + +import stirling.software.common.model.api.PDFFile; + +@Data +@EqualsAndHashCode(callSuper = true) +public class DeleteAttachmentRequest extends PDFFile { + + @Schema( + description = "The name of the attachment to delete", + requiredMode = Schema.RequiredMode.REQUIRED) + private String attachmentName; +} diff --git a/app/core/src/main/java/stirling/software/SPDF/model/api/misc/ListAttachmentsRequest.java b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/ListAttachmentsRequest.java new file mode 100644 index 000000000..f30fc7540 --- /dev/null +++ b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/ListAttachmentsRequest.java @@ -0,0 +1,10 @@ +package stirling.software.SPDF.model.api.misc; + +import lombok.Data; +import lombok.EqualsAndHashCode; + +import stirling.software.common.model.api.PDFFile; + +@Data +@EqualsAndHashCode(callSuper = true) +public class ListAttachmentsRequest extends PDFFile {} diff --git a/app/core/src/main/java/stirling/software/SPDF/model/api/misc/RenameAttachmentRequest.java b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/RenameAttachmentRequest.java new file mode 100644 index 000000000..07731fce3 --- /dev/null +++ b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/RenameAttachmentRequest.java @@ -0,0 +1,23 @@ +package stirling.software.SPDF.model.api.misc; + +import io.swagger.v3.oas.annotations.media.Schema; + +import lombok.Data; +import lombok.EqualsAndHashCode; + +import stirling.software.common.model.api.PDFFile; + +@Data +@EqualsAndHashCode(callSuper = true) +public class RenameAttachmentRequest extends PDFFile { + + @Schema( + description = "The current name of the attachment to rename", + requiredMode = Schema.RequiredMode.REQUIRED) + private String attachmentName; + + @Schema( + description = "The new name for the attachment", + requiredMode = Schema.RequiredMode.REQUIRED) + private String newName; +} diff --git a/app/core/src/main/java/stirling/software/SPDF/service/AttachmentService.java b/app/core/src/main/java/stirling/software/SPDF/service/AttachmentService.java index 029e7086c..0f73632d3 100644 --- a/app/core/src/main/java/stirling/software/SPDF/service/AttachmentService.java +++ b/app/core/src/main/java/stirling/software/SPDF/service/AttachmentService.java @@ -8,6 +8,7 @@ import java.nio.file.attribute.FileTime; import java.time.Instant; import java.time.ZoneId; import java.time.ZonedDateTime; +import java.util.ArrayList; import java.util.GregorianCalendar; import java.util.HashMap; import java.util.HashSet; @@ -36,6 +37,9 @@ import io.github.pixee.security.Filenames; import lombok.extern.slf4j.Slf4j; +import stirling.software.SPDF.model.api.misc.AttachmentInfo; +import stirling.software.common.util.ExceptionUtils; + @Slf4j @Service public class AttachmentService implements AttachmentServiceInterface { @@ -216,6 +220,142 @@ public class AttachmentService implements AttachmentServiceInterface { } } + @Override + public List listAttachments(PDDocument document) throws IOException { + List attachments = new ArrayList<>(); + + PDDocumentCatalog catalog = document.getDocumentCatalog(); + if (catalog == null) { + return attachments; + } + + PDDocumentNameDictionary documentNames = catalog.getNames(); + if (documentNames == null) { + return attachments; + } + + PDEmbeddedFilesNameTreeNode embeddedFilesTree = documentNames.getEmbeddedFiles(); + if (embeddedFilesTree == null) { + return attachments; + } + + Map embeddedFiles = new LinkedHashMap<>(); + collectEmbeddedFiles(embeddedFilesTree, embeddedFiles); + + for (Map.Entry entry : embeddedFiles.entrySet()) { + PDComplexFileSpecification fileSpecification = entry.getValue(); + PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpecification); + + if (embeddedFile != null) { + String filename = determineFilename(entry.getKey(), fileSpecification); + String description = fileSpecification.getFileDescription(); + String contentType = embeddedFile.getSubtype(); + Long size = (long) embeddedFile.getSize(); + + String creationDate = null; + if (embeddedFile.getCreationDate() != null) { + creationDate = embeddedFile.getCreationDate().getTime().toString(); + } + + String modificationDate = null; + if (embeddedFile.getModDate() != null) { + modificationDate = embeddedFile.getModDate().getTime().toString(); + } + + AttachmentInfo attachmentInfo = + new AttachmentInfo( + filename, + size, + contentType, + description, + creationDate, + modificationDate); + + attachments.add(attachmentInfo); + } + } + + return attachments; + } + + @Override + public PDDocument renameAttachment(PDDocument document, String attachmentName, String newName) + throws IOException { + PDEmbeddedFilesNameTreeNode embeddedFilesTree = getEmbeddedFilesTree(document); + + Map allEmbeddedFiles = new LinkedHashMap<>(); + collectEmbeddedFiles(embeddedFilesTree, allEmbeddedFiles); + + PDComplexFileSpecification fileToRename = null; + String keyToRename = null; + + for (Map.Entry entry : allEmbeddedFiles.entrySet()) { + String currentName = determineFilename(entry.getKey(), entry.getValue()); + if (currentName.equals(attachmentName)) { + fileToRename = entry.getValue(); + keyToRename = entry.getKey(); + break; + } + } + + if (fileToRename == null || keyToRename == null) { + log.warn("Attachment '{}' not found for renaming", attachmentName); + throw ExceptionUtils.createIllegalArgumentException( + "error.attachmentNotFound", + "Attachment ''{0}'' not found for renaming", + attachmentName); + } + + fileToRename.setFile(newName); + fileToRename.setFileUnicode(newName); + + allEmbeddedFiles.remove(keyToRename); + allEmbeddedFiles.put(newName, fileToRename); + + embeddedFilesTree.setKids(null); + + embeddedFilesTree.setNames(allEmbeddedFiles); + log.info("Renamed attachment from '{}' to '{}'", attachmentName, newName); + + return document; + } + + @Override + public PDDocument deleteAttachment(PDDocument document, String attachmentName) + throws IOException { + PDEmbeddedFilesNameTreeNode embeddedFilesTree = getEmbeddedFilesTree(document); + + Map allEmbeddedFiles = new LinkedHashMap<>(); + collectEmbeddedFiles(embeddedFilesTree, allEmbeddedFiles); + + String keyToRemove = null; + + for (Map.Entry entry : allEmbeddedFiles.entrySet()) { + String currentName = determineFilename(entry.getKey(), entry.getValue()); + if (currentName.equals(attachmentName)) { + keyToRemove = entry.getKey(); + break; + } + } + + if (keyToRemove == null) { + log.warn("Attachment '{}' not found for deletion", attachmentName); + throw ExceptionUtils.createIllegalArgumentException( + "error.attachmentNotFound", + "Attachment ''{0}'' not found for deletion", + attachmentName); + } + + allEmbeddedFiles.remove(keyToRemove); + + embeddedFilesTree.setKids(null); + + embeddedFilesTree.setNames(allEmbeddedFiles); + log.info("Deleted attachment: '{}'", attachmentName); + + return document; + } + private String sanitizeFilename(String candidate) { String sanitized = Filenames.toSimpleFileName(candidate); if (StringUtils.isBlank(sanitized)) { diff --git a/app/core/src/main/java/stirling/software/SPDF/service/AttachmentServiceInterface.java b/app/core/src/main/java/stirling/software/SPDF/service/AttachmentServiceInterface.java index f9e1bfb67..2a6973107 100644 --- a/app/core/src/main/java/stirling/software/SPDF/service/AttachmentServiceInterface.java +++ b/app/core/src/main/java/stirling/software/SPDF/service/AttachmentServiceInterface.java @@ -7,10 +7,19 @@ import java.util.Optional; import org.apache.pdfbox.pdmodel.PDDocument; import org.springframework.web.multipart.MultipartFile; +import stirling.software.SPDF.model.api.misc.AttachmentInfo; + public interface AttachmentServiceInterface { PDDocument addAttachment(PDDocument document, List attachments) throws IOException; Optional extractAttachments(PDDocument document) throws IOException; + + List listAttachments(PDDocument document) throws IOException; + + PDDocument renameAttachment(PDDocument document, String attachmentName, String newName) + throws IOException; + + PDDocument deleteAttachment(PDDocument document, String attachmentName) throws IOException; } diff --git a/app/core/src/test/java/stirling/software/SPDF/controller/api/misc/AttachmentControllerTest.java b/app/core/src/test/java/stirling/software/SPDF/controller/api/misc/AttachmentControllerTest.java index afec68778..fe0e2ca2d 100644 --- a/app/core/src/test/java/stirling/software/SPDF/controller/api/misc/AttachmentControllerTest.java +++ b/app/core/src/test/java/stirling/software/SPDF/controller/api/misc/AttachmentControllerTest.java @@ -67,16 +67,16 @@ class AttachmentControllerTest { } @Test - void addAttachments_Success() throws IOException { + void addAttachments_Success() throws Exception { List attachments = List.of(attachment1, attachment2); request.setAttachments(attachments); request.setFileInput(pdfFile); ResponseEntity expectedResponse = ResponseEntity.ok("modified PDF content".getBytes()); - when(pdfDocumentFactory.load(pdfFile, false)).thenReturn(mockDocument); + when(pdfDocumentFactory.load(request, false)).thenReturn(mockDocument); when(pdfAttachmentService.addAttachment(mockDocument, attachments)) - .thenReturn(modifiedMockDocument); + .thenReturn(mockDocument); try (MockedStatic mockedWebResponseUtils = mockStatic(WebResponseUtils.class)) { @@ -84,8 +84,7 @@ class AttachmentControllerTest { .when( () -> WebResponseUtils.pdfDocToWebResponse( - eq(modifiedMockDocument), - eq("test_with_attachments.pdf"))) + eq(mockDocument), eq("test_with_attachments.pdf"))) .thenReturn(expectedResponse); ResponseEntity response = attachmentController.addAttachments(request); @@ -93,22 +92,22 @@ class AttachmentControllerTest { assertNotNull(response); assertEquals(HttpStatus.OK, response.getStatusCode()); assertNotNull(response.getBody()); - verify(pdfDocumentFactory).load(pdfFile, false); + verify(pdfDocumentFactory).load(request, false); verify(pdfAttachmentService).addAttachment(mockDocument, attachments); } } @Test - void addAttachments_SingleAttachment() throws IOException { + void addAttachments_SingleAttachment() throws Exception { List attachments = List.of(attachment1); request.setAttachments(attachments); request.setFileInput(pdfFile); ResponseEntity expectedResponse = ResponseEntity.ok("modified PDF content".getBytes()); - when(pdfDocumentFactory.load(pdfFile, false)).thenReturn(mockDocument); + when(pdfDocumentFactory.load(request, false)).thenReturn(mockDocument); when(pdfAttachmentService.addAttachment(mockDocument, attachments)) - .thenReturn(modifiedMockDocument); + .thenReturn(mockDocument); try (MockedStatic mockedWebResponseUtils = mockStatic(WebResponseUtils.class)) { @@ -116,8 +115,7 @@ class AttachmentControllerTest { .when( () -> WebResponseUtils.pdfDocToWebResponse( - eq(modifiedMockDocument), - eq("test_with_attachments.pdf"))) + eq(mockDocument), eq("test_with_attachments.pdf"))) .thenReturn(expectedResponse); ResponseEntity response = attachmentController.addAttachments(request); @@ -125,33 +123,33 @@ class AttachmentControllerTest { assertNotNull(response); assertEquals(HttpStatus.OK, response.getStatusCode()); assertNotNull(response.getBody()); - verify(pdfDocumentFactory).load(pdfFile, false); + verify(pdfDocumentFactory).load(request, false); verify(pdfAttachmentService).addAttachment(mockDocument, attachments); } } @Test - void addAttachments_IOExceptionFromPDFLoad() throws IOException { + void addAttachments_IOExceptionFromPDFLoad() throws Exception { List attachments = List.of(attachment1); request.setAttachments(attachments); request.setFileInput(pdfFile); IOException ioException = new IOException("Failed to load PDF"); - when(pdfDocumentFactory.load(pdfFile, false)).thenThrow(ioException); + when(pdfDocumentFactory.load(request, false)).thenThrow(ioException); assertThrows(IOException.class, () -> attachmentController.addAttachments(request)); - verify(pdfDocumentFactory).load(pdfFile, false); + verify(pdfDocumentFactory).load(request, false); verifyNoInteractions(pdfAttachmentService); } @Test - void addAttachments_IOExceptionFromAttachmentService() throws IOException { + void addAttachments_IOExceptionFromAttachmentService() throws Exception { List attachments = List.of(attachment1); request.setAttachments(attachments); request.setFileInput(pdfFile); IOException ioException = new IOException("Failed to add attachment"); - when(pdfDocumentFactory.load(pdfFile, false)).thenReturn(mockDocument); + when(pdfDocumentFactory.load(request, false)).thenReturn(mockDocument); when(pdfAttachmentService.addAttachment(mockDocument, attachments)).thenThrow(ioException); assertThrows(IOException.class, () -> attachmentController.addAttachments(request)); diff --git a/frontend/public/locales/en-GB/translation.toml b/frontend/public/locales/en-GB/translation.toml index f3bd4c5d6..6d27d6f84 100644 --- a/frontend/public/locales/en-GB/translation.toml +++ b/frontend/public/locales/en-GB/translation.toml @@ -1394,6 +1394,11 @@ header = "Add Attachments" add = "Add Attachment" remove = "Remove Attachment" embed = "Embed Attachment" +convertToPdfA3b = "Convert to PDF/A-3b" +convertToPdfA3bDescription = "Creates an archival PDF with embedded attachments" +convertToPdfA3bTooltip = "PDF/A-3b is an archival format ensuring long-term preservation. It allows embedding arbitrary file formats as attachments. Conversion requires Ghostscript and may take longer for large files." +convertToPdfA3bTooltipHeader = "About PDF/A-3b Conversion" +convertToPdfA3bTooltipTitle = "What it does" submit = "Add Attachments" [watermark] diff --git a/frontend/src/core/components/tools/addAttachments/AddAttachmentsSettings.tsx b/frontend/src/core/components/tools/addAttachments/AddAttachmentsSettings.tsx index 33d9e8b33..2108c3c0b 100644 --- a/frontend/src/core/components/tools/addAttachments/AddAttachmentsSettings.tsx +++ b/frontend/src/core/components/tools/addAttachments/AddAttachmentsSettings.tsx @@ -1,13 +1,14 @@ /** * AddAttachmentsSettings - Shared settings component for both tool UI and automation * - * Allows selecting files to attach to PDFs. + * Allows selecting files to attach to PDFs with optional PDF/A-3b conversion support. */ -import { Stack, Text, Group, ActionIcon, ScrollArea, Button } from "@mantine/core"; +import { Stack, Text, Group, ActionIcon, ScrollArea, Button, Checkbox } from "@mantine/core"; import { useTranslation } from "react-i18next"; import { AddAttachmentsParameters } from "@app/hooks/tools/addAttachments/useAddAttachmentsParameters"; import LocalIcon from "@app/components/shared/LocalIcon"; +import { Tooltip } from "@app/components/shared/Tooltip"; interface AddAttachmentsSettingsProps { parameters: AddAttachmentsParameters; @@ -103,6 +104,40 @@ const AddAttachmentsSettings = ({ parameters, onParameterChange, disabled = fals )} + + {/* PDF/A-3b conversion option with informative tooltip */} + + + {t("attachments.convertToPdfA3b", "Convert to PDF/A-3b")} + + + + + } + description={t("attachments.convertToPdfA3bDescription", "Creates an archival PDF with embedded attachments")} + checked={parameters.convertToPdfA3b} + onChange={(event) => onParameterChange('convertToPdfA3b', event.currentTarget.checked)} + disabled={disabled} + styles={{ root: { flex: 1 } }} + /> + ); }; diff --git a/frontend/src/core/hooks/tools/addAttachments/useAddAttachmentsOperation.ts b/frontend/src/core/hooks/tools/addAttachments/useAddAttachmentsOperation.ts index 9785fa998..4614a9cbf 100644 --- a/frontend/src/core/hooks/tools/addAttachments/useAddAttachmentsOperation.ts +++ b/frontend/src/core/hooks/tools/addAttachments/useAddAttachmentsOperation.ts @@ -16,6 +16,8 @@ const buildFormData = (parameters: AddAttachmentsParameters, file: File): FormDa if (attachment) formData.append("attachments", attachment); }); + formData.append("convertToPdfA3b", String(parameters.convertToPdfA3b)); + return formData; }; diff --git a/frontend/src/core/hooks/tools/addAttachments/useAddAttachmentsParameters.ts b/frontend/src/core/hooks/tools/addAttachments/useAddAttachmentsParameters.ts index ce21e3869..1e66120b7 100644 --- a/frontend/src/core/hooks/tools/addAttachments/useAddAttachmentsParameters.ts +++ b/frontend/src/core/hooks/tools/addAttachments/useAddAttachmentsParameters.ts @@ -2,10 +2,12 @@ import { useState } from 'react'; export interface AddAttachmentsParameters { attachments: File[]; + convertToPdfA3b: boolean; } const defaultParameters: AddAttachmentsParameters = { - attachments: [] + attachments: [], + convertToPdfA3b: false }; export const useAddAttachmentsParameters = () => { @@ -33,3 +35,5 @@ export const useAddAttachmentsParameters = () => { validateParameters }; }; + +export const DEFAULT_ADD_ATTACHMENTS_PARAMETERS: AddAttachmentsParameters = defaultParameters;