From 54cd8043190d87570db77fb314f7faeeac79a642 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Sz=C3=BCcs?= <127139797+balazs-szucs@users.noreply.github.com> Date: Wed, 24 Dec 2025 22:35:36 +0100 Subject: [PATCH] [V2] feat(attachments): add PDF/A-3b conversion, attachment listing, renaming, and deletion (#5304) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description of Changes This pull request introduces major improvements to the PDF attachment API, adding new endpoints for listing, renaming, and deleting attachments in PDFs, as well as improving error handling and content negotiation. It also adds support for converting PDFs to PDF/A-3b format when adding attachments and introduces stricter validation for attachment uploads. The exception handling is improved to ensure consistent JSON error responses, even when the client requests a PDF. **API Feature Additions:** * Added new endpoints in `AttachmentController` for listing (`/list-attachments`), renaming (`/rename-attachment`), and deleting (`/delete-attachment`) PDF attachments, with corresponding request and response models: `ListAttachmentsRequest`, `RenameAttachmentRequest`, `DeleteAttachmentRequest`, and `AttachmentInfo`. * Enhanced the `/add-attachments` endpoint to optionally convert the resulting PDF to PDF/A-3b format, controlled by a new `convertToPdfA3b` flag in `AddAttachmentRequest`. **Validation and Robustness:** * Introduced strict validation for attachment uploads, enforcing non-empty attachments, a maximum size per attachment (50 MB), and a total size limit (200 MB). **Content Negotiation:** * Updated `WebMvcConfig` to configure content negotiation, allowing both PDF and JSON responses, and preventing 406 errors when clients request PDFs but errors must be returned as JSON. 
(Screenshots: four images demonstrating the UI changes are attached to the pull request, #5304.) --- ## Checklist ### General - [X] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [X] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [X] I have performed a self-review of my own code - [X] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### Translations (if applicable) - [ ] I ran [`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md) ### UI Changes (if applicable) - [X] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [X] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details. 
--------- Signed-off-by: Balázs Szücs --- .../api/converters/ConvertPDFToPDFA.java | 1211 ++++++++++++----- .../api/misc/AttachmentController.java | 171 ++- .../exception/GlobalExceptionHandler.java | 61 +- .../model/api/misc/AddAttachmentRequest.java | 6 + .../SPDF/model/api/misc/AttachmentInfo.java | 17 + .../api/misc/DeleteAttachmentRequest.java | 18 + .../api/misc/ListAttachmentsRequest.java | 10 + .../api/misc/RenameAttachmentRequest.java | 23 + .../SPDF/service/AttachmentService.java | 140 ++ .../service/AttachmentServiceInterface.java | 9 + .../api/misc/AttachmentControllerTest.java | 32 +- .../public/locales/en-GB/translation.toml | 5 + .../addAttachments/AddAttachmentsSettings.tsx | 39 +- .../useAddAttachmentsOperation.ts | 2 + .../useAddAttachmentsParameters.ts | 6 +- 15 files changed, 1376 insertions(+), 374 deletions(-) create mode 100644 app/core/src/main/java/stirling/software/SPDF/model/api/misc/AttachmentInfo.java create mode 100644 app/core/src/main/java/stirling/software/SPDF/model/api/misc/DeleteAttachmentRequest.java create mode 100644 app/core/src/main/java/stirling/software/SPDF/model/api/misc/ListAttachmentsRequest.java create mode 100644 app/core/src/main/java/stirling/software/SPDF/model/api/misc/RenameAttachmentRequest.java diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java index 5c388b504..f08675f7f 100644 --- a/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java +++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java @@ -11,6 +11,7 @@ import java.time.Instant; import java.time.ZoneId; import java.time.ZonedDateTime; import java.util.*; +import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -26,22 +27,33 @@ import 
org.apache.pdfbox.pdfwriter.compress.CompressParameters; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDDocumentInformation; +import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; +import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.PDMetadata; +import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.common.PDStream; +import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification; +import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDFontDescriptor; import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont; import org.apache.pdfbox.pdmodel.font.PDType0Font; +import org.apache.pdfbox.pdmodel.font.PDType1CFont; +import org.apache.pdfbox.pdmodel.font.PDType1Font; import org.apache.pdfbox.pdmodel.graphics.PDXObject; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; import org.apache.pdfbox.pdmodel.graphics.color.PDOutputIntent; import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; +import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentProperties; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary; import org.apache.pdfbox.pdmodel.interactive.viewerpreferences.PDViewerPreferences; import org.apache.pdfbox.preflight.Format; import 
org.apache.pdfbox.preflight.PreflightConfiguration; @@ -88,11 +100,93 @@ import stirling.software.common.util.WebResponseUtils; @RequiredArgsConstructor public class ConvertPDFToPDFA { + private static final Pattern NON_PRINTABLE_ASCII = Pattern.compile("[^\\x20-\\x7E]"); private final RuntimePathConfig runtimePathConfig; private static final String ICC_RESOURCE_PATH = "/icc/sRGB2014.icc"; private static final int PDFA_COMPATIBILITY_POLICY = 1; + private static final String ANNOTATION_HIGHLIGHT = "Highlight"; + private static final String ANNOTATION_POPUP = "Popup"; + private static final String ANNOTATION_LINK = "Link"; + + private static final COSName COS_AF_RELATIONSHIP = COSName.getPDFName("AFRelationship"); + private static final COSName COS_AF = COSName.getPDFName("AF"); // The Associated Files Array + private static final COSName COS_UF = COSName.getPDFName("UF"); + private static final String AF_RELATIONSHIP_UNSPECIFIED = "Unspecified"; + + private static final Map MIME_TYPE_MAP = + Map.ofEntries( + Map.entry(".xml", "application/xml"), + Map.entry(".json", "application/json"), + Map.entry(".txt", "text/plain"), + Map.entry(".csv", "text/csv"), + Map.entry(".pdf", "application/pdf"), + Map.entry(".png", "image/png"), + Map.entry(".jpg", "image/jpeg"), + Map.entry(".jpeg", "image/jpeg"), + Map.entry(".gif", "image/gif"), + Map.entry(".html", "text/html"), + Map.entry(".htm", "text/html"), + Map.entry(".zip", "application/zip"), + Map.entry(".doc", "application/msword"), + Map.entry( + ".docx", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document"), + Map.entry(".xls", "application/vnd.ms-excel"), + Map.entry( + ".xlsx", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), + Map.entry(".ppt", "application/vnd.ms-powerpoint"), + Map.entry( + ".pptx", + "application/vnd.openxmlformats-officedocument.presentationml.presentation"), + Map.entry(".svg", "image/svg+xml"), + Map.entry(".webp", "image/webp"), + 
Map.entry(".mp3", "audio/mpeg"), + Map.entry(".mp4", "video/mp4"), + Map.entry(".wav", "audio/wav"), + Map.entry(".avi", "video/x-msvideo"), + Map.entry(".tar", "application/x-tar"), + Map.entry(".gz", "application/gzip"), + Map.entry(".rar", "application/vnd.rar"), + Map.entry(".7z", "application/x-7z-compressed")); + + private static final String DEFAULT_MIME_TYPE = "application/octet-stream"; + + private static void fixCidSetIssues(PDDocument document) { + for (PDPage page : document.getPages()) { + PDResources resources = page.getResources(); + if (resources == null) continue; + + for (COSName fontName : resources.getFontNames()) { + try { + PDFont font = resources.getFont(fontName); + if (font == null) continue; + + PDFontDescriptor descriptor = font.getFontDescriptor(); + if (descriptor == null) continue; + + COSDictionary fontDict = descriptor.getCOSObject(); + + // Remove invalid or incomplete CIDSet entries for PDF/A-1 compliance + // PDF/A-1 requires CIDSet to be present and complete for subsetted CIDFonts + // For PDF/A-2+, CIDSet is optional but must be complete if present + COSBase cidSet = fontDict.getDictionaryObject(COSName.getPDFName("CIDSet")); + if (cidSet != null) { + // If CIDSet exists but may be invalid, remove it to avoid validation errors + // This is safer than trying to fix incomplete CIDSet streams + fontDict.removeItem(COSName.getPDFName("CIDSet")); + log.debug( + "Removed potentially invalid CIDSet from font {}", font.getName()); + } + } catch (Exception e) { + log.debug("Error processing CIDSet for font: {}", e.getMessage()); + } + } + } + } + private static void validateAndWarnPdfA(byte[] pdfBytes, PdfaProfile profile, String method) { Path tempPdfPath = null; try { @@ -562,7 +656,7 @@ public class ConvertPDFToPDFA { } } - private static void fixCidSetIssues(PDDocument document) throws IOException { + public static void fixType1FontCharSet(PDDocument document) throws IOException { for (PDPage page : document.getPages()) { PDResources 
resources = page.getResources(); if (resources == null) continue; @@ -572,24 +666,39 @@ public class ConvertPDFToPDFA { PDFont font = resources.getFont(fontName); if (font == null) continue; + String fontNameStr = font.getName(); + if (fontNameStr == null) continue; + PDFontDescriptor descriptor = font.getFontDescriptor(); if (descriptor == null) continue; - COSDictionary fontDict = descriptor.getCOSObject(); + // Check if this is a Type1 font + if (fontNameStr.contains("Type1") + || descriptor.getFontFile() != null + || (descriptor.getFontFile2() == null + && descriptor.getFontFile3() == null)) { - // Remove invalid or incomplete CIDSet entries for PDF/A-1 compliance - // PDF/A-1 requires CIDSet to be present and complete for subsetted CIDFonts - // For PDF/A-2+, CIDSet is optional but must be complete if present - COSBase cidSet = fontDict.getDictionaryObject(COSName.getPDFName("CIDSet")); - if (cidSet != null) { - // If CIDSet exists but may be invalid, remove it to avoid validation errors - // This is safer than trying to fix incomplete CIDSet streams - fontDict.removeItem(COSName.getPDFName("CIDSet")); - log.debug( - "Removed potentially invalid CIDSet from font {}", font.getName()); + String existingCharSet = + descriptor.getCOSObject().getString(COSName.CHAR_SET); + + String glyphSet = buildStandardType1GlyphSet(); + if (!glyphSet.isEmpty()) { + if (existingCharSet == null + || existingCharSet.trim().isEmpty() + || countGlyphs(existingCharSet) < countGlyphs(glyphSet)) { + descriptor.getCOSObject().setString(COSName.CHAR_SET, glyphSet); + log.debug( + "Fixed CharSet for Type1 font {} with {} glyphs (was: {})", + fontNameStr, + countGlyphs(glyphSet), + existingCharSet != null ? 
countGlyphs(existingCharSet) : 0); + } + } } } catch (Exception e) { - log.debug("Error processing CIDSet for font: {}", e.getMessage()); + log.warn( + "Error processing font descriptor for page resource: {}", + e.getMessage()); } } } @@ -694,108 +803,105 @@ public class ConvertPDFToPDFA { } } - private byte[] convertWithGhostscript(Path inputPdf, Path workingDir, PdfaProfile profile) - throws IOException, InterruptedException { - Path outputPdf = workingDir.resolve("gs_output.pdf"); - ColorProfiles colorProfiles = prepareColorProfiles(workingDir); - Path pdfaDefFile = createPdfaDefFile(workingDir, colorProfiles, profile); + private static int countGlyphs(String charSet) { + if (charSet == null || charSet.isEmpty()) return 0; + // CharSet format: /glyph1/glyph2/glyph3... + return (int) charSet.chars().filter(c -> c == '/').count(); + } - // Preprocess PDF for PDF/A compliance - Path preprocessedPdf = inputPdf; - - // For PDF/A-1, clean CIDSet issues that may cause validation failures - if (profile.getPart() == 1) { - Path cidSetCleaned = cleanCidSetWithQpdf(inputPdf); - if (cidSetCleaned != null) { - preprocessedPdf = cidSetCleaned; - } - } - - // Normalize PDF with qpdf before Ghostscript conversion to ensure proper font program - // handling - Path normalizedInputPdf = normalizePdfWithQpdf(preprocessedPdf); - Path inputForGs = (normalizedInputPdf != null) ? 
normalizedInputPdf : preprocessedPdf; - - try { - List command = - buildGhostscriptCommand( - inputForGs, outputPdf, colorProfiles, workingDir, profile, pdfaDefFile); - - ProcessExecutorResult result = - ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT) - .runCommandWithOutputHandling(command); - - if (result.getRc() != 0) { - throw new IOException("Ghostscript exited with code " + result.getRc()); - } - - if (!Files.exists(outputPdf)) { - throw new IOException("Ghostscript did not produce an output file"); - } - - return Files.readAllBytes(outputPdf); - } finally { - // Clean up temporary files - if (normalizedInputPdf != null && !normalizedInputPdf.equals(preprocessedPdf)) { - try { - Files.deleteIfExists(normalizedInputPdf); - } catch (IOException e) { - log.debug("Failed to delete temporary normalized file", e); + private static void sanitizePdfA(COSBase base, int pdfaPart) { + if (base instanceof COSDictionary dict) { + if (pdfaPart == 3) { + COSName type = dict.getCOSName(COSName.TYPE); + if (COSName.FILESPEC.equals(type) || dict.containsKey(COSName.EF)) { + return; // Don't sanitize embedded file structures } } - if (preprocessedPdf != null && !preprocessedPdf.equals(inputPdf)) { - try { - Files.deleteIfExists(preprocessedPdf); - } catch (IOException e) { - log.debug("Failed to delete temporary CIDSet cleaned file", e); + + if (pdfaPart == 1) { + COSBase group = dict.getDictionaryObject(COSName.GROUP); + if (group instanceof COSDictionary gDict + && COSName.TRANSPARENCY.equals(gDict.getCOSName(COSName.S))) { + dict.removeItem(COSName.GROUP); } + + dict.removeItem(COSName.SMASK); + dict.removeItem(COSName.CA); + dict.removeItem(COSName.getPDFName("ca")); + } + + if (dict.containsKey(COSName.INTERPOLATE) + && dict.getBoolean(COSName.INTERPOLATE, true)) { + dict.setBoolean(COSName.INTERPOLATE, false); + } + + dict.removeItem(COSName.JAVA_SCRIPT); + dict.removeItem(COSName.getPDFName("JS")); + dict.removeItem(COSName.getPDFName("RichMedia")); + 
dict.removeItem(COSName.getPDFName("Movie")); + dict.removeItem(COSName.getPDFName("Sound")); + dict.removeItem(COSName.getPDFName("Launch")); + + if (pdfaPart != 3) { + dict.removeItem(COSName.URI); + } + dict.removeItem(COSName.getPDFName("GoToR")); + + if (pdfaPart != 3) { + dict.removeItem(COSName.EMBEDDED_FILES); + dict.removeItem(COSName.FILESPEC); + } + + for (Map.Entry entry : dict.entrySet()) { + if (pdfaPart == 3) { + COSName key = entry.getKey(); + if (COSName.EF.equals(key) + || COSName.EMBEDDED_FILES.equals(key) + || COSName.FILESPEC.equals(key) + || COSName.F.equals(key) + || COSName.UF.equals(key)) { + continue; // Don't recurse into embedded file content + } + } + sanitizePdfA(entry.getValue(), pdfaPart); + } + + } else if (base instanceof COSArray arr) { + for (COSBase item : arr) { + sanitizePdfA(item, pdfaPart); } } } - private static void fixType1FontCharSet(PDDocument document) throws IOException { - for (PDPage page : document.getPages()) { - PDResources resources = page.getResources(); - if (resources == null) continue; + private static void removeElementsForPdfA(PDDocument doc, int pdfaPart) { - for (COSName fontName : resources.getFontNames()) { - try { - PDFont font = resources.getFont(fontName); - if (font == null) continue; + if (pdfaPart == 1) { + doc.getDocumentCatalog().getCOSObject().removeItem(COSName.getPDFName("OCProperties")); + } - String fontNameStr = font.getName(); - if (fontNameStr == null) continue; + if (pdfaPart == 3) { + ensureEmbeddedFilesAFRelationship(doc); + } - PDFontDescriptor descriptor = font.getFontDescriptor(); - if (descriptor == null) continue; + for (PDPage page : doc.getPages()) { + if (pdfaPart == 1) { + page.setAnnotations(Collections.emptyList()); + } + PDResources res = page.getResources(); + sanitizePdfA(page.getCOSObject(), pdfaPart); - // Check if this is a Type1 font - if (fontNameStr.contains("Type1") - || descriptor.getFontFile() != null - || (descriptor.getFontFile2() == null - && 
descriptor.getFontFile3() == null)) { - - // Check if CharSet is missing or suspicious - String existingCharSet = - descriptor.getCOSObject().getString(COSName.CHAR_SET); - if (existingCharSet == null || existingCharSet.trim().isEmpty()) { - - // Build a CharSet from commonly used glyphs - // For Type1 fonts, include standard PDF glyphs - String glyphSet = buildStandardType1GlyphSet(); - if (!glyphSet.isEmpty()) { - descriptor.getCOSObject().setString(COSName.CHAR_SET, glyphSet); - log.debug( - "Fixed CharSet for Type1 font {} with {} glyphs", - fontNameStr, - glyphSet.split(" ").length); - } + if (res != null) { + for (COSName name : res.getXObjectNames()) { + try { + PDXObject xo = res.getXObject(name); + if (xo instanceof PDFormXObject form) { + sanitizePdfA(form.getCOSObject(), pdfaPart); + } else if (xo instanceof PDImageXObject img) { + sanitizePdfA(img.getCOSObject(), pdfaPart); } + } catch (IOException ioe) { + log.error("Cannot load XObject {}: {}", name.getName(), ioe.getMessage()); } - } catch (Exception e) { - log.warn( - "Error processing font descriptor for page resource: {}", - e.getMessage()); } } } @@ -1038,74 +1144,32 @@ public class ConvertPDFToPDFA { return baos.toByteArray(); } - private Path runLibreOfficeConversion(Path tempInputFile, int pdfaPart) throws Exception { - // Create temp output directory - Path tempOutputDir = Files.createTempDirectory("output_"); + private static void ensureEmbeddedFilesAFRelationship(PDDocument doc) { + PDDocumentCatalog catalog = doc.getDocumentCatalog(); + PDDocumentNameDictionary names = catalog.getNames(); + if (names == null) return; - // Determine PDF/A filter based on requested format - String pdfFilter = - pdfaPart == 2 - ? 
"pdf:writer_pdf_Export:{\"SelectPdfVersion\":{\"type\":\"long\",\"value\":\"2\"}}" - : "pdf:writer_pdf_Export:{\"SelectPdfVersion\":{\"type\":\"long\",\"value\":\"1\"}}"; + PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles(); + if (embeddedFiles == null) return; - Path libreOfficeProfile = Files.createTempDirectory("libreoffice_profile_"); try { - // Prepare LibreOffice command - List command = - new ArrayList<>( - Arrays.asList( - runtimePathConfig.getSOfficePath(), - "-env:UserInstallation=" - + libreOfficeProfile.toUri().toString(), - "--headless", - "--nologo", - "--convert-to", - pdfFilter, - "--outdir", - tempOutputDir.toString(), - tempInputFile.toString())); - - ProcessExecutorResult returnCode = - ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE) - .runCommandWithOutputHandling(command); - - if (returnCode.getRc() != 0) { - log.error("PDF/A conversion failed with return code: {}", returnCode.getRc()); - throw ExceptionUtils.createPdfaConversionFailedException(); - } - } finally { - FileUtils.deleteQuietly(libreOfficeProfile.toFile()); + processEmbeddedFilesForAFRelationship(embeddedFiles); + } catch (IOException e) { + log.warn("Could not process embedded files AFRelationship: {}", e.getMessage()); } - - // Get the output file - File[] outputFiles = tempOutputDir.toFile().listFiles(); - if (outputFiles == null || outputFiles.length != 1) { - throw ExceptionUtils.createPdfaConversionFailedException(); - } - return outputFiles[0].toPath(); } - private byte[] convertWithGhostscriptX(Path inputPdf, Path workingDir, PdfXProfile profile) - throws IOException, InterruptedException { - Path outputPdf = workingDir.resolve("gs_output_pdfx.pdf"); - ColorProfiles colorProfiles = prepareColorProfiles(workingDir); + private static void processEmbeddedFilesForAFRelationship( + PDEmbeddedFilesNameTreeNode embeddedFiles) throws IOException { + Map fileSpecs = embeddedFiles.getNames(); + if (fileSpecs == null) return; - List command = - 
buildGhostscriptCommandX(inputPdf, outputPdf, colorProfiles, workingDir, profile); - - ProcessExecutorResult result = - ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT) - .runCommandWithOutputHandling(command); - - if (result.getRc() != 0) { - throw new IOException("Ghostscript exited with code " + result.getRc()); + for (PDComplexFileSpecification fileSpec : fileSpecs.values()) { + COSDictionary fileSpecDict = fileSpec.getCOSObject(); + if (!fileSpecDict.containsKey(COS_AF_RELATIONSHIP)) { + fileSpecDict.setName(COS_AF_RELATIONSHIP, AF_RELATIONSHIP_UNSPECIFIED); + } } - - if (!Files.exists(outputPdf)) { - throw new IOException("Ghostscript did not produce an output file"); - } - - return Files.readAllBytes(outputPdf); } private static boolean isTransparencyGroup(COSDictionary dict) { @@ -1139,71 +1203,156 @@ public class ConvertPDFToPDFA { return false; } - private static void sanitizePdfA(COSBase base, int pdfaPart) { - if (base instanceof COSDictionary dict) { - if (pdfaPart == 1) { - COSBase group = dict.getDictionaryObject(COSName.GROUP); - if (group instanceof COSDictionary gDict - && COSName.TRANSPARENCY.equals(gDict.getCOSName(COSName.S))) { - dict.removeItem(COSName.GROUP); + private static File preProcessHighlights(File inputPdf) throws Exception { + + try (PDDocument document = Loader.loadPDF(inputPdf)) { + + for (PDPage page : document.getPages()) { + List annotations = page.getAnnotations(); + for (PDAnnotation annot : annotations) { + if (ANNOTATION_HIGHLIGHT.equals(annot.getSubtype()) + && annot instanceof PDAnnotationTextMarkup highlight) { + float[] colorComponents = + highlight.getColor() != null + ? 
highlight.getColor().getComponents() + : new float[] {1f, 1f, 0f}; + Color highlightColor = + new Color( + colorComponents[0], colorComponents[1], colorComponents[2]); + + float[] quadPoints = highlight.getQuadPoints(); + if (quadPoints != null) { + try (PDPageContentStream cs = + new PDPageContentStream( + document, + page, + PDPageContentStream.AppendMode.PREPEND, + true, + true)) { + + cs.setStrokingColor(highlightColor); + cs.setLineWidth(0.05f); + float spacing = 2f; + for (int i = 0; i < quadPoints.length; i += 8) { + float minX = + Math.min( + Math.min(quadPoints[i], quadPoints[i + 2]), + Math.min(quadPoints[i + 4], quadPoints[i + 6])); + float maxX = + Math.max( + Math.max(quadPoints[i], quadPoints[i + 2]), + Math.max(quadPoints[i + 4], quadPoints[i + 6])); + float minY = + Math.min( + Math.min(quadPoints[i + 1], quadPoints[i + 3]), + Math.min(quadPoints[i + 5], quadPoints[i + 7])); + float maxY = + Math.max( + Math.max(quadPoints[i + 1], quadPoints[i + 3]), + Math.max(quadPoints[i + 5], quadPoints[i + 7])); + + float width = maxX - minX; + float height = maxY - minY; + + for (float y = minY; y <= maxY; y += spacing) { + float len = Math.min(width, maxY - y); + cs.moveTo(minX, y); + cs.lineTo(minX + len, y + len); + } + for (float x = minX + spacing; x <= maxX; x += spacing) { + float len = Math.min(maxX - x, height); + cs.moveTo(x, minY); + cs.lineTo(x + len, minY + len); + } + } + + cs.stroke(); + } + } + + page.getAnnotations().remove(highlight); + COSDictionary pageDict = page.getCOSObject(); + + if (pageDict.containsKey(COSName.GROUP)) { + COSDictionary groupDict = + (COSDictionary) pageDict.getDictionaryObject(COSName.GROUP); + + if (groupDict != null + && COSName.TRANSPARENCY + .getName() + .equalsIgnoreCase( + groupDict.getNameAsString(COSName.S))) { + pageDict.removeItem(COSName.GROUP); + } + } + } + } + } + // Save the modified document to a temporary file. 
+ File preProcessedFile = Files.createTempFile("preprocessed_", ".pdf").toFile(); + document.save(preProcessedFile); + return preProcessedFile; + } + } + + private static void sanitizeFontResources(PDDocument doc) throws IOException { + for (PDPage page : doc.getPages()) { + PDResources res = page.getResources(); + if (res == null) continue; + + for (COSName fontName : res.getFontNames()) { + PDFont font = res.getFont(fontName); + if (font == null) continue; + + PDFontDescriptor desc = font.getFontDescriptor(); + if (desc == null) continue; + + COSDictionary descDict = desc.getCOSObject(); + + if (descDict.containsKey(COSName.getPDFName("CIDSet"))) { + descDict.removeItem(COSName.getPDFName("CIDSet")); } - dict.removeItem(COSName.SMASK); - dict.removeItem(COSName.CA); - dict.removeItem(COSName.getPDFName("ca")); - } - - if (dict.containsKey(COSName.INTERPOLATE) - && dict.getBoolean(COSName.INTERPOLATE, true)) { - dict.setBoolean(COSName.INTERPOLATE, false); - } - - dict.removeItem(COSName.JAVA_SCRIPT); - dict.removeItem(COSName.getPDFName("JS")); - dict.removeItem(COSName.getPDFName("RichMedia")); - dict.removeItem(COSName.getPDFName("Movie")); - dict.removeItem(COSName.getPDFName("Sound")); - dict.removeItem(COSName.getPDFName("Launch")); - dict.removeItem(COSName.URI); - dict.removeItem(COSName.getPDFName("GoToR")); - dict.removeItem(COSName.EMBEDDED_FILES); - dict.removeItem(COSName.FILESPEC); - - for (Map.Entry entry : dict.entrySet()) { - sanitizePdfA(entry.getValue(), pdfaPart); - } - - } else if (base instanceof COSArray arr) { - for (COSBase item : arr) { - sanitizePdfA(item, pdfaPart); + if (isType1Font(font)) { + if (descDict.containsKey(COSName.CHAR_SET)) { + String existingCharSet = descDict.getString(COSName.CHAR_SET); + if (existingCharSet == null + || existingCharSet.trim().isEmpty() + || "/.notdef".equals(existingCharSet)) { + descDict.removeItem(COSName.CHAR_SET); + log.debug( + "Removed invalid CharSet from Type 1 font: {}", font.getName()); + } + 
} + } } } } - private static void removeElementsForPdfA(PDDocument doc, int pdfaPart) { + private static boolean isType1Font(PDFont font) { + return font instanceof PDType1Font || font instanceof PDType1CFont; + } - if (pdfaPart == 1) { - doc.getDocumentCatalog().getCOSObject().removeItem(COSName.getPDFName("OCProperties")); - } + private static void fixOptionalContentGroups(PDDocument doc) { + PDDocumentCatalog catalog = doc.getDocumentCatalog(); + PDOptionalContentProperties ocProps = catalog.getOCProperties(); - for (PDPage page : doc.getPages()) { - if (pdfaPart == 1) { - page.setAnnotations(Collections.emptyList()); - } - PDResources res = page.getResources(); - sanitizePdfA(page.getCOSObject(), pdfaPart); + if (ocProps == null) return; - if (res != null) { - for (COSName name : res.getXObjectNames()) { - try { - PDXObject xo = res.getXObject(name); - if (xo instanceof PDFormXObject form) { - sanitizePdfA(form.getCOSObject(), pdfaPart); - } else if (xo instanceof PDImageXObject img) { - sanitizePdfA(img.getCOSObject(), pdfaPart); - } - } catch (IOException ioe) { - log.error("Cannot load XObject {}: {}", name.getName(), ioe.getMessage()); + COSBase ocPropsBase = + catalog.getCOSObject().getDictionaryObject(COSName.getPDFName("OCProperties")); + if (!(ocPropsBase instanceof COSDictionary ocPropsDict)) return; + COSBase ocgs = ocPropsDict.getDictionaryObject(COSName.OCGS); + + if (ocgs instanceof COSArray ocgArray) { + int unnamedCount = 1; + + for (COSBase base : ocgArray) { + if (base instanceof COSDictionary ocgDict) { + if (!ocgDict.containsKey(COSName.NAME)) { + String newName = "Layer " + unnamedCount++; + ocgDict.setString(COSName.NAME, newName); + log.debug("Fixed OCG missing name, set to: {}", newName); } } } @@ -1323,151 +1472,117 @@ public class ConvertPDFToPDFA { document.getDocumentCatalog().setMetadata(newMetadata); } - private static File preProcessHighlights(File inputPdf) throws Exception { + private byte[] convertWithGhostscript(Path inputPdf, 
Path workingDir, PdfaProfile profile) + throws IOException, InterruptedException { + Path outputPdf = workingDir.resolve("gs_output.pdf"); + ColorProfiles colorProfiles = prepareColorProfiles(workingDir); + Path pdfaDefFile = createPdfaDefFile(workingDir, colorProfiles, profile); - try (PDDocument document = Loader.loadPDF(inputPdf)) { + // Preprocess PDF for PDF/A compliance using the sanitizer + Path sanitizedInputPdf = sanitizePdfWithPdfBox(inputPdf); + Path preprocessedPdf = sanitizedInputPdf != null ? sanitizedInputPdf : inputPdf; - for (PDPage page : document.getPages()) { - List annotations = page.getAnnotations(); - for (PDAnnotation annot : annotations) { - if ("Highlight".equals(annot.getSubtype()) - && annot instanceof PDAnnotationTextMarkup highlight) { - float[] colorComponents = - highlight.getColor() != null - ? highlight.getColor().getComponents() - : new float[] {1f, 1f, 0f}; - Color highlightColor = - new Color( - colorComponents[0], colorComponents[1], colorComponents[2]); + // For PDF/A-1, clean CIDSet issues that may cause validation failures + if (profile.getPart() == 1) { + Path cidSetCleaned = cleanCidSetWithQpdf(preprocessedPdf); + if (cidSetCleaned != null) { + preprocessedPdf = cidSetCleaned; + } + } - float[] quadPoints = highlight.getQuadPoints(); - if (quadPoints != null) { - try (PDPageContentStream cs = - new PDPageContentStream( - document, - page, - PDPageContentStream.AppendMode.PREPEND, - true, - true)) { + // Normalize PDF with qpdf before Ghostscript conversion to ensure proper font program + // handling + Path normalizedInputPdf = normalizePdfWithQpdf(preprocessedPdf); + Path inputForGs = (normalizedInputPdf != null) ? 
normalizedInputPdf : preprocessedPdf; - cs.setStrokingColor(highlightColor); - cs.setLineWidth(0.05f); - float spacing = 2f; - for (int i = 0; i < quadPoints.length; i += 8) { - float minX = - Math.min( - Math.min(quadPoints[i], quadPoints[i + 2]), - Math.min(quadPoints[i + 4], quadPoints[i + 6])); - float maxX = - Math.max( - Math.max(quadPoints[i], quadPoints[i + 2]), - Math.max(quadPoints[i + 4], quadPoints[i + 6])); - float minY = - Math.min( - Math.min(quadPoints[i + 1], quadPoints[i + 3]), - Math.min(quadPoints[i + 5], quadPoints[i + 7])); - float maxY = - Math.max( - Math.max(quadPoints[i + 1], quadPoints[i + 3]), - Math.max(quadPoints[i + 5], quadPoints[i + 7])); + try { + List command = + buildGhostscriptCommand( + inputForGs, outputPdf, colorProfiles, workingDir, profile, pdfaDefFile); - float width = maxX - minX; - float height = maxY - minY; + ProcessExecutorResult result = + ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT) + .runCommandWithOutputHandling(command); - for (float y = minY; y <= maxY; y += spacing) { - float len = Math.min(width, maxY - y); - cs.moveTo(minX, y); - cs.lineTo(minX + len, y + len); - } - for (float x = minX + spacing; x <= maxX; x += spacing) { - float len = Math.min(maxX - x, height); - cs.moveTo(x, minY); - cs.lineTo(x + len, minY + len); - } - } + if (result.getRc() != 0) { + throw new IOException("Ghostscript exited with code " + result.getRc()); + } - cs.stroke(); - } - } + if (!Files.exists(outputPdf)) { + throw new IOException("Ghostscript did not produce an output file"); + } - page.getAnnotations().remove(highlight); - COSDictionary pageDict = page.getCOSObject(); - - if (pageDict.containsKey(COSName.GROUP)) { - COSDictionary groupDict = - (COSDictionary) pageDict.getDictionaryObject(COSName.GROUP); - - if (groupDict != null - && COSName.TRANSPARENCY - .getName() - .equalsIgnoreCase( - groupDict.getNameAsString(COSName.S))) { - pageDict.removeItem(COSName.GROUP); - } - } - } + return 
Files.readAllBytes(outputPdf); + } finally { + // Clean up temporary files + if (normalizedInputPdf != null && !normalizedInputPdf.equals(preprocessedPdf)) { + try { + Files.deleteIfExists(normalizedInputPdf); + } catch (IOException e) { + log.debug("Failed to delete temporary normalized file", e); + } + } + if (preprocessedPdf != null && !preprocessedPdf.equals(inputPdf)) { + try { + Files.deleteIfExists(preprocessedPdf); + } catch (IOException e) { + log.debug("Failed to delete temporary sanitized or CIDSet cleaned file", e); + } + } + if (sanitizedInputPdf != null && !sanitizedInputPdf.equals(inputPdf)) { + try { + Files.deleteIfExists(sanitizedInputPdf); + } catch (IOException e) { + log.debug("Failed to delete temporary sanitized file", e); } } - // Save the modified document to a temporary file. - File preProcessedFile = Files.createTempFile("preprocessed_", ".pdf").toFile(); - document.save(preProcessedFile); - return preProcessedFile; } } - private ResponseEntity handlePdfAConversion( - MultipartFile inputFile, String outputFormat) throws Exception { - PdfaProfile profile = PdfaProfile.fromRequest(outputFormat); + private Path runLibreOfficeConversion(Path tempInputFile, int pdfaPart) throws Exception { + // Create temp output directory + Path tempOutputDir = Files.createTempDirectory("output_"); - // Get the original filename without extension - String originalFileName = Filenames.toSimpleFileName(inputFile.getOriginalFilename()); - if (originalFileName == null || originalFileName.trim().isEmpty()) { - originalFileName = "output.pdf"; - } - String baseFileName = - originalFileName.contains(".") - ? originalFileName.substring(0, originalFileName.lastIndexOf('.')) - : originalFileName; - - Path workingDir = Files.createTempDirectory("pdfa_conversion_"); - Path inputPath = workingDir.resolve("input.pdf"); - inputFile.transferTo(inputPath); + // Determine PDF/A filter based on requested format + String pdfFilter = + pdfaPart == 2 + ? 
"pdf:writer_pdf_Export:{\"SelectPdfVersion\":{\"type\":\"long\",\"value\":\"2\"}}" + : "pdf:writer_pdf_Export:{\"SelectPdfVersion\":{\"type\":\"long\",\"value\":\"1\"}}"; + Path libreOfficeProfile = Files.createTempDirectory("libreoffice_profile_"); try { - byte[] converted; + // Prepare LibreOffice command + List command = + new ArrayList<>( + Arrays.asList( + runtimePathConfig.getSOfficePath(), + "-env:UserInstallation=" + libreOfficeProfile.toUri(), + "--headless", + "--nologo", + "--convert-to", + pdfFilter, + "--outdir", + tempOutputDir.toString(), + tempInputFile.toString())); - // Try Ghostscript first (preferred method) - if (isGhostscriptAvailable()) { - log.info("Using Ghostscript for PDF/A conversion to {}", profile.getDisplayName()); - try { - converted = convertWithGhostscript(inputPath, workingDir, profile); - String outputFilename = baseFileName + profile.outputSuffix(); + ProcessExecutorResult returnCode = + ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE) + .runCommandWithOutputHandling(command); - validateAndWarnPdfA(converted, profile, "Ghostscript"); - - return WebResponseUtils.bytesToWebResponse( - converted, outputFilename, MediaType.APPLICATION_PDF); - } catch (Exception e) { - log.warn( - "Ghostscript conversion failed, falling back to PDFBox/LibreOffice method", - e); - } - } else { - log.info("Ghostscript not available, using PDFBox/LibreOffice fallback method"); + if (returnCode.getRc() != 0) { + log.error("PDF/A conversion failed with return code: {}", returnCode.getRc()); + throw ExceptionUtils.createPdfaConversionFailedException(); } - - converted = convertWithPdfBoxMethod(inputPath, profile); - String outputFilename = baseFileName + profile.outputSuffix(); - - // Validate with PDFBox preflight and warn if issues found - validateAndWarnPdfA(converted, profile, "PDFBox/LibreOffice"); - - return WebResponseUtils.bytesToWebResponse( - converted, outputFilename, MediaType.APPLICATION_PDF); - } finally { - 
deleteQuietly(workingDir); + FileUtils.deleteQuietly(libreOfficeProfile.toFile()); } + + // Get the output file + File[] outputFiles = tempOutputDir.toFile().listFiles(); + if (outputFiles == null || outputFiles.length != 1) { + throw ExceptionUtils.createPdfaConversionFailedException(); + } + return outputFiles[0].toPath(); } private Path normalizePdfWithQpdf(Path inputPdf) { @@ -1599,6 +1714,415 @@ public class ConvertPDFToPDFA { } } + private byte[] convertWithGhostscriptX(Path inputPdf, Path workingDir, PdfXProfile profile) + throws IOException, InterruptedException { + Path outputPdf = workingDir.resolve("gs_output_pdfx.pdf"); + ColorProfiles colorProfiles = prepareColorProfiles(workingDir); + + // Sanitize the PDF before PDF/X conversion for better Ghostscript compatibility + Path sanitizedInputPdf = sanitizePdfWithPdfBox(inputPdf); + Path inputForGs = sanitizedInputPdf != null ? sanitizedInputPdf : inputPdf; + + List command = + buildGhostscriptCommandX(inputForGs, outputPdf, colorProfiles, workingDir, profile); + + ProcessExecutorResult result = + ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT) + .runCommandWithOutputHandling(command); + + if (result.getRc() != 0) { + throw new IOException("Ghostscript exited with code " + result.getRc()); + } + + if (!Files.exists(outputPdf)) { + throw new IOException("Ghostscript did not produce an output file"); + } + + return Files.readAllBytes(outputPdf); + } + + private ResponseEntity handlePdfAConversion( + MultipartFile inputFile, String outputFormat) throws Exception { + PdfaProfile profile = PdfaProfile.fromRequest(outputFormat); + + // Get the original filename without extension + String originalFileName = Filenames.toSimpleFileName(inputFile.getOriginalFilename()); + if (originalFileName == null || originalFileName.trim().isEmpty()) { + originalFileName = "output.pdf"; + } + String baseFileName = + originalFileName.contains(".") + ? 
originalFileName.substring(0, originalFileName.lastIndexOf('.')) + : originalFileName; + + Path workingDir = Files.createTempDirectory("pdfa_conversion_"); + Path inputPath = workingDir.resolve("input.pdf"); + inputFile.transferTo(inputPath); + + try { + byte[] converted; + + // Try Ghostscript first (preferred method) + if (isGhostscriptAvailable()) { + log.info("Using Ghostscript for PDF/A conversion to {}", profile.getDisplayName()); + try { + converted = convertWithGhostscript(inputPath, workingDir, profile); + String outputFilename = baseFileName + profile.outputSuffix(); + + validateAndWarnPdfA(converted, profile, "Ghostscript"); + + return WebResponseUtils.bytesToWebResponse( + converted, outputFilename, MediaType.APPLICATION_PDF); + } catch (IOException | InterruptedException e) { + log.warn( + "Ghostscript conversion failed, falling back to PDFBox/LibreOffice method", + e); + } + } else { + log.info("Ghostscript not available, using PDFBox/LibreOffice fallback method"); + } + + converted = convertWithPdfBoxMethod(inputPath, profile); + String outputFilename = baseFileName + profile.outputSuffix(); + + // Validate with PDFBox preflight and warn if issues found + validateAndWarnPdfA(converted, profile, "PDFBox/LibreOffice"); + + return WebResponseUtils.bytesToWebResponse( + converted, outputFilename, MediaType.APPLICATION_PDF); + + } finally { + deleteQuietly(workingDir); + } + } + + private Path sanitizePdfWithPdfBox(Path inputPdf) { + try { + Path sanitizedPath = + inputPdf.getParent().resolve("sanitized_" + inputPdf.getFileName().toString()); + + sanitizeDocument(inputPdf, sanitizedPath); + + log.info("PDF sanitized with PDFBox for better Ghostscript compatibility"); + return sanitizedPath; + } catch (IOException e) { + log.warn( + "PDF sanitization I/O error, proceeding with original file: {}", + e.getMessage()); + return null; + } + } + + private void sanitizeDocument(Path inputPath, Path outputPath) throws IOException { + try (PDDocument doc = 
Loader.loadPDF(inputPath.toFile())) { + Map sanitizers = new LinkedHashMap<>(); + sanitizers.put("Flatten highlight annotations", this::flattenHighlightsToContent); + sanitizers.put("Sanitize font resources", ConvertPDFToPDFA::sanitizeFontResources); + sanitizers.put("Clean metadata", this::sanitizeMetadata); + sanitizers.put("Remove forbidden actions", this::removeForbiddenActions); + sanitizers.put("Ensure annotation appearances", this::ensureAnnotationAppearances); + sanitizers.put("Ensure embedded file compliance", this::ensureEmbeddedFileCompliance); + sanitizers.put( + "Fix optional content groups", ConvertPDFToPDFA::fixOptionalContentGroups); + + for (Map.Entry entry : sanitizers.entrySet()) { + try { + entry.getValue().sanitize(doc); + log.debug("Sanitization step completed: {}", entry.getKey()); + } catch (Exception e) { + log.warn( + "Sanitization step '{}' failed, continuing: {}", + entry.getKey(), + e.getMessage()); + } + } + + doc.save(outputPath.toFile()); + } + } + + private void flattenHighlightsToContent(PDDocument doc) throws IOException { + for (PDPage page : doc.getPages()) { + List annotations = new ArrayList<>(page.getAnnotations()); + List toRemove = new ArrayList<>(); + + try (PDPageContentStream cs = + new PDPageContentStream( + doc, page, PDPageContentStream.AppendMode.PREPEND, true, true)) { + + for (PDAnnotation annot : annotations) { + if (annot instanceof PDAnnotationTextMarkup highlight + && ANNOTATION_HIGHLIGHT.equals(annot.getSubtype())) { + + PDColor color = highlight.getColor(); + if (color != null) { + cs.setNonStrokingColor(color); + } else { + cs.setNonStrokingColor(Color.YELLOW); + } + + float[] quads = highlight.getQuadPoints(); + if (!isValidQuadPoints(quads)) { + log.warn( + "Invalid quad points array for highlight annotation: {}", + quads != null ? 
"length=" + quads.length : "null"); + continue; + } + + for (int i = 0; i <= quads.length - 8; i += 8) { + float minX = Float.MAX_VALUE, minY = Float.MAX_VALUE; + float maxX = -Float.MAX_VALUE, maxY = -Float.MAX_VALUE; + + for (int j = 0; j < 8; j += 2) { + float x = quads[i + j]; + float y = quads[i + j + 1]; + minX = Math.min(minX, x); + maxX = Math.max(maxX, x); + minY = Math.min(minY, y); + maxY = Math.max(maxY, y); + } + + // Only draw if we have a valid rectangle + float width = maxX - minX; + float height = maxY - minY; + if (width > 0 && height > 0) { + cs.addRect(minX, minY, width, height); + cs.fill(); + } + } + toRemove.add(annot); + } + } + } + page.getAnnotations().removeAll(toRemove); + } + } + + private boolean isValidQuadPoints(float[] quads) { + return quads != null && quads.length >= 8 && quads.length % 8 == 0; + } + + private void sanitizeMetadata(PDDocument doc) { + PDDocumentInformation info = doc.getDocumentInformation(); + if (info == null) { + info = new PDDocumentInformation(); + doc.setDocumentInformation(info); + } + + Set keys = info.getMetadataKeys(); + if (keys != null) { // Add null check + for (String key : + new HashSet<>(keys)) { // Copy to avoid ConcurrentModificationException + String value = info.getCustomMetadataValue(key); + if (value != null) { + String clean = NON_PRINTABLE_ASCII.matcher(value).replaceAll(""); + info.setCustomMetadataValue(key, clean); + } + } + } + + info.setProducer("Stirling-PDF Sanitizer"); + } + + private void removeForbiddenActions(PDDocument doc) { + doc.getDocumentCatalog().setOpenAction(null); + doc.getDocumentCatalog().getCOSObject().removeItem(COSName.JAVA_SCRIPT); + } + + private void ensureAnnotationAppearances(PDDocument doc) throws IOException { + for (PDPage page : doc.getPages()) { + List annotations = page.getAnnotations(); + List toRemove = new ArrayList<>(); + + for (PDAnnotation annot : annotations) { + String subtype = annot.getSubtype(); + + if (ANNOTATION_POPUP.equals(subtype) || 
ANNOTATION_LINK.equals(subtype)) { + continue; + } + + PDRectangle rect = annot.getRectangle(); + if (rect != null && isZeroSizeRect(rect)) { + continue; + } + + PDAppearanceDictionary appearanceDict = annot.getAppearance(); + if (appearanceDict == null || appearanceDict.getNormalAppearance() == null) { + if (!tryGenerateAppearance(doc, page, annot)) { + log.warn("Removing annotation without appearance: {} on page", subtype); + toRemove.add(annot); + } + } + } + + if (!toRemove.isEmpty()) { + annotations.removeAll(toRemove); + } + } + } + + private boolean isZeroSizeRect(PDRectangle rect) { + return Float.compare(rect.getLowerLeftX(), rect.getUpperRightX()) == 0 + && Float.compare(rect.getLowerLeftY(), rect.getUpperRightY()) == 0; + } + + private boolean tryGenerateAppearance(PDDocument doc, PDPage page, PDAnnotation annot) { + try { + if (annot instanceof PDAnnotationWidget) { + annot.constructAppearances(); + return annot.getAppearance() != null; + } + + if (annot instanceof PDAnnotationTextMarkup) { + return false; // Will be handled by flattening + } + + annot.constructAppearances(); + return annot.getAppearance() != null; + + } catch (Exception e) { + log.debug("Could not generate appearance for annotation: {}", e.getMessage()); + return false; + } + } + + public void ensureEmbeddedFileCompliance(PDDocument doc) { + PDDocumentCatalog catalog = doc.getDocumentCatalog(); + PDDocumentNameDictionary names = catalog.getNames(); + if (names == null) return; + + PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles(); + if (embeddedFiles == null) return; + + try { + Map fileSpecs = embeddedFiles.getNames(); + if (fileSpecs == null || fileSpecs.isEmpty()) return; + + COSArray afArray = new COSArray(); + if (catalog.getCOSObject().containsKey(COS_AF)) { + try { + afArray = (COSArray) catalog.getCOSObject().getDictionaryObject(COS_AF); + } catch (Exception e) { + afArray = new COSArray(); + } + } + + boolean afArrayModified = false; + + for (Map.Entry entry 
: fileSpecs.entrySet()) { + String fileName = entry.getKey(); + PDComplexFileSpecification fileSpec = entry.getValue(); + COSDictionary fileSpecDict = fileSpec.getCOSObject(); + + if (!fileSpecDict.containsKey(COS_AF_RELATIONSHIP)) { + fileSpecDict.setName(COS_AF_RELATIONSHIP, AF_RELATIONSHIP_UNSPECIFIED); + log.debug("Added AFRelationship 'Unspecified' to embedded file: {}", fileName); + } + + if (fileSpec.getFile() == null || fileSpec.getFile().isEmpty()) { + fileSpec.setFile(fileName); + } + if (!fileSpecDict.containsKey(COS_UF)) { + fileSpecDict.setString(COS_UF, fileName); + } + + ensureEmbeddedFileMimeType(fileSpec, fileName); + + boolean alreadyInAf = false; + for (int i = 0; i < afArray.size(); i++) { + if (afArray.getObject(i) == fileSpecDict) { + alreadyInAf = true; + break; + } + } + + if (!alreadyInAf) { + afArray.add(fileSpecDict); + afArrayModified = true; + } + } + + if (afArrayModified) { + catalog.getCOSObject().setItem(COS_AF, afArray); + log.debug( + "Updated Document Catalog 'AF' array with {} associated files", + afArray.size()); + } + + } catch (IOException e) { + log.warn("Could not process embedded files for PDF/A-3 compliance: {}", e.getMessage()); + } + } + + private void ensureEmbeddedFileMimeType(PDComplexFileSpecification fileSpec, String fileName) { + PDEmbeddedFile embeddedFile = fileSpec.getEmbeddedFileUnicode(); + if (embeddedFile == null) { + embeddedFile = fileSpec.getEmbeddedFile(); + } + + if (embeddedFile != null) { + String currentSubtype = embeddedFile.getSubtype(); + if (currentSubtype == null || currentSubtype.isEmpty()) { + String mimeType = detectMimeTypeFromFilename(fileName); + embeddedFile.setSubtype(mimeType); + log.debug("Set MIME type '{}' for embedded file: {}", mimeType, fileName); + } + } + } + + private String detectMimeTypeFromFilename(String fileName) { + if (fileName == null || fileName.isEmpty()) { + return DEFAULT_MIME_TYPE; + } + + String lowerName = fileName.toLowerCase(Locale.ROOT); + + return 
MIME_TYPE_MAP.entrySet().stream() + .filter(entry -> lowerName.endsWith(entry.getKey())) + .map(Map.Entry::getValue) + .findFirst() + .orElse(DEFAULT_MIME_TYPE); + } + + public byte[] convertPDDocumentToPDFA(PDDocument document, String outputFormat) + throws IOException { + PdfaProfile profile = PdfaProfile.fromRequest(outputFormat); + + Path workingDir = Files.createTempDirectory("pdfa_conversion_"); + Path inputPath = workingDir.resolve("input.pdf"); + + try { + document.save(inputPath.toFile()); + + if (isGhostscriptAvailable()) { + log.info("Using Ghostscript for PDF/A conversion to {}", profile.getDisplayName()); + try { + byte[] converted = convertWithGhostscript(inputPath, workingDir, profile); + validateAndWarnPdfA(converted, profile, "Ghostscript"); + return converted; + } catch (IOException | InterruptedException e) { + log.warn( + "Ghostscript conversion failed, falling back to PDFBox/LibreOffice method", + e); + } + } else { + log.info("Ghostscript not available, using PDFBox/LibreOffice fallback method"); + } + + byte[] converted; + try { + converted = convertWithPdfBoxMethod(inputPath, profile); + } catch (Exception e) { + throw new IOException("PDF/A conversion failed", e); + } + validateAndWarnPdfA(converted, profile, "PDFBox/LibreOffice"); + return converted; + + } finally { + deleteQuietly(workingDir); + } + } + private void copyResourceIcc(Path target) throws IOException { try (InputStream in = getClass().getResourceAsStream(ICC_RESOURCE_PATH)) { if (in == null) { @@ -1730,5 +2254,10 @@ public class ConvertPDFToPDFA { } } + @FunctionalInterface + private interface DocumentSanitizer { + void sanitize(PDDocument doc) throws IOException; + } + private record ColorProfiles(Path rgb, Path gray) {} } diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/AttachmentController.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/AttachmentController.java index a356582c3..197a9f52d 100644 --- 
a/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/AttachmentController.java +++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/AttachmentController.java @@ -1,5 +1,6 @@ package stirling.software.SPDF.controller.api.misc; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.List; import java.util.Optional; @@ -17,8 +18,12 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import stirling.software.SPDF.config.swagger.StandardPdfResponse; +import stirling.software.SPDF.controller.api.converters.ConvertPDFToPDFA; import stirling.software.SPDF.model.api.misc.AddAttachmentRequest; +import stirling.software.SPDF.model.api.misc.DeleteAttachmentRequest; import stirling.software.SPDF.model.api.misc.ExtractAttachmentsRequest; +import stirling.software.SPDF.model.api.misc.ListAttachmentsRequest; +import stirling.software.SPDF.model.api.misc.RenameAttachmentRequest; import stirling.software.SPDF.service.AttachmentServiceInterface; import stirling.software.common.annotations.AutoJobPostMapping; import stirling.software.common.annotations.api.MiscApi; @@ -36,6 +41,8 @@ public class AttachmentController { private final AttachmentServiceInterface pdfAttachmentService; + private final ConvertPDFToPDFA convertPDFToPDFA; + @AutoJobPostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/add-attachments") @StandardPdfResponse @Operation( @@ -43,19 +50,87 @@ public class AttachmentController { description = "This endpoint adds attachments to a PDF. 
Input:PDF, Output:PDF Type:MISO") public ResponseEntity addAttachments(@ModelAttribute AddAttachmentRequest request) - throws IOException { + throws Exception { MultipartFile fileInput = request.getFileInput(); List attachments = request.getAttachments(); + boolean convertToPdfA3b = request.isConvertToPdfA3b(); - PDDocument document = - pdfAttachmentService.addAttachment( - pdfDocumentFactory.load(fileInput, false), attachments); + validateAttachmentRequest(attachments); - return WebResponseUtils.pdfDocToWebResponse( - document, - GeneralUtils.generateFilename( - Filenames.toSimpleFileName(fileInput.getOriginalFilename()), - "_with_attachments.pdf")); + String originalFileName = Filenames.toSimpleFileName(fileInput.getOriginalFilename()); + if (originalFileName == null || originalFileName.isEmpty()) { + originalFileName = "document"; + } + String baseFileName = + originalFileName.contains(".") + ? originalFileName.substring(0, originalFileName.lastIndexOf('.')) + : originalFileName; + + if (convertToPdfA3b) { + byte[] pdfaBytes; + try (PDDocument document = pdfDocumentFactory.load(request, false)) { + pdfaBytes = convertPDFToPDFA.convertPDDocumentToPDFA(document, "pdfa-3b"); + } + + try (PDDocument pdfaDocument = org.apache.pdfbox.Loader.loadPDF(pdfaBytes)) { + pdfAttachmentService.addAttachment(pdfaDocument, attachments); + + convertPDFToPDFA.ensureEmbeddedFileCompliance(pdfaDocument); + + ConvertPDFToPDFA.fixType1FontCharSet(pdfaDocument); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + pdfaDocument.save(baos); + byte[] resultBytes = baos.toByteArray(); + + String outputFilename = baseFileName + "_with_attachments_PDFA-3b.pdf"; + return WebResponseUtils.bytesToWebResponse( + resultBytes, outputFilename, MediaType.APPLICATION_PDF); + } + } else { + try (PDDocument document = pdfDocumentFactory.load(request, false)) { + pdfAttachmentService.addAttachment(document, attachments); + return WebResponseUtils.pdfDocToWebResponse( + document, + 
GeneralUtils.generateFilename( + Filenames.toSimpleFileName(fileInput.getOriginalFilename()), + "_with_attachments.pdf")); + } + } + } + + private void validateAttachmentRequest(List attachments) { + if (attachments == null || attachments.isEmpty()) { + throw ExceptionUtils.createIllegalArgumentException( + "error.attachmentsRequired", "At least one attachment is required"); + } + + final long maxAttachmentSize = 50L * 1024 * 1024; // 50 MB per attachment + final long maxTotalSize = 200L * 1024 * 1024; // 200 MB total + + long totalSize = 0; + for (MultipartFile attachment : attachments) { + if (attachment == null || attachment.isEmpty()) { + throw ExceptionUtils.createIllegalArgumentException( + "error.attachmentEmpty", "Attachment files cannot be null or empty"); + } + if (attachment.getSize() > maxAttachmentSize) { + throw ExceptionUtils.createIllegalArgumentException( + "error.attachmentTooLarge", + "Attachment ''{0}'' exceeds maximum size of {1} bytes", + attachment.getOriginalFilename(), + maxAttachmentSize); + } + totalSize += attachment.getSize(); + } + + if (totalSize > maxTotalSize) { + throw ExceptionUtils.createIllegalArgumentException( + "error.totalAttachmentsTooLarge", + "Total attachment size {0} exceeds maximum of {1} bytes", + totalSize, + maxTotalSize); + } } @AutoJobPostMapping( @@ -88,4 +163,82 @@ public class AttachmentController { extracted.get(), outputName, MediaType.APPLICATION_OCTET_STREAM); } } + + @AutoJobPostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/list-attachments") + @Operation( + summary = "List attachments in PDF", + description = + "This endpoint lists all embedded attachments in a PDF. 
Input:PDF Output:JSON Type:SISO") + public ResponseEntity> + listAttachments(@ModelAttribute ListAttachmentsRequest request) throws IOException { + try (PDDocument document = pdfDocumentFactory.load(request, true)) { + List attachments = + pdfAttachmentService.listAttachments(document); + + return ResponseEntity.ok(attachments); + } + } + + @AutoJobPostMapping( + consumes = MediaType.MULTIPART_FORM_DATA_VALUE, + value = "/rename-attachment") + @StandardPdfResponse + @Operation( + summary = "Rename attachment in PDF", + description = + "This endpoint renames an embedded attachment in a PDF. Input:PDF Output:PDF Type:MISO") + public ResponseEntity renameAttachment(@ModelAttribute RenameAttachmentRequest request) + throws Exception { + MultipartFile fileInput = request.getFileInput(); + String attachmentName = request.getAttachmentName(); + String newName = request.getNewName(); + + if (attachmentName == null || attachmentName.isBlank()) { + throw ExceptionUtils.createIllegalArgumentException( + "error.attachmentNameRequired", "Attachment name cannot be null or empty"); + } + if (newName == null || newName.isBlank()) { + throw ExceptionUtils.createIllegalArgumentException( + "error.newNameRequired", "New attachment name cannot be null or empty"); + } + + try (PDDocument document = pdfDocumentFactory.load(request, false)) { + pdfAttachmentService.renameAttachment(document, attachmentName, newName); + + return WebResponseUtils.pdfDocToWebResponse( + document, + GeneralUtils.generateFilename( + Filenames.toSimpleFileName(fileInput.getOriginalFilename()), + "_attachment_renamed.pdf")); + } + } + + @AutoJobPostMapping( + consumes = MediaType.MULTIPART_FORM_DATA_VALUE, + value = "/delete-attachment") + @StandardPdfResponse + @Operation( + summary = "Delete attachment from PDF", + description = + "This endpoint deletes an embedded attachment from a PDF. 
Input:PDF Output:PDF Type:MISO") + public ResponseEntity deleteAttachment(@ModelAttribute DeleteAttachmentRequest request) + throws Exception { + MultipartFile fileInput = request.getFileInput(); + String attachmentName = request.getAttachmentName(); + + if (attachmentName == null || attachmentName.isBlank()) { + throw ExceptionUtils.createIllegalArgumentException( + "error.attachmentNameRequired", "Attachment name cannot be null or empty"); + } + + try (PDDocument document = pdfDocumentFactory.load(request, false)) { + pdfAttachmentService.deleteAttachment(document, attachmentName); + + return WebResponseUtils.pdfDocToWebResponse( + document, + GeneralUtils.generateFilename( + Filenames.toSimpleFileName(fileInput.getOriginalFilename()), + "_attachment_deleted.pdf")); + } + } } diff --git a/app/core/src/main/java/stirling/software/SPDF/exception/GlobalExceptionHandler.java b/app/core/src/main/java/stirling/software/SPDF/exception/GlobalExceptionHandler.java index fd9224a41..82d1b2bb9 100644 --- a/app/core/src/main/java/stirling/software/SPDF/exception/GlobalExceptionHandler.java +++ b/app/core/src/main/java/stirling/software/SPDF/exception/GlobalExceptionHandler.java @@ -12,6 +12,7 @@ import org.springframework.http.HttpStatus; import org.springframework.http.ProblemDetail; import org.springframework.http.ResponseEntity; import org.springframework.http.converter.HttpMessageNotReadableException; +import org.springframework.web.HttpMediaTypeNotAcceptableException; import org.springframework.web.HttpMediaTypeNotSupportedException; import org.springframework.web.HttpRequestMethodNotSupportedException; import org.springframework.web.bind.MethodArgumentNotValidException; @@ -23,6 +24,7 @@ import org.springframework.web.multipart.support.MissingServletRequestPartExcept import org.springframework.web.servlet.NoHandlerFoundException; import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; import lombok.RequiredArgsConstructor; 
import lombok.extern.slf4j.Slf4j; @@ -808,6 +810,56 @@ public class GlobalExceptionHandler { .body(problemDetail); } + /** + * Handle 406 Not Acceptable errors when error responses cannot match client Accept header. + * + *

When thrown: When the client sends Accept: application/pdf but the server needs to return + * a JSON error response (e.g., when an attachment is not found). + * + *

This handler writes directly to HttpServletResponse to bypass Spring's content negotiation + * and ensure error responses are always delivered as JSON. + * + * @param ex the HttpMediaTypeNotAcceptableException + * @param request the HTTP servlet request + * @param response the HTTP servlet response + */ + @ExceptionHandler(HttpMediaTypeNotAcceptableException.class) + public void handleMediaTypeNotAcceptable( + HttpMediaTypeNotAcceptableException ex, + HttpServletRequest request, + HttpServletResponse response) + throws IOException { + + log.warn( + "Media type not acceptable at {}: client accepts {}, server supports {}", + request.getRequestURI(), + request.getHeader("Accept"), + ex.getSupportedMediaTypes()); + + // Write JSON error response directly, bypassing content negotiation + response.setStatus(HttpStatus.NOT_ACCEPTABLE.value()); + response.setContentType("application/problem+json"); + response.setCharacterEncoding("UTF-8"); + + String errorJson = + String.format( + """ + { + "type": "about:blank", + "title": "Not Acceptable", + "status": 406, + "detail": "The requested resource could not be returned in an acceptable format. 
Error responses are returned as JSON.", + "instance": "%s", + "timestamp": "%s", + "hints": ["Error responses are always returned as application/json or application/problem+json", "Set Accept header to include application/json for proper error handling"] + } + """, + request.getRequestURI(), Instant.now().toString()); + + response.getWriter().write(errorJson); + response.getWriter().flush(); + } + // =========================================================================================== // JAVA STANDARD EXCEPTIONS // =========================================================================================== @@ -963,9 +1015,8 @@ public class GlobalExceptionHandler { // Check if this RuntimeException wraps a typed exception from job execution Throwable cause = ex.getCause(); - if (cause instanceof BaseAppException) { + if (cause instanceof BaseAppException appEx) { // Delegate to specific BaseAppException handlers - BaseAppException appEx = (BaseAppException) cause; if (appEx instanceof PdfPasswordException) { return handlePdfPassword((PdfPasswordException) appEx, request); } else if (appEx instanceof PdfCorruptedException @@ -979,9 +1030,8 @@ public class GlobalExceptionHandler { } else { return handleBaseApp(appEx, request); } - } else if (cause instanceof BaseValidationException) { + } else if (cause instanceof BaseValidationException valEx) { // Delegate to validation exception handlers - BaseValidationException valEx = (BaseValidationException) cause; if (valEx instanceof CbrFormatException || valEx instanceof CbzFormatException || valEx instanceof EmlFormatException) { @@ -992,6 +1042,9 @@ public class GlobalExceptionHandler { } else if (cause instanceof IOException) { // Unwrap and handle IOException (may contain PDF-specific errors) return handleIOException((IOException) cause, request); + } else if (cause instanceof IllegalArgumentException) { + // Unwrap and handle IllegalArgumentException (business logic validation errors) + return 
handleIllegalArgument((IllegalArgumentException) cause, request); } // Not a wrapped exception - treat as unexpected error diff --git a/app/core/src/main/java/stirling/software/SPDF/model/api/misc/AddAttachmentRequest.java b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/AddAttachmentRequest.java index cf85451f4..48a749098 100644 --- a/app/core/src/main/java/stirling/software/SPDF/model/api/misc/AddAttachmentRequest.java +++ b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/AddAttachmentRequest.java @@ -20,4 +20,10 @@ public class AddAttachmentRequest extends PDFFile { requiredMode = Schema.RequiredMode.REQUIRED, format = "binary") private List attachments; + + @Schema( + description = "Convert the resulting PDF to PDF/A-3b format after adding attachments", + requiredMode = Schema.RequiredMode.NOT_REQUIRED, + defaultValue = "false") + private boolean convertToPdfA3b = false; } diff --git a/app/core/src/main/java/stirling/software/SPDF/model/api/misc/AttachmentInfo.java b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/AttachmentInfo.java new file mode 100644 index 000000000..b80139be4 --- /dev/null +++ b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/AttachmentInfo.java @@ -0,0 +1,17 @@ +package stirling.software.SPDF.model.api.misc; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@NoArgsConstructor +@AllArgsConstructor +public class AttachmentInfo { + private String filename; + private Long size; + private String contentType; + private String description; + private String creationDate; + private String modificationDate; +} diff --git a/app/core/src/main/java/stirling/software/SPDF/model/api/misc/DeleteAttachmentRequest.java b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/DeleteAttachmentRequest.java new file mode 100644 index 000000000..22b7d017b --- /dev/null +++ 
b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/DeleteAttachmentRequest.java @@ -0,0 +1,18 @@ +package stirling.software.SPDF.model.api.misc; + +import io.swagger.v3.oas.annotations.media.Schema; + +import lombok.Data; +import lombok.EqualsAndHashCode; + +import stirling.software.common.model.api.PDFFile; + +@Data +@EqualsAndHashCode(callSuper = true) +public class DeleteAttachmentRequest extends PDFFile { + + @Schema( + description = "The name of the attachment to delete", + requiredMode = Schema.RequiredMode.REQUIRED) + private String attachmentName; +} diff --git a/app/core/src/main/java/stirling/software/SPDF/model/api/misc/ListAttachmentsRequest.java b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/ListAttachmentsRequest.java new file mode 100644 index 000000000..f30fc7540 --- /dev/null +++ b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/ListAttachmentsRequest.java @@ -0,0 +1,10 @@ +package stirling.software.SPDF.model.api.misc; + +import lombok.Data; +import lombok.EqualsAndHashCode; + +import stirling.software.common.model.api.PDFFile; + +@Data +@EqualsAndHashCode(callSuper = true) +public class ListAttachmentsRequest extends PDFFile {} diff --git a/app/core/src/main/java/stirling/software/SPDF/model/api/misc/RenameAttachmentRequest.java b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/RenameAttachmentRequest.java new file mode 100644 index 000000000..07731fce3 --- /dev/null +++ b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/RenameAttachmentRequest.java @@ -0,0 +1,23 @@ +package stirling.software.SPDF.model.api.misc; + +import io.swagger.v3.oas.annotations.media.Schema; + +import lombok.Data; +import lombok.EqualsAndHashCode; + +import stirling.software.common.model.api.PDFFile; + +@Data +@EqualsAndHashCode(callSuper = true) +public class RenameAttachmentRequest extends PDFFile { + + @Schema( + description = "The current name of the attachment to rename", + requiredMode = 
Schema.RequiredMode.REQUIRED) + private String attachmentName; + + @Schema( + description = "The new name for the attachment", + requiredMode = Schema.RequiredMode.REQUIRED) + private String newName; +} diff --git a/app/core/src/main/java/stirling/software/SPDF/service/AttachmentService.java b/app/core/src/main/java/stirling/software/SPDF/service/AttachmentService.java index 029e7086c..0f73632d3 100644 --- a/app/core/src/main/java/stirling/software/SPDF/service/AttachmentService.java +++ b/app/core/src/main/java/stirling/software/SPDF/service/AttachmentService.java @@ -8,6 +8,7 @@ import java.nio.file.attribute.FileTime; import java.time.Instant; import java.time.ZoneId; import java.time.ZonedDateTime; +import java.util.ArrayList; import java.util.GregorianCalendar; import java.util.HashMap; import java.util.HashSet; @@ -36,6 +37,9 @@ import io.github.pixee.security.Filenames; import lombok.extern.slf4j.Slf4j; +import stirling.software.SPDF.model.api.misc.AttachmentInfo; +import stirling.software.common.util.ExceptionUtils; + @Slf4j @Service public class AttachmentService implements AttachmentServiceInterface { @@ -216,6 +220,142 @@ public class AttachmentService implements AttachmentServiceInterface { } } + @Override + public List listAttachments(PDDocument document) throws IOException { + List attachments = new ArrayList<>(); + + PDDocumentCatalog catalog = document.getDocumentCatalog(); + if (catalog == null) { + return attachments; + } + + PDDocumentNameDictionary documentNames = catalog.getNames(); + if (documentNames == null) { + return attachments; + } + + PDEmbeddedFilesNameTreeNode embeddedFilesTree = documentNames.getEmbeddedFiles(); + if (embeddedFilesTree == null) { + return attachments; + } + + Map embeddedFiles = new LinkedHashMap<>(); + collectEmbeddedFiles(embeddedFilesTree, embeddedFiles); + + for (Map.Entry entry : embeddedFiles.entrySet()) { + PDComplexFileSpecification fileSpecification = entry.getValue(); + PDEmbeddedFile embeddedFile = 
getEmbeddedFile(fileSpecification); + + if (embeddedFile != null) { + String filename = determineFilename(entry.getKey(), fileSpecification); + String description = fileSpecification.getFileDescription(); + String contentType = embeddedFile.getSubtype(); + Long size = (long) embeddedFile.getSize(); + + String creationDate = null; + if (embeddedFile.getCreationDate() != null) { + creationDate = embeddedFile.getCreationDate().getTime().toString(); + } + + String modificationDate = null; + if (embeddedFile.getModDate() != null) { + modificationDate = embeddedFile.getModDate().getTime().toString(); + } + + AttachmentInfo attachmentInfo = + new AttachmentInfo( + filename, + size, + contentType, + description, + creationDate, + modificationDate); + + attachments.add(attachmentInfo); + } + } + + return attachments; + } + + @Override + public PDDocument renameAttachment(PDDocument document, String attachmentName, String newName) + throws IOException { + PDEmbeddedFilesNameTreeNode embeddedFilesTree = getEmbeddedFilesTree(document); + + Map allEmbeddedFiles = new LinkedHashMap<>(); + collectEmbeddedFiles(embeddedFilesTree, allEmbeddedFiles); + + PDComplexFileSpecification fileToRename = null; + String keyToRename = null; + + for (Map.Entry entry : allEmbeddedFiles.entrySet()) { + String currentName = determineFilename(entry.getKey(), entry.getValue()); + if (currentName.equals(attachmentName)) { + fileToRename = entry.getValue(); + keyToRename = entry.getKey(); + break; + } + } + + if (fileToRename == null || keyToRename == null) { + log.warn("Attachment '{}' not found for renaming", attachmentName); + throw ExceptionUtils.createIllegalArgumentException( + "error.attachmentNotFound", + "Attachment ''{0}'' not found for renaming", + attachmentName); + } + + fileToRename.setFile(newName); + fileToRename.setFileUnicode(newName); + + allEmbeddedFiles.remove(keyToRename); + allEmbeddedFiles.put(newName, fileToRename); + + embeddedFilesTree.setKids(null); + + 
embeddedFilesTree.setNames(allEmbeddedFiles); + log.info("Renamed attachment from '{}' to '{}'", attachmentName, newName); + + return document; + } + + @Override + public PDDocument deleteAttachment(PDDocument document, String attachmentName) + throws IOException { + PDEmbeddedFilesNameTreeNode embeddedFilesTree = getEmbeddedFilesTree(document); + + Map allEmbeddedFiles = new LinkedHashMap<>(); + collectEmbeddedFiles(embeddedFilesTree, allEmbeddedFiles); + + String keyToRemove = null; + + for (Map.Entry entry : allEmbeddedFiles.entrySet()) { + String currentName = determineFilename(entry.getKey(), entry.getValue()); + if (currentName.equals(attachmentName)) { + keyToRemove = entry.getKey(); + break; + } + } + + if (keyToRemove == null) { + log.warn("Attachment '{}' not found for deletion", attachmentName); + throw ExceptionUtils.createIllegalArgumentException( + "error.attachmentNotFound", + "Attachment ''{0}'' not found for deletion", + attachmentName); + } + + allEmbeddedFiles.remove(keyToRemove); + + embeddedFilesTree.setKids(null); + + embeddedFilesTree.setNames(allEmbeddedFiles); + log.info("Deleted attachment: '{}'", attachmentName); + + return document; + } + private String sanitizeFilename(String candidate) { String sanitized = Filenames.toSimpleFileName(candidate); if (StringUtils.isBlank(sanitized)) { diff --git a/app/core/src/main/java/stirling/software/SPDF/service/AttachmentServiceInterface.java b/app/core/src/main/java/stirling/software/SPDF/service/AttachmentServiceInterface.java index f9e1bfb67..2a6973107 100644 --- a/app/core/src/main/java/stirling/software/SPDF/service/AttachmentServiceInterface.java +++ b/app/core/src/main/java/stirling/software/SPDF/service/AttachmentServiceInterface.java @@ -7,10 +7,19 @@ import java.util.Optional; import org.apache.pdfbox.pdmodel.PDDocument; import org.springframework.web.multipart.MultipartFile; +import stirling.software.SPDF.model.api.misc.AttachmentInfo; + public interface AttachmentServiceInterface { 
PDDocument addAttachment(PDDocument document, List attachments) throws IOException; Optional extractAttachments(PDDocument document) throws IOException; + + List listAttachments(PDDocument document) throws IOException; + + PDDocument renameAttachment(PDDocument document, String attachmentName, String newName) + throws IOException; + + PDDocument deleteAttachment(PDDocument document, String attachmentName) throws IOException; } diff --git a/app/core/src/test/java/stirling/software/SPDF/controller/api/misc/AttachmentControllerTest.java b/app/core/src/test/java/stirling/software/SPDF/controller/api/misc/AttachmentControllerTest.java index afec68778..fe0e2ca2d 100644 --- a/app/core/src/test/java/stirling/software/SPDF/controller/api/misc/AttachmentControllerTest.java +++ b/app/core/src/test/java/stirling/software/SPDF/controller/api/misc/AttachmentControllerTest.java @@ -67,16 +67,16 @@ class AttachmentControllerTest { } @Test - void addAttachments_Success() throws IOException { + void addAttachments_Success() throws Exception { List attachments = List.of(attachment1, attachment2); request.setAttachments(attachments); request.setFileInput(pdfFile); ResponseEntity expectedResponse = ResponseEntity.ok("modified PDF content".getBytes()); - when(pdfDocumentFactory.load(pdfFile, false)).thenReturn(mockDocument); + when(pdfDocumentFactory.load(request, false)).thenReturn(mockDocument); when(pdfAttachmentService.addAttachment(mockDocument, attachments)) - .thenReturn(modifiedMockDocument); + .thenReturn(mockDocument); try (MockedStatic mockedWebResponseUtils = mockStatic(WebResponseUtils.class)) { @@ -84,8 +84,7 @@ class AttachmentControllerTest { .when( () -> WebResponseUtils.pdfDocToWebResponse( - eq(modifiedMockDocument), - eq("test_with_attachments.pdf"))) + eq(mockDocument), eq("test_with_attachments.pdf"))) .thenReturn(expectedResponse); ResponseEntity response = attachmentController.addAttachments(request); @@ -93,22 +92,22 @@ class AttachmentControllerTest { 
assertNotNull(response); assertEquals(HttpStatus.OK, response.getStatusCode()); assertNotNull(response.getBody()); - verify(pdfDocumentFactory).load(pdfFile, false); + verify(pdfDocumentFactory).load(request, false); verify(pdfAttachmentService).addAttachment(mockDocument, attachments); } } @Test - void addAttachments_SingleAttachment() throws IOException { + void addAttachments_SingleAttachment() throws Exception { List attachments = List.of(attachment1); request.setAttachments(attachments); request.setFileInput(pdfFile); ResponseEntity expectedResponse = ResponseEntity.ok("modified PDF content".getBytes()); - when(pdfDocumentFactory.load(pdfFile, false)).thenReturn(mockDocument); + when(pdfDocumentFactory.load(request, false)).thenReturn(mockDocument); when(pdfAttachmentService.addAttachment(mockDocument, attachments)) - .thenReturn(modifiedMockDocument); + .thenReturn(mockDocument); try (MockedStatic mockedWebResponseUtils = mockStatic(WebResponseUtils.class)) { @@ -116,8 +115,7 @@ class AttachmentControllerTest { .when( () -> WebResponseUtils.pdfDocToWebResponse( - eq(modifiedMockDocument), - eq("test_with_attachments.pdf"))) + eq(mockDocument), eq("test_with_attachments.pdf"))) .thenReturn(expectedResponse); ResponseEntity response = attachmentController.addAttachments(request); @@ -125,33 +123,33 @@ class AttachmentControllerTest { assertNotNull(response); assertEquals(HttpStatus.OK, response.getStatusCode()); assertNotNull(response.getBody()); - verify(pdfDocumentFactory).load(pdfFile, false); + verify(pdfDocumentFactory).load(request, false); verify(pdfAttachmentService).addAttachment(mockDocument, attachments); } } @Test - void addAttachments_IOExceptionFromPDFLoad() throws IOException { + void addAttachments_IOExceptionFromPDFLoad() throws Exception { List attachments = List.of(attachment1); request.setAttachments(attachments); request.setFileInput(pdfFile); IOException ioException = new IOException("Failed to load PDF"); - 
when(pdfDocumentFactory.load(pdfFile, false)).thenThrow(ioException); + when(pdfDocumentFactory.load(request, false)).thenThrow(ioException); assertThrows(IOException.class, () -> attachmentController.addAttachments(request)); - verify(pdfDocumentFactory).load(pdfFile, false); + verify(pdfDocumentFactory).load(request, false); verifyNoInteractions(pdfAttachmentService); } @Test - void addAttachments_IOExceptionFromAttachmentService() throws IOException { + void addAttachments_IOExceptionFromAttachmentService() throws Exception { List attachments = List.of(attachment1); request.setAttachments(attachments); request.setFileInput(pdfFile); IOException ioException = new IOException("Failed to add attachment"); - when(pdfDocumentFactory.load(pdfFile, false)).thenReturn(mockDocument); + when(pdfDocumentFactory.load(request, false)).thenReturn(mockDocument); when(pdfAttachmentService.addAttachment(mockDocument, attachments)).thenThrow(ioException); assertThrows(IOException.class, () -> attachmentController.addAttachments(request)); diff --git a/frontend/public/locales/en-GB/translation.toml b/frontend/public/locales/en-GB/translation.toml index f3bd4c5d6..6d27d6f84 100644 --- a/frontend/public/locales/en-GB/translation.toml +++ b/frontend/public/locales/en-GB/translation.toml @@ -1394,6 +1394,11 @@ header = "Add Attachments" add = "Add Attachment" remove = "Remove Attachment" embed = "Embed Attachment" +convertToPdfA3b = "Convert to PDF/A-3b" +convertToPdfA3bDescription = "Creates an archival PDF with embedded attachments" +convertToPdfA3bTooltip = "PDF/A-3b is an archival format ensuring long-term preservation. It allows embedding arbitrary file formats as attachments. Conversion requires Ghostscript and may take longer for large files." 
+convertToPdfA3bTooltipHeader = "About PDF/A-3b Conversion" +convertToPdfA3bTooltipTitle = "What it does" submit = "Add Attachments" [watermark] diff --git a/frontend/src/core/components/tools/addAttachments/AddAttachmentsSettings.tsx b/frontend/src/core/components/tools/addAttachments/AddAttachmentsSettings.tsx index 33d9e8b33..2108c3c0b 100644 --- a/frontend/src/core/components/tools/addAttachments/AddAttachmentsSettings.tsx +++ b/frontend/src/core/components/tools/addAttachments/AddAttachmentsSettings.tsx @@ -1,13 +1,14 @@ /** * AddAttachmentsSettings - Shared settings component for both tool UI and automation * - * Allows selecting files to attach to PDFs. + * Allows selecting files to attach to PDFs with optional PDF/A-3b conversion support. */ -import { Stack, Text, Group, ActionIcon, ScrollArea, Button } from "@mantine/core"; +import { Stack, Text, Group, ActionIcon, ScrollArea, Button, Checkbox } from "@mantine/core"; import { useTranslation } from "react-i18next"; import { AddAttachmentsParameters } from "@app/hooks/tools/addAttachments/useAddAttachmentsParameters"; import LocalIcon from "@app/components/shared/LocalIcon"; +import { Tooltip } from "@app/components/shared/Tooltip"; interface AddAttachmentsSettingsProps { parameters: AddAttachmentsParameters; @@ -103,6 +104,40 @@ const AddAttachmentsSettings = ({ parameters, onParameterChange, disabled = fals )} + + {/* PDF/A-3b conversion option with informative tooltip */} + + + {t("attachments.convertToPdfA3b", "Convert to PDF/A-3b")} + + + + + } + description={t("attachments.convertToPdfA3bDescription", "Creates an archival PDF with embedded attachments")} + checked={parameters.convertToPdfA3b} + onChange={(event) => onParameterChange('convertToPdfA3b', event.currentTarget.checked)} + disabled={disabled} + styles={{ root: { flex: 1 } }} + /> + ); }; diff --git a/frontend/src/core/hooks/tools/addAttachments/useAddAttachmentsOperation.ts 
b/frontend/src/core/hooks/tools/addAttachments/useAddAttachmentsOperation.ts index 9785fa998..4614a9cbf 100644 --- a/frontend/src/core/hooks/tools/addAttachments/useAddAttachmentsOperation.ts +++ b/frontend/src/core/hooks/tools/addAttachments/useAddAttachmentsOperation.ts @@ -16,6 +16,8 @@ const buildFormData = (parameters: AddAttachmentsParameters, file: File): FormDa if (attachment) formData.append("attachments", attachment); }); + formData.append("convertToPdfA3b", String(parameters.convertToPdfA3b)); + return formData; }; diff --git a/frontend/src/core/hooks/tools/addAttachments/useAddAttachmentsParameters.ts b/frontend/src/core/hooks/tools/addAttachments/useAddAttachmentsParameters.ts index ce21e3869..1e66120b7 100644 --- a/frontend/src/core/hooks/tools/addAttachments/useAddAttachmentsParameters.ts +++ b/frontend/src/core/hooks/tools/addAttachments/useAddAttachmentsParameters.ts @@ -2,10 +2,12 @@ import { useState } from 'react'; export interface AddAttachmentsParameters { attachments: File[]; + convertToPdfA3b: boolean; } const defaultParameters: AddAttachmentsParameters = { - attachments: [] + attachments: [], + convertToPdfA3b: false }; export const useAddAttachmentsParameters = () => { @@ -33,3 +35,5 @@ export const useAddAttachmentsParameters = () => { validateParameters }; }; + +export const DEFAULT_ADD_ATTACHMENTS_PARAMETERS: AddAttachmentsParameters = defaultParameters;