From cc1caa993f098cbed7ea0db5c518f09e1ef2681d Mon Sep 17 00:00:00 2001 From: Ludy Date: Thu, 16 Oct 2025 23:23:24 +0200 Subject: [PATCH] =?UTF-8?q?feat(attachments):=20add=20=E2=80=9CExtract=20A?= =?UTF-8?q?ttachments=E2=80=9D=20tool=20to=20export=20embedded=20files=20a?= =?UTF-8?q?s=20ZIP=20(#4645)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This pull request introduces a new feature that allows users to extract all embedded attachments from a PDF and download them as a ZIP archive. The implementation includes backend support for extracting attachments, a new API endpoint, updates to the service layer, internationalization for UI strings, and a new web form for user interaction. **New PDF Attachment Extraction Feature** _Backend/API:_ * Added a new API endpoint `/api/v1/misc/extract-attachments` in `AttachmentController` to extract all embedded attachments from a PDF and return them as a ZIP archive. Returns an error if no attachments are found. * Implemented `extractAttachments` method in `AttachmentService` and its interface to traverse embedded files in the PDF, collect them, and package them into a ZIP. Handles unique filenames and file metadata. [[1]](diffhunk://#diff-31bc10a74e0d6a01a558585a760f1861009719d76de323eedf7205f1cd6dd417R109-R266) [[2]](diffhunk://#diff-67f2128bf0a88be44c20ca02d5b03be5496d70160377da4e66e72379e8f5ddc8R14-R15) * Introduced `ExtractAttachmentsRequest` model to handle extraction requests. _User Interface:_ * Added a new web form (`misc/extract-attachments.html`) for users to upload a PDF and download its attachments as a ZIP. * Created a new web route and controller method to serve the extraction form. * Updated navigation menus to include the new "Extract Attachments" tool. [[1]](diffhunk://#diff-22a78471c93650a470526aa92780ba43739d475858fd528f180272039dfaa543R261-R263) [[2]](diffhunk://#diff-0bdef49007d770c7afb60ea9c618869ad3a4e96366e82a00b9424b88cdaa3f76R296-R298) _Internationalization & Messaging:_ * Added English and German translations for all new UI elements and error messages related to attachment extraction. [[1]](diffhunk://#diff-ee1c6999a33498cfa3abba4a384e73a8b8269856899438de80560c965079a9fdR236) [[2]](diffhunk://#diff-482633b22866efc985222c4a14efc5b7d2487b59f39b953f038273a39d0362f7R236) [[3]](diffhunk://#diff-ee1c6999a33498cfa3abba4a384e73a8b8269856899438de80560c965079a9fdR643-R646) [[4]](diffhunk://#diff-482633b22866efc985222c4a14efc5b7d2487b59f39b953f038273a39d0362f7R643-R646) [[5]](diffhunk://#diff-ee1c6999a33498cfa3abba4a384e73a8b8269856899438de80560c965079a9fdR1342-R1347) [[6]](diffhunk://#diff-482633b22866efc985222c4a14efc5b7d2487b59f39b953f038273a39d0362f7R1342-R1347) These changes together provide a seamless way for users to extract and download all embedded files from a PDF document. ## Checklist ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [x] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details. - [ ] --- .../api/misc/AttachmentController.java | 32 +++ .../controller/web/OtherWebController.java | 7 + .../api/misc/ExtractAttachmentsRequest.java | 10 + .../SPDF/service/AttachmentService.java | 238 ++++++++++++++++++ .../service/AttachmentServiceInterface.java | 3 + .../main/resources/messages_de_DE.properties | 11 + .../main/resources/messages_en_GB.properties | 11 + .../templates/fragments/navElements.html | 3 + .../main/resources/templates/home-legacy.html | 3 + .../templates/misc/extract-attachments.html | 52 ++++ .../SPDF/service/AttachmentServiceTest.java | 86 +++++++ 11 files changed, 456 insertions(+) create mode 100644 app/core/src/main/java/stirling/software/SPDF/model/api/misc/ExtractAttachmentsRequest.java create mode 100644 app/core/src/main/resources/templates/misc/extract-attachments.html diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/AttachmentController.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/AttachmentController.java index 85f18749e..f994cdb09 100644 --- a/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/AttachmentController.java +++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/AttachmentController.java @@ -2,6 +2,7 @@ package stirling.software.SPDF.controller.api.misc; import java.io.IOException; import java.util.List; +import java.util.Optional; import org.apache.pdfbox.pdmodel.PDDocument; import org.springframework.http.MediaType; @@ -20,8 +21,10 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import stirling.software.SPDF.model.api.misc.AddAttachmentRequest; +import stirling.software.SPDF.model.api.misc.ExtractAttachmentsRequest; import stirling.software.SPDF.service.AttachmentServiceInterface; import stirling.software.common.service.CustomPDFDocumentFactory; +import stirling.software.common.util.ExceptionUtils; import stirling.software.common.util.GeneralUtils; import stirling.software.common.util.WebResponseUtils; @@ -56,4 +59,33 @@ public class AttachmentController { Filenames.toSimpleFileName(fileInput.getOriginalFilename()), "_with_attachments.pdf")); } + + @PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/extract-attachments") + @Operation( + summary = "Extract attachments from PDF", + description = + "This endpoint extracts all embedded attachments from a PDF into a ZIP archive." + + " Input:PDF Output:ZIP Type:SISO") + public ResponseEntity extractAttachments( + @ModelAttribute ExtractAttachmentsRequest request) throws IOException { + try (PDDocument document = pdfDocumentFactory.load(request, true)) { + Optional extracted = pdfAttachmentService.extractAttachments(document); + + if (extracted.isEmpty()) { + throw ExceptionUtils.createIllegalArgumentException( + "error.noAttachmentsFound", + "No embedded attachments found in the provided PDF"); + } + + MultipartFile fileInput = request.getFileInput(); + String sourceName = + fileInput != null ? fileInput.getOriginalFilename() : request.getFileId(); + String outputName = + Filenames.toSimpleFileName( + GeneralUtils.generateFilename(sourceName, "_attachments.zip")); + + return WebResponseUtils.bytesToWebResponse( + extracted.get(), outputName, MediaType.APPLICATION_OCTET_STREAM); + } + } } diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/web/OtherWebController.java b/app/core/src/main/java/stirling/software/SPDF/controller/web/OtherWebController.java index bc63a4b84..09dd46cec 100644 --- a/app/core/src/main/java/stirling/software/SPDF/controller/web/OtherWebController.java +++ b/app/core/src/main/java/stirling/software/SPDF/controller/web/OtherWebController.java @@ -198,4 +198,11 @@ public class OtherWebController { model.addAttribute("currentPage", "add-attachments"); return "misc/add-attachments"; } + + @GetMapping("/extract-attachments") + @Hidden + public String extractAttachmentsForm(Model model) { + model.addAttribute("currentPage", "extract-attachments"); + return "misc/extract-attachments"; + } } diff --git a/app/core/src/main/java/stirling/software/SPDF/model/api/misc/ExtractAttachmentsRequest.java b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/ExtractAttachmentsRequest.java new file mode 100644 index 000000000..01d666675 --- /dev/null +++ b/app/core/src/main/java/stirling/software/SPDF/model/api/misc/ExtractAttachmentsRequest.java @@ -0,0 +1,10 @@ +package stirling.software.SPDF.model.api.misc; + +import lombok.Data; +import lombok.EqualsAndHashCode; + +import stirling.software.common.model.api.PDFFile; + +@Data +@EqualsAndHashCode(callSuper = true) +public class ExtractAttachmentsRequest extends PDFFile {} diff --git a/app/core/src/main/java/stirling/software/SPDF/service/AttachmentService.java b/app/core/src/main/java/stirling/software/SPDF/service/AttachmentService.java index 6eb74dfb8..029e7086c 100644 --- a/app/core/src/main/java/stirling/software/SPDF/service/AttachmentService.java +++ b/app/core/src/main/java/stirling/software/SPDF/service/AttachmentService.java @@ -2,14 +2,22 @@ package stirling.software.SPDF.service; import static stirling.software.common.util.AttachmentUtils.setCatalogViewerPreferences; +import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.nio.file.attribute.FileTime; import java.time.Instant; import java.time.ZoneId; import java.time.ZonedDateTime; import java.util.GregorianCalendar; import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; import org.apache.commons.lang3.StringUtils; import org.apache.pdfbox.pdmodel.PDDocument; @@ -17,17 +25,37 @@ import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode; import org.apache.pdfbox.pdmodel.PageMode; +import org.apache.pdfbox.pdmodel.common.PDNameTreeNode; import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification; import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile; +import org.apache.pdfbox.pdmodel.common.filespecification.PDFileSpecification; import org.springframework.stereotype.Service; import org.springframework.web.multipart.MultipartFile; +import io.github.pixee.security.Filenames; + import lombok.extern.slf4j.Slf4j; @Slf4j @Service public class AttachmentService implements AttachmentServiceInterface { + private static final long DEFAULT_MAX_ATTACHMENT_SIZE_BYTES = 50L * 1024 * 1024; // 50 MB + private static final long DEFAULT_MAX_TOTAL_ATTACHMENT_SIZE_BYTES = + 200L * 1024 * 1024; // 200 MB + + private final long maxAttachmentSizeBytes; + private final long maxTotalAttachmentSizeBytes; + + public AttachmentService() { + this(DEFAULT_MAX_ATTACHMENT_SIZE_BYTES, DEFAULT_MAX_TOTAL_ATTACHMENT_SIZE_BYTES); + } + + public AttachmentService(long maxAttachmentSizeBytes, long maxTotalAttachmentSizeBytes) { + this.maxAttachmentSizeBytes = maxAttachmentSizeBytes; + this.maxTotalAttachmentSizeBytes = maxTotalAttachmentSizeBytes; + } + @Override public PDDocument addAttachment(PDDocument document, List attachments) throws IOException { @@ -93,6 +121,216 @@ public class AttachmentService implements AttachmentServiceInterface { return document; } + @Override + public Optional extractAttachments(PDDocument document) throws IOException { + PDDocumentCatalog catalog = document.getDocumentCatalog(); + if (catalog == null) { + return Optional.empty(); + } + + PDDocumentNameDictionary documentNames = catalog.getNames(); + if (documentNames == null) { + return Optional.empty(); + } + + PDEmbeddedFilesNameTreeNode embeddedFilesTree = documentNames.getEmbeddedFiles(); + if (embeddedFilesTree == null) { + return Optional.empty(); + } + + Map embeddedFiles = new LinkedHashMap<>(); + collectEmbeddedFiles(embeddedFilesTree, embeddedFiles); + + if (embeddedFiles.isEmpty()) { + return Optional.empty(); + } + + try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ZipOutputStream zipOutputStream = new ZipOutputStream(baos)) { + Set usedNames = new HashSet<>(); + boolean hasExtractedAttachments = false; + long totalBytesWritten = 0L; + + for (Map.Entry entry : embeddedFiles.entrySet()) { + PDComplexFileSpecification fileSpecification = entry.getValue(); + PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpecification); + + if (embeddedFile == null) { + log.debug( + "Skipping attachment {} because embedded file was null", + entry.getKey()); + continue; + } + + String filename = determineFilename(entry.getKey(), fileSpecification); + filename = Filenames.toSimpleFileName(filename); + String sanitizedFilename = sanitizeFilename(filename); + + Optional attachmentData = readAttachmentData(embeddedFile); + if (attachmentData.isEmpty()) { + log.warn( + "Skipping attachment '{}' because it exceeds the size limit of {} bytes", + sanitizedFilename, + maxAttachmentSizeBytes); + continue; + } + + byte[] data = attachmentData.get(); + if (maxTotalAttachmentSizeBytes > 0 + && (data.length + totalBytesWritten) > maxTotalAttachmentSizeBytes) { + log.warn( + "Skipping attachment '{}' because the total size would exceed {} bytes", + sanitizedFilename, + maxTotalAttachmentSizeBytes); + continue; + } + + String uniqueFilename = ensureUniqueFilename(sanitizedFilename, usedNames); + + ZipEntry zipEntry = new ZipEntry(uniqueFilename); + if (embeddedFile.getModDate() != null) { + zipEntry.setLastModifiedTime( + FileTime.from(embeddedFile.getModDate().toInstant())); + } + if (embeddedFile.getCreationDate() != null) { + zipEntry.setCreationTime( + FileTime.from(embeddedFile.getCreationDate().toInstant())); + } + zipEntry.setSize(data.length); + + zipOutputStream.putNextEntry(zipEntry); + zipOutputStream.write(data); + zipOutputStream.closeEntry(); + hasExtractedAttachments = true; + totalBytesWritten += data.length; + log.info("Extracted attachment '{}' ({} bytes)", uniqueFilename, data.length); + } + + zipOutputStream.finish(); + + if (!hasExtractedAttachments) { + return Optional.empty(); + } + + return Optional.of(baos.toByteArray()); + } + } + + private String sanitizeFilename(String candidate) { + String sanitized = Filenames.toSimpleFileName(candidate); + if (StringUtils.isBlank(sanitized)) { + sanitized = generateDefaultFilename(); + } + return sanitized; + } + + private String generateDefaultFilename() { + return "unknown_attachment_" + System.currentTimeMillis(); + } + + private Optional readAttachmentData(PDEmbeddedFile embeddedFile) throws IOException { + try (var inputStream = embeddedFile.createInputStream(); + var buffer = new ByteArrayOutputStream()) { + byte[] chunk = new byte[8192]; + long total = 0L; + int read; + while ((read = inputStream.read(chunk)) != -1) { + total += read; + if (maxAttachmentSizeBytes > 0 && total > maxAttachmentSizeBytes) { + return Optional.empty(); + } + buffer.write(chunk, 0, read); + } + return Optional.of(buffer.toByteArray()); + } + } + + private void collectEmbeddedFiles( + PDNameTreeNode node, + Map collector) + throws IOException { + if (node == null) { + return; + } + + Map names = node.getNames(); + if (names != null) { + collector.putAll(names); + } + + List> kids = node.getKids(); + if (kids != null) { + for (PDNameTreeNode kid : kids) { + collectEmbeddedFiles(kid, collector); + } + } + } + + private PDEmbeddedFile getEmbeddedFile(PDFileSpecification fileSpecification) { + if (!(fileSpecification instanceof PDComplexFileSpecification complexSpecification)) { + return null; + } + + if (complexSpecification.getEmbeddedFileUnicode() != null) { + return complexSpecification.getEmbeddedFileUnicode(); + } + if (complexSpecification.getEmbeddedFile() != null) { + return complexSpecification.getEmbeddedFile(); + } + if (complexSpecification.getEmbeddedFileDos() != null) { + return complexSpecification.getEmbeddedFileDos(); + } + if (complexSpecification.getEmbeddedFileMac() != null) { + return complexSpecification.getEmbeddedFileMac(); + } + return complexSpecification.getEmbeddedFileUnix(); + } + + private String determineFilename(String key, PDComplexFileSpecification specification) { + if (specification == null) { + return fallbackFilename(key); + } + + String name = specification.getFileUnicode(); + if (StringUtils.isBlank(name)) { + name = specification.getFilename(); + } + if (StringUtils.isBlank(name)) { + name = specification.getFile(); + } + if (StringUtils.isBlank(name)) { + name = key; + } + return fallbackFilename(name); + } + + private String fallbackFilename(String candidate) { + if (StringUtils.isBlank(candidate)) { + return "unknown_attachment_" + System.currentTimeMillis(); + } + return candidate; + } + + private String ensureUniqueFilename(String filename, Set usedNames) { + String baseName = filename; + String extension = ""; + int lastDot = filename.lastIndexOf('.'); + if (lastDot > 0 && lastDot < filename.length() - 1) { + baseName = filename.substring(0, lastDot); + extension = filename.substring(lastDot); + } + + String uniqueName = filename; + int counter = 1; + while (usedNames.contains(uniqueName)) { + uniqueName = baseName + "_" + counter + extension; + counter++; + } + + usedNames.add(uniqueName); + return uniqueName; + } + private PDEmbeddedFilesNameTreeNode getEmbeddedFilesTree(PDDocument document) { PDDocumentCatalog catalog = document.getDocumentCatalog(); PDDocumentNameDictionary documentNames = catalog.getNames(); diff --git a/app/core/src/main/java/stirling/software/SPDF/service/AttachmentServiceInterface.java b/app/core/src/main/java/stirling/software/SPDF/service/AttachmentServiceInterface.java index c684a429d..f9e1bfb67 100644 --- a/app/core/src/main/java/stirling/software/SPDF/service/AttachmentServiceInterface.java +++ b/app/core/src/main/java/stirling/software/SPDF/service/AttachmentServiceInterface.java @@ -2,6 +2,7 @@ package stirling.software.SPDF.service; import java.io.IOException; import java.util.List; +import java.util.Optional; import org.apache.pdfbox.pdmodel.PDDocument; import org.springframework.web.multipart.MultipartFile; @@ -10,4 +11,6 @@ public interface AttachmentServiceInterface { PDDocument addAttachment(PDDocument document, List attachments) throws IOException; + + Optional extractAttachments(PDDocument document) throws IOException; } diff --git a/app/core/src/main/resources/messages_de_DE.properties b/app/core/src/main/resources/messages_de_DE.properties index 401c9cac2..510c4f5b6 100644 --- a/app/core/src/main/resources/messages_de_DE.properties +++ b/app/core/src/main/resources/messages_de_DE.properties @@ -233,6 +233,7 @@ error.angleNotMultipleOf90=Der Winkel muss ein Vielfaches von 90 sein. error.pdfBookmarksNotFound=Keine PDF-Lesezeichen/Gliederung im Dokument gefunden. error.fontLoadingFailed=Fehler bei der Verarbeitung der Schriftdatei. error.fontDirectoryReadFailed=Konnte das Schriftverzeichnis nicht lesen. +error.noAttachmentsFound=Keine eingebetteten Anhänge im bereitgestellten PDF gefunden. delete=Löschen username=Benutzername password=Passwort @@ -639,6 +640,10 @@ home.attachments.title=Anhänge hinzufügen home.attachments.desc=Eingebettete Dateien (Anhänge) zu einem PDF hinzufügen oder daraus entfernen attachments.tags=einbetten, anhängen, datei, anhang, anhänge +home.extractAttachments.title=Anhänge extrahieren +home.extractAttachments.desc=Eingebettete Dateien (Anhänge) aus einem PDF als ZIP herunterladen +extractAttachments.tags=anhänge,extrahieren,einbetten,dateien,zip + home.watermark.title=Wasserzeichen hinzufügen home.watermark.desc=Fügen Sie ein eigenes Wasserzeichen zu Ihrem PDF hinzu watermark.tags=text,wiederholend,beschriftung,besitzen,urheberrecht,marke,img,jpg,bild,foto @@ -1334,6 +1339,12 @@ attachments.description=Ermöglicht das Hinzufügen von Anhängen zum PDF attachments.descriptionPlaceholder=Beschreibung für die Anhänge eingeben... attachments.addButton=Anhänge hinzufügen +#extractAttachments +extractAttachments.title=Anhänge extrahieren +extractAttachments.header=Anhänge extrahieren +extractAttachments.description=Exportiert alle eingebetteten Dateien aus dem PDF in ein ZIP-Archiv. +extractAttachments.downloadButton=Anhänge extrahieren + #merge merge.title=Zusammenführen merge.header=Mehrere PDFs zusammenführen (2+) diff --git a/app/core/src/main/resources/messages_en_GB.properties b/app/core/src/main/resources/messages_en_GB.properties index c60f48eb1..1eee0ab11 100644 --- a/app/core/src/main/resources/messages_en_GB.properties +++ b/app/core/src/main/resources/messages_en_GB.properties @@ -233,6 +233,7 @@ error.angleNotMultipleOf90=Angle must be a multiple of 90 error.pdfBookmarksNotFound=No PDF bookmarks/outline found in document error.fontLoadingFailed=Error processing font file error.fontDirectoryReadFailed=Failed to read font directory +error.noAttachmentsFound=No embedded attachments were found in the provided PDF. delete=Delete username=Username password=Password @@ -639,6 +640,10 @@ home.attachments.title=Add Attachments home.attachments.desc=Add or remove embedded files (attachments) to/from a PDF attachments.tags=embed,attach,file,attachment,attachments +home.extractAttachments.title=Extract Attachments +home.extractAttachments.desc=Download embedded files (attachments) from a PDF as a ZIP archive +extractAttachments.tags=attachments,extract,embedded,files,zip + home.watermark.title=Add Watermark home.watermark.desc=Add a custom watermark to your PDF document. watermark.tags=Text,repeating,label,own,copyright,trademark,img,jpg,picture,photo @@ -1334,6 +1339,12 @@ attachments.description=Allows you to add attachments to the PDF attachments.descriptionPlaceholder=Enter a description for the attachments... attachments.addButton=Add Attachments +#extractAttachments +extractAttachments.title=Extract Attachments +extractAttachments.header=Extract attachments +extractAttachments.description=Exports all embedded files from the PDF into a ZIP archive. +extractAttachments.downloadButton=Extract Attachments + #merge merge.title=Merge merge.header=Merge multiple PDFs (2+) diff --git a/app/core/src/main/resources/templates/fragments/navElements.html b/app/core/src/main/resources/templates/fragments/navElements.html index 5c7eb9590..d235dbbdf 100644 --- a/app/core/src/main/resources/templates/fragments/navElements.html +++ b/app/core/src/main/resources/templates/fragments/navElements.html @@ -270,6 +270,9 @@
+
+
diff --git a/app/core/src/main/resources/templates/home-legacy.html b/app/core/src/main/resources/templates/home-legacy.html index 3c01bcbd6..ea4cc0295 100644 --- a/app/core/src/main/resources/templates/home-legacy.html +++ b/app/core/src/main/resources/templates/home-legacy.html @@ -293,6 +293,9 @@
+
+
diff --git a/app/core/src/main/resources/templates/misc/extract-attachments.html b/app/core/src/main/resources/templates/misc/extract-attachments.html new file mode 100644 index 000000000..b1b12ca05 --- /dev/null +++ b/app/core/src/main/resources/templates/misc/extract-attachments.html @@ -0,0 +1,52 @@ + + + + + + + + + +
+
+ +

+
+
+
+
+ folder_zip + +
+ +

+ +
+ +
+
+ + + +
+
+
+
+
+ +
+ + + \ No newline at end of file diff --git a/app/core/src/test/java/stirling/software/SPDF/service/AttachmentServiceTest.java b/app/core/src/test/java/stirling/software/SPDF/service/AttachmentServiceTest.java index 0ca86b8da..387ed0e33 100644 --- a/app/core/src/test/java/stirling/software/SPDF/service/AttachmentServiceTest.java +++ b/app/core/src/test/java/stirling/software/SPDF/service/AttachmentServiceTest.java @@ -7,11 +7,15 @@ import static org.mockito.Mockito.when; import java.io.ByteArrayInputStream; import java.io.IOException; import java.util.List; +import java.util.Optional; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; import org.apache.pdfbox.pdmodel.PDDocument; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.springframework.http.MediaType; +import org.springframework.mock.web.MockMultipartFile; import org.springframework.web.multipart.MultipartFile; class AttachmentServiceTest { @@ -105,4 +109,86 @@ class AttachmentServiceTest { assertNotNull(result.getDocumentCatalog().getNames()); } } + + @Test + void extractAttachments_SanitizesFilenamesAndExtractsData() throws IOException { + attachmentService = new AttachmentService(1024 * 1024, 5 * 1024 * 1024); + + try (var document = new PDDocument()) { + var maliciousAttachment = + new MockMultipartFile( + "file", + "..\\evil/../../tricky.txt", + MediaType.TEXT_PLAIN_VALUE, + "danger".getBytes()); + + attachmentService.addAttachment(document, List.of(maliciousAttachment)); + + Optional extracted = attachmentService.extractAttachments(document); + assertTrue(extracted.isPresent()); + + try (var zipInputStream = + new ZipInputStream(new ByteArrayInputStream(extracted.get()))) { + ZipEntry entry = zipInputStream.getNextEntry(); + assertNotNull(entry); + String sanitizedName = entry.getName(); + + assertFalse(sanitizedName.contains("..")); + assertFalse(sanitizedName.contains("/")); + assertFalse(sanitizedName.contains("\\")); + + byte[] data = zipInputStream.readAllBytes(); + assertArrayEquals("danger".getBytes(), data); + assertNull(zipInputStream.getNextEntry()); + } + } + } + + @Test + void extractAttachments_SkipsAttachmentsExceedingSizeLimit() throws IOException { + attachmentService = new AttachmentService(4, 10); + + try (var document = new PDDocument()) { + var oversizedAttachment = + new MockMultipartFile( + "file", + "large.bin", + MediaType.APPLICATION_OCTET_STREAM_VALUE, + "too big".getBytes()); + + attachmentService.addAttachment(document, List.of(oversizedAttachment)); + + Optional extracted = attachmentService.extractAttachments(document); + assertTrue(extracted.isEmpty()); + } + } + + @Test + void extractAttachments_EnforcesTotalSizeLimit() throws IOException { + attachmentService = new AttachmentService(10, 9); + + try (var document = new PDDocument()) { + var first = + new MockMultipartFile( + "file", "first.txt", MediaType.TEXT_PLAIN_VALUE, "12345".getBytes()); + var second = + new MockMultipartFile( + "file", "second.txt", MediaType.TEXT_PLAIN_VALUE, "67890".getBytes()); + + attachmentService.addAttachment(document, List.of(first, second)); + + Optional extracted = attachmentService.extractAttachments(document); + assertTrue(extracted.isPresent()); + + try (var zipInputStream = + new ZipInputStream(new ByteArrayInputStream(extracted.get()))) { + ZipEntry firstEntry = zipInputStream.getNextEntry(); + assertNotNull(firstEntry); + assertEquals("first.txt", firstEntry.getName()); + byte[] firstData = zipInputStream.readNBytes(5); + assertArrayEquals("12345".getBytes(), firstData); + assertNull(zipInputStream.getNextEntry()); + } + } + } }