feat(attachments): add “Extract Attachments” tool to export embedded files as ZIP (#4645)

This pull request introduces a new feature that allows users to extract all embedded attachments from a PDF and download them as a ZIP archive. The implementation includes backend support for extracting attachments, a new API endpoint, updates to the service layer, internationalization for UI strings, and a new web form for user interaction.

**New PDF Attachment Extraction Feature**

_Backend/API:_

* Added a new API endpoint `/api/v1/misc/extract-attachments` in `AttachmentController` to extract all embedded attachments from a PDF and return them as a ZIP archive. Returns an error if no attachments are found.
* Implemented `extractAttachments` method in `AttachmentService` and its interface to traverse embedded files in the PDF, collect them, and package them into a ZIP. Handles unique filenames and file metadata. [[1]](diffhunk://#diff-31bc10a74e0d6a01a558585a760f1861009719d76de323eedf7205f1cd6dd417R109-R266) [[2]](diffhunk://#diff-67f2128bf0a88be44c20ca02d5b03be5496d70160377da4e66e72379e8f5ddc8R14-R15)
* Introduced `ExtractAttachmentsRequest` model to handle extraction requests.

_User Interface:_

* Added a new web form (`misc/extract-attachments.html`) for users to upload a PDF and download its attachments as a ZIP.
* Created a new web route and controller method to serve the extraction form.
* Updated navigation menus to include the new "Extract Attachments" tool. [[1]](diffhunk://#diff-22a78471c93650a470526aa92780ba43739d475858fd528f180272039dfaa543R261-R263) [[2]](diffhunk://#diff-0bdef49007d770c7afb60ea9c618869ad3a4e96366e82a00b9424b88cdaa3f76R296-R298)

_Internationalization & Messaging:_

* Added English and German translations for all new UI elements and error messages related to attachment extraction. [[1]](diffhunk://#diff-ee1c6999a33498cfa3abba4a384e73a8b8269856899438de80560c965079a9fdR236) [[2]](diffhunk://#diff-482633b22866efc985222c4a14efc5b7d2487b59f39b953f038273a39d0362f7R236) [[3]](diffhunk://#diff-ee1c6999a33498cfa3abba4a384e73a8b8269856899438de80560c965079a9fdR643-R646) [[4]](diffhunk://#diff-482633b22866efc985222c4a14efc5b7d2487b59f39b953f038273a39d0362f7R643-R646) [[5]](diffhunk://#diff-ee1c6999a33498cfa3abba4a384e73a8b8269856899438de80560c965079a9fdR1342-R1347) [[6]](diffhunk://#diff-482633b22866efc985222c4a14efc5b7d2487b59f39b953f038273a39d0362f7R1342-R1347)

These changes together provide a seamless way for users to extract and download all embedded files from a PDF document.

## Checklist

### General

- [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable)
- [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed)
- [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details.
- [ ]
2026-02-17 13:52:14 +01:00 · 2025-10-16 23:23:24 +02:00
parent 614d410dce
commit cc1caa993f
11 changed files with 456 additions and 0 deletions
--- a/app/core/src/test/java/stirling/software/SPDF/service/AttachmentServiceTest.java
+++ b/app/core/src/test/java/stirling/software/SPDF/service/AttachmentServiceTest.java
@@ -7,11 +7,15 @@ import static org.mockito.Mockito.when;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.util.List;
+import java.util.Optional;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;

 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import org.springframework.http.MediaType;
+import org.springframework.mock.web.MockMultipartFile;
 import org.springframework.web.multipart.MultipartFile;

 class AttachmentServiceTest {
@@ -105,4 +109,86 @@ class AttachmentServiceTest {
            assertNotNull(result.getDocumentCatalog().getNames());
        }
    }
+
+    /**
+     * Attaches a file whose name is built from path-traversal fragments, extracts the attachments,
+     * and checks that the resulting ZIP holds exactly one entry whose name contains no {@code ..},
+     * forward slash, or backslash, and whose content equals the original payload.
+     */
+    @Test
+    void extractAttachments_SanitizesFilenamesAndExtractsData() throws IOException {
+        attachmentService = new AttachmentService(1024 * 1024, 5 * 1024 * 1024);
+
+        try (PDDocument pdf = new PDDocument()) {
+            MockMultipartFile traversalNamedFile =
+                    new MockMultipartFile(
+                            "file",
+                            "..\\evil/../../tricky.txt",
+                            MediaType.TEXT_PLAIN_VALUE,
+                            "danger".getBytes());
+            attachmentService.addAttachment(pdf, List.of(traversalNamedFile));
+
+            Optional<byte[]> zipBytes = attachmentService.extractAttachments(pdf);
+            assertTrue(zipBytes.isPresent());
+
+            try (ZipInputStream zip =
+                    new ZipInputStream(new ByteArrayInputStream(zipBytes.get()))) {
+                ZipEntry onlyEntry = zip.getNextEntry();
+                assertNotNull(onlyEntry);
+
+                // None of the traversal building blocks may survive in the entry name.
+                String entryName = onlyEntry.getName();
+                for (String forbidden : List.of("..", "/", "\\")) {
+                    assertFalse(entryName.contains(forbidden));
+                }
+
+                // The payload must round-trip unchanged, and no second entry may follow.
+                assertArrayEquals("danger".getBytes(), zip.readAllBytes());
+                assertNull(zip.getNextEntry());
+            }
+        }
+    }
+
+    /**
+     * Attaches the payload {@code "too big"} to a service constructed with the limits (4, 10) and
+     * expects extraction to yield an empty {@link Optional}.
+     *
+     * <p>NOTE(review): the first constructor argument is presumably the per-attachment size limit
+     * in bytes, which the payload exceeds; confirm against the AttachmentService constructor.
+     */
+    @Test
+    void extractAttachments_SkipsAttachmentsExceedingSizeLimit() throws IOException {
+        attachmentService = new AttachmentService(4, 10);
+
+        try (PDDocument pdf = new PDDocument()) {
+            byte[] payload = "too big".getBytes();
+            MockMultipartFile tooLarge =
+                    new MockMultipartFile(
+                            "file", "large.bin", MediaType.APPLICATION_OCTET_STREAM_VALUE, payload);
+            attachmentService.addAttachment(pdf, List.of(tooLarge));
+
+            Optional<byte[]> zipBytes = attachmentService.extractAttachments(pdf);
+
+            // With the only attachment rejected, no archive is produced at all.
+            assertTrue(zipBytes.isEmpty());
+        }
+    }
+
+    /**
+     * Attaches two 5-character files to a service constructed with the limits (10, 9) and verifies
+     * that the extracted ZIP contains only {@code first.txt}, carrying its complete and unmodified
+     * payload.
+     *
+     * <p>NOTE(review): the constructor arguments are presumably the per-attachment and total size
+     * limits in bytes, so the second file would push the total past 9; confirm against the
+     * AttachmentService constructor.
+     */
+    @Test
+    void extractAttachments_EnforcesTotalSizeLimit() throws IOException {
+        attachmentService = new AttachmentService(10, 9);
+
+        try (var document = new PDDocument()) {
+            var first =
+                    new MockMultipartFile(
+                            "file", "first.txt", MediaType.TEXT_PLAIN_VALUE, "12345".getBytes());
+            var second =
+                    new MockMultipartFile(
+                            "file", "second.txt", MediaType.TEXT_PLAIN_VALUE, "67890".getBytes());
+
+            attachmentService.addAttachment(document, List.of(first, second));
+
+            Optional<byte[]> extracted = attachmentService.extractAttachments(document);
+            assertTrue(extracted.isPresent());
+
+            try (var zipInputStream =
+                    new ZipInputStream(new ByteArrayInputStream(extracted.get()))) {
+                ZipEntry firstEntry = zipInputStream.getNextEntry();
+                assertNotNull(firstEntry);
+                assertEquals("first.txt", firstEntry.getName());
+                // Read the whole entry instead of a fixed 5 bytes: getNextEntry() discards any
+                // unread remainder, so a bounded read would not notice extra trailing data.
+                byte[] firstData = zipInputStream.readAllBytes();
+                assertArrayEquals("12345".getBytes(), firstData);
+                // The second attachment must have been left out of the archive.
+                assertNull(zipInputStream.getNextEntry());
+            }
+        }
+    }
 }