mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2026-02-17 13:52:14 +01:00
feat(attachments): add “Extract Attachments” tool to export embedded files as ZIP (#4645)
This pull request introduces a new feature that allows users to extract all embedded attachments from a PDF and download them as a ZIP archive. The implementation includes backend support for extracting attachments, a new API endpoint, updates to the service layer, internationalization for UI strings, and a new web form for user interaction.

**New PDF Attachment Extraction Feature**

_Backend/API:_

* Added a new API endpoint `/api/v1/misc/extract-attachments` in `AttachmentController` to extract all embedded attachments from a PDF and return them as a ZIP archive. Returns an error if no attachments are found.
* Implemented `extractAttachments` method in `AttachmentService` and its interface to traverse embedded files in the PDF, collect them, and package them into a ZIP. Handles unique filenames and file metadata. [[1]](diffhunk://#diff-31bc10a74e0d6a01a558585a760f1861009719d76de323eedf7205f1cd6dd417R109-R266) [[2]](diffhunk://#diff-67f2128bf0a88be44c20ca02d5b03be5496d70160377da4e66e72379e8f5ddc8R14-R15)
* Introduced `ExtractAttachmentsRequest` model to handle extraction requests.

_User Interface:_

* Added a new web form (`misc/extract-attachments.html`) for users to upload a PDF and download its attachments as a ZIP.
* Created a new web route and controller method to serve the extraction form.
* Updated navigation menus to include the new "Extract Attachments" tool. [[1]](diffhunk://#diff-22a78471c93650a470526aa92780ba43739d475858fd528f180272039dfaa543R261-R263) [[2]](diffhunk://#diff-0bdef49007d770c7afb60ea9c618869ad3a4e96366e82a00b9424b88cdaa3f76R296-R298)

_Internationalization & Messaging:_

* Added English and German translations for all new UI elements and error messages related to attachment extraction. [[1]](diffhunk://#diff-ee1c6999a33498cfa3abba4a384e73a8b8269856899438de80560c965079a9fdR236) [[2]](diffhunk://#diff-482633b22866efc985222c4a14efc5b7d2487b59f39b953f038273a39d0362f7R236) [[3]](diffhunk://#diff-ee1c6999a33498cfa3abba4a384e73a8b8269856899438de80560c965079a9fdR643-R646) [[4]](diffhunk://#diff-482633b22866efc985222c4a14efc5b7d2487b59f39b953f038273a39d0362f7R643-R646) [[5]](diffhunk://#diff-ee1c6999a33498cfa3abba4a384e73a8b8269856899438de80560c965079a9fdR1342-R1347) [[6]](diffhunk://#diff-482633b22866efc985222c4a14efc5b7d2487b59f39b953f038273a39d0362f7R1342-R1347)

These changes together provide a seamless way for users to extract and download all embedded files from a PDF document.

## Checklist

### General

- [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable)
- [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed)
- [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details.
- [ ]
This commit is contained in:
@@ -7,11 +7,15 @@ import static org.mockito.Mockito.when;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipInputStream;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.mock.web.MockMultipartFile;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
class AttachmentServiceTest {
|
||||
@@ -105,4 +109,86 @@ class AttachmentServiceTest {
|
||||
assertNotNull(result.getDocumentCatalog().getNames());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void extractAttachments_SanitizesFilenamesAndExtractsData() throws IOException {
|
||||
attachmentService = new AttachmentService(1024 * 1024, 5 * 1024 * 1024);
|
||||
|
||||
try (var document = new PDDocument()) {
|
||||
var maliciousAttachment =
|
||||
new MockMultipartFile(
|
||||
"file",
|
||||
"..\\evil/../../tricky.txt",
|
||||
MediaType.TEXT_PLAIN_VALUE,
|
||||
"danger".getBytes());
|
||||
|
||||
attachmentService.addAttachment(document, List.of(maliciousAttachment));
|
||||
|
||||
Optional<byte[]> extracted = attachmentService.extractAttachments(document);
|
||||
assertTrue(extracted.isPresent());
|
||||
|
||||
try (var zipInputStream =
|
||||
new ZipInputStream(new ByteArrayInputStream(extracted.get()))) {
|
||||
ZipEntry entry = zipInputStream.getNextEntry();
|
||||
assertNotNull(entry);
|
||||
String sanitizedName = entry.getName();
|
||||
|
||||
assertFalse(sanitizedName.contains(".."));
|
||||
assertFalse(sanitizedName.contains("/"));
|
||||
assertFalse(sanitizedName.contains("\\"));
|
||||
|
||||
byte[] data = zipInputStream.readAllBytes();
|
||||
assertArrayEquals("danger".getBytes(), data);
|
||||
assertNull(zipInputStream.getNextEntry());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void extractAttachments_SkipsAttachmentsExceedingSizeLimit() throws IOException {
|
||||
attachmentService = new AttachmentService(4, 10);
|
||||
|
||||
try (var document = new PDDocument()) {
|
||||
var oversizedAttachment =
|
||||
new MockMultipartFile(
|
||||
"file",
|
||||
"large.bin",
|
||||
MediaType.APPLICATION_OCTET_STREAM_VALUE,
|
||||
"too big".getBytes());
|
||||
|
||||
attachmentService.addAttachment(document, List.of(oversizedAttachment));
|
||||
|
||||
Optional<byte[]> extracted = attachmentService.extractAttachments(document);
|
||||
assertTrue(extracted.isEmpty());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void extractAttachments_EnforcesTotalSizeLimit() throws IOException {
|
||||
attachmentService = new AttachmentService(10, 9);
|
||||
|
||||
try (var document = new PDDocument()) {
|
||||
var first =
|
||||
new MockMultipartFile(
|
||||
"file", "first.txt", MediaType.TEXT_PLAIN_VALUE, "12345".getBytes());
|
||||
var second =
|
||||
new MockMultipartFile(
|
||||
"file", "second.txt", MediaType.TEXT_PLAIN_VALUE, "67890".getBytes());
|
||||
|
||||
attachmentService.addAttachment(document, List.of(first, second));
|
||||
|
||||
Optional<byte[]> extracted = attachmentService.extractAttachments(document);
|
||||
assertTrue(extracted.isPresent());
|
||||
|
||||
try (var zipInputStream =
|
||||
new ZipInputStream(new ByteArrayInputStream(extracted.get()))) {
|
||||
ZipEntry firstEntry = zipInputStream.getNextEntry();
|
||||
assertNotNull(firstEntry);
|
||||
assertEquals("first.txt", firstEntry.getName());
|
||||
byte[] firstData = zipInputStream.readNBytes(5);
|
||||
assertArrayEquals("12345".getBytes(), firstData);
|
||||
assertNull(zipInputStream.getNextEntry());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user