mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-11-01 01:21:18 +01:00
feat(attachments): add “Extract Attachments” tool to export embedded files as ZIP (#4645)
This pull request introduces a new feature that allows users to extract all embedded attachments from a PDF and download them as a ZIP archive. The implementation includes backend support for extracting attachments, a new API endpoint, updates to the service layer, internationalization for UI strings, and a new web form for user interaction.

**New PDF Attachment Extraction Feature**

_Backend/API:_

* Added a new API endpoint `/api/v1/misc/extract-attachments` in `AttachmentController` to extract all embedded attachments from a PDF and return them as a ZIP archive. Returns an error if no attachments are found.
* Implemented `extractAttachments` method in `AttachmentService` and its interface to traverse embedded files in the PDF, collect them, and package them into a ZIP. Handles unique filenames and file metadata. [[1]](diffhunk://#diff-31bc10a74e0d6a01a558585a760f1861009719d76de323eedf7205f1cd6dd417R109-R266) [[2]](diffhunk://#diff-67f2128bf0a88be44c20ca02d5b03be5496d70160377da4e66e72379e8f5ddc8R14-R15)
* Introduced `ExtractAttachmentsRequest` model to handle extraction requests.

_User Interface:_

* Added a new web form (`misc/extract-attachments.html`) for users to upload a PDF and download its attachments as a ZIP.
* Created a new web route and controller method to serve the extraction form.
* Updated navigation menus to include the new "Extract Attachments" tool. [[1]](diffhunk://#diff-22a78471c93650a470526aa92780ba43739d475858fd528f180272039dfaa543R261-R263) [[2]](diffhunk://#diff-0bdef49007d770c7afb60ea9c618869ad3a4e96366e82a00b9424b88cdaa3f76R296-R298)

_Internationalization & Messaging:_

* Added English and German translations for all new UI elements and error messages related to attachment extraction.
[[1]](diffhunk://#diff-ee1c6999a33498cfa3abba4a384e73a8b8269856899438de80560c965079a9fdR236) [[2]](diffhunk://#diff-482633b22866efc985222c4a14efc5b7d2487b59f39b953f038273a39d0362f7R236) [[3]](diffhunk://#diff-ee1c6999a33498cfa3abba4a384e73a8b8269856899438de80560c965079a9fdR643-R646) [[4]](diffhunk://#diff-482633b22866efc985222c4a14efc5b7d2487b59f39b953f038273a39d0362f7R643-R646) [[5]](diffhunk://#diff-ee1c6999a33498cfa3abba4a384e73a8b8269856899438de80560c965079a9fdR1342-R1347) [[6]](diffhunk://#diff-482633b22866efc985222c4a14efc5b7d2487b59f39b953f038273a39d0362f7R1342-R1347) These changes together provide a seamless way for users to extract and download all embedded files from a PDF document. ## Checklist ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [x] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. 
Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details. - [ ]
This commit is contained in:
parent
614d410dce
commit
cc1caa993f
@ -2,6 +2,7 @@ package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.springframework.http.MediaType;
|
||||
@ -20,8 +21,10 @@ import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import stirling.software.SPDF.model.api.misc.AddAttachmentRequest;
|
||||
import stirling.software.SPDF.model.api.misc.ExtractAttachmentsRequest;
|
||||
import stirling.software.SPDF.service.AttachmentServiceInterface;
|
||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
import stirling.software.common.util.ExceptionUtils;
|
||||
import stirling.software.common.util.GeneralUtils;
|
||||
import stirling.software.common.util.WebResponseUtils;
|
||||
|
||||
@ -56,4 +59,33 @@ public class AttachmentController {
|
||||
Filenames.toSimpleFileName(fileInput.getOriginalFilename()),
|
||||
"_with_attachments.pdf"));
|
||||
}
|
||||
|
||||
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/extract-attachments")
|
||||
@Operation(
|
||||
summary = "Extract attachments from PDF",
|
||||
description =
|
||||
"This endpoint extracts all embedded attachments from a PDF into a ZIP archive."
|
||||
+ " Input:PDF Output:ZIP Type:SISO")
|
||||
public ResponseEntity<byte[]> extractAttachments(
|
||||
@ModelAttribute ExtractAttachmentsRequest request) throws IOException {
|
||||
try (PDDocument document = pdfDocumentFactory.load(request, true)) {
|
||||
Optional<byte[]> extracted = pdfAttachmentService.extractAttachments(document);
|
||||
|
||||
if (extracted.isEmpty()) {
|
||||
throw ExceptionUtils.createIllegalArgumentException(
|
||||
"error.noAttachmentsFound",
|
||||
"No embedded attachments found in the provided PDF");
|
||||
}
|
||||
|
||||
MultipartFile fileInput = request.getFileInput();
|
||||
String sourceName =
|
||||
fileInput != null ? fileInput.getOriginalFilename() : request.getFileId();
|
||||
String outputName =
|
||||
Filenames.toSimpleFileName(
|
||||
GeneralUtils.generateFilename(sourceName, "_attachments.zip"));
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
extracted.get(), outputName, MediaType.APPLICATION_OCTET_STREAM);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -198,4 +198,11 @@ public class OtherWebController {
|
||||
model.addAttribute("currentPage", "add-attachments");
|
||||
return "misc/add-attachments";
|
||||
}
|
||||
|
||||
@GetMapping("/extract-attachments")
|
||||
@Hidden
|
||||
public String extractAttachmentsForm(Model model) {
|
||||
model.addAttribute("currentPage", "extract-attachments");
|
||||
return "misc/extract-attachments";
|
||||
}
|
||||
}
|
||||
|
||||
@ -0,0 +1,10 @@
|
||||
package stirling.software.SPDF.model.api.misc;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
|
||||
import stirling.software.common.model.api.PDFFile;
|
||||
|
||||
@Data
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
public class ExtractAttachmentsRequest extends PDFFile {}
|
||||
@ -2,14 +2,22 @@ package stirling.software.SPDF.service;
|
||||
|
||||
import static stirling.software.common.util.AttachmentUtils.setCatalogViewerPreferences;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.attribute.FileTime;
|
||||
import java.time.Instant;
|
||||
import java.time.ZoneId;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.util.GregorianCalendar;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
@ -17,17 +25,37 @@ import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
|
||||
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
|
||||
import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode;
|
||||
import org.apache.pdfbox.pdmodel.PageMode;
|
||||
import org.apache.pdfbox.pdmodel.common.PDNameTreeNode;
|
||||
import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
|
||||
import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
|
||||
import org.apache.pdfbox.pdmodel.common.filespecification.PDFileSpecification;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.github.pixee.security.Filenames;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
public class AttachmentService implements AttachmentServiceInterface {
|
||||
|
||||
private static final long DEFAULT_MAX_ATTACHMENT_SIZE_BYTES = 50L * 1024 * 1024; // 50 MB
|
||||
private static final long DEFAULT_MAX_TOTAL_ATTACHMENT_SIZE_BYTES =
|
||||
200L * 1024 * 1024; // 200 MB
|
||||
|
||||
private final long maxAttachmentSizeBytes;
|
||||
private final long maxTotalAttachmentSizeBytes;
|
||||
|
||||
/**
 * Creates a service with explicit extraction limits.
 *
 * @param maxAttachmentSizeBytes largest single attachment, in bytes, that extraction will
 *     export; the extraction code only enforces it when it is greater than zero
 * @param maxTotalAttachmentSizeBytes largest combined size, in bytes, of all exported
 *     attachments; the extraction code only enforces it when it is greater than zero
 */
public AttachmentService(long maxAttachmentSizeBytes, long maxTotalAttachmentSizeBytes) {
    this.maxTotalAttachmentSizeBytes = maxTotalAttachmentSizeBytes;
    this.maxAttachmentSizeBytes = maxAttachmentSizeBytes;
}

/** Creates a service using the default limits (50 MB per attachment, 200 MB in total). */
public AttachmentService() {
    this(DEFAULT_MAX_ATTACHMENT_SIZE_BYTES, DEFAULT_MAX_TOTAL_ATTACHMENT_SIZE_BYTES);
}
|
||||
|
||||
@Override
|
||||
public PDDocument addAttachment(PDDocument document, List<MultipartFile> attachments)
|
||||
throws IOException {
|
||||
@ -93,6 +121,216 @@ public class AttachmentService implements AttachmentServiceInterface {
|
||||
return document;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<byte[]> extractAttachments(PDDocument document) throws IOException {
|
||||
PDDocumentCatalog catalog = document.getDocumentCatalog();
|
||||
if (catalog == null) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
PDDocumentNameDictionary documentNames = catalog.getNames();
|
||||
if (documentNames == null) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
PDEmbeddedFilesNameTreeNode embeddedFilesTree = documentNames.getEmbeddedFiles();
|
||||
if (embeddedFilesTree == null) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
Map<String, PDComplexFileSpecification> embeddedFiles = new LinkedHashMap<>();
|
||||
collectEmbeddedFiles(embeddedFilesTree, embeddedFiles);
|
||||
|
||||
if (embeddedFiles.isEmpty()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
ZipOutputStream zipOutputStream = new ZipOutputStream(baos)) {
|
||||
Set<String> usedNames = new HashSet<>();
|
||||
boolean hasExtractedAttachments = false;
|
||||
long totalBytesWritten = 0L;
|
||||
|
||||
for (Map.Entry<String, PDComplexFileSpecification> entry : embeddedFiles.entrySet()) {
|
||||
PDComplexFileSpecification fileSpecification = entry.getValue();
|
||||
PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpecification);
|
||||
|
||||
if (embeddedFile == null) {
|
||||
log.debug(
|
||||
"Skipping attachment {} because embedded file was null",
|
||||
entry.getKey());
|
||||
continue;
|
||||
}
|
||||
|
||||
String filename = determineFilename(entry.getKey(), fileSpecification);
|
||||
filename = Filenames.toSimpleFileName(filename);
|
||||
String sanitizedFilename = sanitizeFilename(filename);
|
||||
|
||||
Optional<byte[]> attachmentData = readAttachmentData(embeddedFile);
|
||||
if (attachmentData.isEmpty()) {
|
||||
log.warn(
|
||||
"Skipping attachment '{}' because it exceeds the size limit of {} bytes",
|
||||
sanitizedFilename,
|
||||
maxAttachmentSizeBytes);
|
||||
continue;
|
||||
}
|
||||
|
||||
byte[] data = attachmentData.get();
|
||||
if (maxTotalAttachmentSizeBytes > 0
|
||||
&& (data.length + totalBytesWritten) > maxTotalAttachmentSizeBytes) {
|
||||
log.warn(
|
||||
"Skipping attachment '{}' because the total size would exceed {} bytes",
|
||||
sanitizedFilename,
|
||||
maxTotalAttachmentSizeBytes);
|
||||
continue;
|
||||
}
|
||||
|
||||
String uniqueFilename = ensureUniqueFilename(sanitizedFilename, usedNames);
|
||||
|
||||
ZipEntry zipEntry = new ZipEntry(uniqueFilename);
|
||||
if (embeddedFile.getModDate() != null) {
|
||||
zipEntry.setLastModifiedTime(
|
||||
FileTime.from(embeddedFile.getModDate().toInstant()));
|
||||
}
|
||||
if (embeddedFile.getCreationDate() != null) {
|
||||
zipEntry.setCreationTime(
|
||||
FileTime.from(embeddedFile.getCreationDate().toInstant()));
|
||||
}
|
||||
zipEntry.setSize(data.length);
|
||||
|
||||
zipOutputStream.putNextEntry(zipEntry);
|
||||
zipOutputStream.write(data);
|
||||
zipOutputStream.closeEntry();
|
||||
hasExtractedAttachments = true;
|
||||
totalBytesWritten += data.length;
|
||||
log.info("Extracted attachment '{}' ({} bytes)", uniqueFilename, data.length);
|
||||
}
|
||||
|
||||
zipOutputStream.finish();
|
||||
|
||||
if (!hasExtractedAttachments) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
return Optional.of(baos.toByteArray());
|
||||
}
|
||||
}
|
||||
|
||||
private String sanitizeFilename(String candidate) {
|
||||
String sanitized = Filenames.toSimpleFileName(candidate);
|
||||
if (StringUtils.isBlank(sanitized)) {
|
||||
sanitized = generateDefaultFilename();
|
||||
}
|
||||
return sanitized;
|
||||
}
|
||||
|
||||
private String generateDefaultFilename() {
|
||||
return "unknown_attachment_" + System.currentTimeMillis();
|
||||
}
|
||||
|
||||
private Optional<byte[]> readAttachmentData(PDEmbeddedFile embeddedFile) throws IOException {
|
||||
try (var inputStream = embeddedFile.createInputStream();
|
||||
var buffer = new ByteArrayOutputStream()) {
|
||||
byte[] chunk = new byte[8192];
|
||||
long total = 0L;
|
||||
int read;
|
||||
while ((read = inputStream.read(chunk)) != -1) {
|
||||
total += read;
|
||||
if (maxAttachmentSizeBytes > 0 && total > maxAttachmentSizeBytes) {
|
||||
return Optional.empty();
|
||||
}
|
||||
buffer.write(chunk, 0, read);
|
||||
}
|
||||
return Optional.of(buffer.toByteArray());
|
||||
}
|
||||
}
|
||||
|
||||
private void collectEmbeddedFiles(
|
||||
PDNameTreeNode<PDComplexFileSpecification> node,
|
||||
Map<String, PDComplexFileSpecification> collector)
|
||||
throws IOException {
|
||||
if (node == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
Map<String, PDComplexFileSpecification> names = node.getNames();
|
||||
if (names != null) {
|
||||
collector.putAll(names);
|
||||
}
|
||||
|
||||
List<PDNameTreeNode<PDComplexFileSpecification>> kids = node.getKids();
|
||||
if (kids != null) {
|
||||
for (PDNameTreeNode<PDComplexFileSpecification> kid : kids) {
|
||||
collectEmbeddedFiles(kid, collector);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private PDEmbeddedFile getEmbeddedFile(PDFileSpecification fileSpecification) {
|
||||
if (!(fileSpecification instanceof PDComplexFileSpecification complexSpecification)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (complexSpecification.getEmbeddedFileUnicode() != null) {
|
||||
return complexSpecification.getEmbeddedFileUnicode();
|
||||
}
|
||||
if (complexSpecification.getEmbeddedFile() != null) {
|
||||
return complexSpecification.getEmbeddedFile();
|
||||
}
|
||||
if (complexSpecification.getEmbeddedFileDos() != null) {
|
||||
return complexSpecification.getEmbeddedFileDos();
|
||||
}
|
||||
if (complexSpecification.getEmbeddedFileMac() != null) {
|
||||
return complexSpecification.getEmbeddedFileMac();
|
||||
}
|
||||
return complexSpecification.getEmbeddedFileUnix();
|
||||
}
|
||||
|
||||
private String determineFilename(String key, PDComplexFileSpecification specification) {
|
||||
if (specification == null) {
|
||||
return fallbackFilename(key);
|
||||
}
|
||||
|
||||
String name = specification.getFileUnicode();
|
||||
if (StringUtils.isBlank(name)) {
|
||||
name = specification.getFilename();
|
||||
}
|
||||
if (StringUtils.isBlank(name)) {
|
||||
name = specification.getFile();
|
||||
}
|
||||
if (StringUtils.isBlank(name)) {
|
||||
name = key;
|
||||
}
|
||||
return fallbackFilename(name);
|
||||
}
|
||||
|
||||
private String fallbackFilename(String candidate) {
|
||||
if (StringUtils.isBlank(candidate)) {
|
||||
return "unknown_attachment_" + System.currentTimeMillis();
|
||||
}
|
||||
return candidate;
|
||||
}
|
||||
|
||||
private String ensureUniqueFilename(String filename, Set<String> usedNames) {
|
||||
String baseName = filename;
|
||||
String extension = "";
|
||||
int lastDot = filename.lastIndexOf('.');
|
||||
if (lastDot > 0 && lastDot < filename.length() - 1) {
|
||||
baseName = filename.substring(0, lastDot);
|
||||
extension = filename.substring(lastDot);
|
||||
}
|
||||
|
||||
String uniqueName = filename;
|
||||
int counter = 1;
|
||||
while (usedNames.contains(uniqueName)) {
|
||||
uniqueName = baseName + "_" + counter + extension;
|
||||
counter++;
|
||||
}
|
||||
|
||||
usedNames.add(uniqueName);
|
||||
return uniqueName;
|
||||
}
|
||||
|
||||
private PDEmbeddedFilesNameTreeNode getEmbeddedFilesTree(PDDocument document) {
|
||||
PDDocumentCatalog catalog = document.getDocumentCatalog();
|
||||
PDDocumentNameDictionary documentNames = catalog.getNames();
|
||||
|
||||
@ -2,6 +2,7 @@ package stirling.software.SPDF.service;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
@ -10,4 +11,6 @@ public interface AttachmentServiceInterface {
|
||||
|
||||
PDDocument addAttachment(PDDocument document, List<MultipartFile> attachments)
|
||||
throws IOException;
|
||||
|
||||
Optional<byte[]> extractAttachments(PDDocument document) throws IOException;
|
||||
}
|
||||
|
||||
@ -233,6 +233,7 @@ error.angleNotMultipleOf90=Der Winkel muss ein Vielfaches von 90 sein.
|
||||
error.pdfBookmarksNotFound=Keine PDF-Lesezeichen/Gliederung im Dokument gefunden.
|
||||
error.fontLoadingFailed=Fehler bei der Verarbeitung der Schriftdatei.
|
||||
error.fontDirectoryReadFailed=Konnte das Schriftverzeichnis nicht lesen.
|
||||
error.noAttachmentsFound=Keine eingebetteten Anhänge im bereitgestellten PDF gefunden.
|
||||
delete=Löschen
|
||||
username=Benutzername
|
||||
password=Passwort
|
||||
@ -639,6 +640,10 @@ home.attachments.title=Anhänge hinzufügen
|
||||
home.attachments.desc=Eingebettete Dateien (Anhänge) zu einem PDF hinzufügen oder daraus entfernen
|
||||
attachments.tags=einbetten, anhängen, datei, anhang, anhänge
|
||||
|
||||
home.extractAttachments.title=Anhänge extrahieren
|
||||
home.extractAttachments.desc=Eingebettete Dateien (Anhänge) aus einem PDF als ZIP herunterladen
|
||||
extractAttachments.tags=anhänge,extrahieren,einbetten,dateien,zip
|
||||
|
||||
home.watermark.title=Wasserzeichen hinzufügen
|
||||
home.watermark.desc=Fügen Sie ein eigenes Wasserzeichen zu Ihrem PDF hinzu
|
||||
watermark.tags=text,wiederholend,beschriftung,besitzen,urheberrecht,marke,img,jpg,bild,foto
|
||||
@ -1334,6 +1339,12 @@ attachments.description=Ermöglicht das Hinzufügen von Anhängen zum PDF
|
||||
attachments.descriptionPlaceholder=Beschreibung für die Anhänge eingeben...
|
||||
attachments.addButton=Anhänge hinzufügen
|
||||
|
||||
#extractAttachments
|
||||
extractAttachments.title=Anhänge extrahieren
|
||||
extractAttachments.header=Anhänge extrahieren
|
||||
extractAttachments.description=Exportiert alle eingebetteten Dateien aus dem PDF in ein ZIP-Archiv.
|
||||
extractAttachments.downloadButton=Anhänge extrahieren
|
||||
|
||||
#merge
|
||||
merge.title=Zusammenführen
|
||||
merge.header=Mehrere PDFs zusammenführen (2+)
|
||||
|
||||
@ -233,6 +233,7 @@ error.angleNotMultipleOf90=Angle must be a multiple of 90
|
||||
error.pdfBookmarksNotFound=No PDF bookmarks/outline found in document
|
||||
error.fontLoadingFailed=Error processing font file
|
||||
error.fontDirectoryReadFailed=Failed to read font directory
|
||||
error.noAttachmentsFound=No embedded attachments were found in the provided PDF.
|
||||
delete=Delete
|
||||
username=Username
|
||||
password=Password
|
||||
@ -639,6 +640,10 @@ home.attachments.title=Add Attachments
|
||||
home.attachments.desc=Add or remove embedded files (attachments) to/from a PDF
|
||||
attachments.tags=embed,attach,file,attachment,attachments
|
||||
|
||||
home.extractAttachments.title=Extract Attachments
|
||||
home.extractAttachments.desc=Download embedded files (attachments) from a PDF as a ZIP archive
|
||||
extractAttachments.tags=attachments,extract,embedded,files,zip
|
||||
|
||||
home.watermark.title=Add Watermark
|
||||
home.watermark.desc=Add a custom watermark to your PDF document.
|
||||
watermark.tags=Text,repeating,label,own,copyright,trademark,img,jpg,picture,photo
|
||||
@ -1334,6 +1339,12 @@ attachments.description=Allows you to add attachments to the PDF
|
||||
attachments.descriptionPlaceholder=Enter a description for the attachments...
|
||||
attachments.addButton=Add Attachments
|
||||
|
||||
#extractAttachments
|
||||
extractAttachments.title=Extract Attachments
|
||||
extractAttachments.header=Extract attachments
|
||||
extractAttachments.description=Exports all embedded files from the PDF into a ZIP archive.
|
||||
extractAttachments.downloadButton=Extract Attachments
|
||||
|
||||
#merge
|
||||
merge.title=Merge
|
||||
merge.header=Merge multiple PDFs (2+)
|
||||
|
||||
@ -270,6 +270,9 @@
|
||||
<div
|
||||
th:replace="~{fragments/navbarEntry :: navbarEntry('add-attachments', 'attachment', 'home.attachments.title', 'home.attachments.desc', 'attachments.tags', 'other')}">
|
||||
</div>
|
||||
<div
|
||||
th:replace="~{fragments/navbarEntry :: navbarEntry('extract-attachments', 'folder_zip', 'home.extractAttachments.title', 'home.extractAttachments.desc', 'extractAttachments.tags', 'other')}">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="groupAdvanced" class="feature-group">
|
||||
|
||||
@ -293,6 +293,9 @@
|
||||
<div
|
||||
th:replace="~{fragments/navbarEntry :: navbarEntry('add-attachments', 'attachment', 'home.attachments.title', 'home.attachments.desc', 'attachments.tags', 'other')}">
|
||||
</div>
|
||||
<div
|
||||
th:replace="~{fragments/navbarEntry :: navbarEntry('extract-attachments', 'folder_zip', 'home.extractAttachments.title', 'home.extractAttachments.desc', 'extractAttachments.tags', 'other')}">
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -0,0 +1,52 @@
|
||||
<!DOCTYPE html>
|
||||
<html th:lang="${#locale.language}"
|
||||
th:dir="#{language.direction}"
|
||||
th:data-language="${#locale.toString()}"
|
||||
xmlns:th="https://www.thymeleaf.org">
|
||||
|
||||
<head>
|
||||
<th:block th:insert="~{fragments/common :: head(title=#{extractAttachments.title}, header=#{extractAttachments.header})}">
|
||||
</th:block>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div id="page-container">
|
||||
<div id="content-wrap">
|
||||
<th:block th:insert="~{fragments/navbar.html :: navbar}"></th:block>
|
||||
<br><br>
|
||||
<div class="container">
|
||||
<div class="row justify-content-center">
|
||||
<div class="col-md-6 bg-card">
|
||||
<div class="tool-header">
|
||||
<span class="material-symbols-rounded tool-header-icon other">folder_zip</span>
|
||||
<span class="tool-header-text"
|
||||
th:text="#{extractAttachments.header}"></span>
|
||||
</div>
|
||||
|
||||
<p class="tool-description"
|
||||
th:text="#{extractAttachments.description}"></p>
|
||||
|
||||
<form action="#"
|
||||
th:action="@{/api/v1/misc/extract-attachments}"
|
||||
method="post"
|
||||
enctype="multipart/form-data">
|
||||
<!-- PDF file selector -->
|
||||
<div
|
||||
th:replace="~{fragments/common :: fileSelector(name='fileInput', disableMultipleFiles=true, multipleInputsForSingleRequest=false, accept='application/pdf')}">
|
||||
</div>
|
||||
|
||||
<!-- Submit button -->
|
||||
<button type="submit"
|
||||
id="submitBtn"
|
||||
class="btn btn-primary"
|
||||
th:text="#{extractAttachments.downloadButton}">Extract Attachments</button>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<th:block th:insert="~{fragments/footer.html :: footer}"></th:block>
|
||||
</div>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
@ -7,11 +7,15 @@ import static org.mockito.Mockito.when;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipInputStream;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.mock.web.MockMultipartFile;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
class AttachmentServiceTest {
|
||||
@ -105,4 +109,86 @@ class AttachmentServiceTest {
|
||||
assertNotNull(result.getDocumentCatalog().getNames());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void extractAttachments_SanitizesFilenamesAndExtractsData() throws IOException {
|
||||
attachmentService = new AttachmentService(1024 * 1024, 5 * 1024 * 1024);
|
||||
|
||||
try (var document = new PDDocument()) {
|
||||
var maliciousAttachment =
|
||||
new MockMultipartFile(
|
||||
"file",
|
||||
"..\\evil/../../tricky.txt",
|
||||
MediaType.TEXT_PLAIN_VALUE,
|
||||
"danger".getBytes());
|
||||
|
||||
attachmentService.addAttachment(document, List.of(maliciousAttachment));
|
||||
|
||||
Optional<byte[]> extracted = attachmentService.extractAttachments(document);
|
||||
assertTrue(extracted.isPresent());
|
||||
|
||||
try (var zipInputStream =
|
||||
new ZipInputStream(new ByteArrayInputStream(extracted.get()))) {
|
||||
ZipEntry entry = zipInputStream.getNextEntry();
|
||||
assertNotNull(entry);
|
||||
String sanitizedName = entry.getName();
|
||||
|
||||
assertFalse(sanitizedName.contains(".."));
|
||||
assertFalse(sanitizedName.contains("/"));
|
||||
assertFalse(sanitizedName.contains("\\"));
|
||||
|
||||
byte[] data = zipInputStream.readAllBytes();
|
||||
assertArrayEquals("danger".getBytes(), data);
|
||||
assertNull(zipInputStream.getNextEntry());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void extractAttachments_SkipsAttachmentsExceedingSizeLimit() throws IOException {
|
||||
attachmentService = new AttachmentService(4, 10);
|
||||
|
||||
try (var document = new PDDocument()) {
|
||||
var oversizedAttachment =
|
||||
new MockMultipartFile(
|
||||
"file",
|
||||
"large.bin",
|
||||
MediaType.APPLICATION_OCTET_STREAM_VALUE,
|
||||
"too big".getBytes());
|
||||
|
||||
attachmentService.addAttachment(document, List.of(oversizedAttachment));
|
||||
|
||||
Optional<byte[]> extracted = attachmentService.extractAttachments(document);
|
||||
assertTrue(extracted.isEmpty());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void extractAttachments_EnforcesTotalSizeLimit() throws IOException {
|
||||
attachmentService = new AttachmentService(10, 9);
|
||||
|
||||
try (var document = new PDDocument()) {
|
||||
var first =
|
||||
new MockMultipartFile(
|
||||
"file", "first.txt", MediaType.TEXT_PLAIN_VALUE, "12345".getBytes());
|
||||
var second =
|
||||
new MockMultipartFile(
|
||||
"file", "second.txt", MediaType.TEXT_PLAIN_VALUE, "67890".getBytes());
|
||||
|
||||
attachmentService.addAttachment(document, List.of(first, second));
|
||||
|
||||
Optional<byte[]> extracted = attachmentService.extractAttachments(document);
|
||||
assertTrue(extracted.isPresent());
|
||||
|
||||
try (var zipInputStream =
|
||||
new ZipInputStream(new ByteArrayInputStream(extracted.get()))) {
|
||||
ZipEntry firstEntry = zipInputStream.getNextEntry();
|
||||
assertNotNull(firstEntry);
|
||||
assertEquals("first.txt", firstEntry.getName());
|
||||
byte[] firstData = zipInputStream.readNBytes(5);
|
||||
assertArrayEquals("12345".getBytes(), firstData);
|
||||
assertNull(zipInputStream.getNextEntry());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user