mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-11-16 01:21:16 +01:00
feat(merge): use metadata for sortByDate (#4461)
# Description of Changes Follow-up for: https://github.com/Stirling-Tools/Stirling-PDF/issues/4233#issuecomment-3299735631 Please read the thread there. ### Quick summary: - Paths/BasicFileAttributes for getting the creation/modification date ALWAYS resulted in 0 - The comparator therefore always merged the files in the order they were handed to it - -> when calling from the front-end this was not an issue, because the front-end had already arranged the files in the right sortByDate order - -> when calling from the API, however, sorting did not work because of this ### Fix: - Rely on PDF/XMP metadata on the back-end for merging; while that is also not completely reliable, it is still better Closes: #4233 <!-- Please provide a summary of the changes, including: - What was changed - Why the change was made - Any challenges encountered Closes #(issue_number) --> --- ## Checklist ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [x] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [x] I have tested my changes locally. 
Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details. --------- Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
parent
b661eb8ff0
commit
fd95876d8f
@ -2,9 +2,7 @@ package stirling.software.SPDF.controller.api;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.attribute.BasicFileAttributes;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
@ -13,17 +11,23 @@ import java.util.List;
|
||||
import org.apache.pdfbox.multipdf.PDFMergerUtility;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
|
||||
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.common.PDMetadata;
|
||||
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
|
||||
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
|
||||
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
|
||||
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
|
||||
import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField;
|
||||
import org.apache.xmpbox.XMPMetadata;
|
||||
import org.apache.xmpbox.schema.XMPBasicSchema;
|
||||
import org.apache.xmpbox.xml.DomXmpParser;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.ModelAttribute;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody;
|
||||
@ -64,49 +68,71 @@ public class MergeController {
|
||||
return mergedDoc;
|
||||
}
|
||||
|
||||
// Re-order files to match the explicit order provided by the front-end.
|
||||
// fileOrder is newline-delimited original filenames in the desired order.
|
||||
private static MultipartFile[] reorderFilesByProvidedOrder(
|
||||
MultipartFile[] files, String fileOrder) {
|
||||
String[] desired = fileOrder.split("\n", -1);
|
||||
List<MultipartFile> remaining = new ArrayList<>(Arrays.asList(files));
|
||||
List<MultipartFile> ordered = new ArrayList<>(files.length);
|
||||
|
||||
for (String name : desired) {
|
||||
if (name == null || name.isEmpty()) continue;
|
||||
int idx = indexOfByOriginalFilename(remaining, name);
|
||||
if (idx >= 0) {
|
||||
ordered.add(remaining.remove(idx));
|
||||
}
|
||||
}
|
||||
|
||||
// Append any files not explicitly listed, preserving their relative order
|
||||
ordered.addAll(remaining);
|
||||
return ordered.toArray(new MultipartFile[0]);
|
||||
}
|
||||
|
||||
// Returns a comparator for sorting MultipartFile arrays based on the given sort type
|
||||
private Comparator<MultipartFile> getSortComparator(String sortType) {
|
||||
return switch (sortType) {
|
||||
case "byFileName" -> Comparator.comparing(MultipartFile::getOriginalFilename);
|
||||
case "byFileName" ->
|
||||
Comparator.comparing(
|
||||
(MultipartFile mf) -> {
|
||||
String name = mf.getOriginalFilename();
|
||||
return name == null ? "" : name;
|
||||
},
|
||||
String.CASE_INSENSITIVE_ORDER);
|
||||
case "byDateModified" ->
|
||||
(file1, file2) -> {
|
||||
try {
|
||||
BasicFileAttributes attr1 =
|
||||
Files.readAttributes(
|
||||
Paths.get(file1.getOriginalFilename()),
|
||||
BasicFileAttributes.class);
|
||||
BasicFileAttributes attr2 =
|
||||
Files.readAttributes(
|
||||
Paths.get(file2.getOriginalFilename()),
|
||||
BasicFileAttributes.class);
|
||||
return attr1.lastModifiedTime().compareTo(attr2.lastModifiedTime());
|
||||
} catch (IOException e) {
|
||||
return 0; // If there's an error, treat them as equal
|
||||
}
|
||||
long t1 = getPdfDateTimeSafe(file1);
|
||||
long t2 = getPdfDateTimeSafe(file2);
|
||||
return Long.compare(t2, t1);
|
||||
};
|
||||
case "byDateCreated" ->
|
||||
(file1, file2) -> {
|
||||
try {
|
||||
BasicFileAttributes attr1 =
|
||||
Files.readAttributes(
|
||||
Paths.get(file1.getOriginalFilename()),
|
||||
BasicFileAttributes.class);
|
||||
BasicFileAttributes attr2 =
|
||||
Files.readAttributes(
|
||||
Paths.get(file2.getOriginalFilename()),
|
||||
BasicFileAttributes.class);
|
||||
return attr1.creationTime().compareTo(attr2.creationTime());
|
||||
} catch (IOException e) {
|
||||
return 0; // If there's an error, treat them as equal
|
||||
}
|
||||
long t1 = getPdfDateTimeSafe(file1);
|
||||
long t2 = getPdfDateTimeSafe(file2);
|
||||
return Long.compare(t2, t1);
|
||||
};
|
||||
case "byPDFTitle" ->
|
||||
(file1, file2) -> {
|
||||
try (PDDocument doc1 = pdfDocumentFactory.load(file1);
|
||||
PDDocument doc2 = pdfDocumentFactory.load(file2)) {
|
||||
String title1 = doc1.getDocumentInformation().getTitle();
|
||||
String title2 = doc2.getDocumentInformation().getTitle();
|
||||
return title1.compareTo(title2);
|
||||
String title1 =
|
||||
doc1.getDocumentInformation() != null
|
||||
? doc1.getDocumentInformation().getTitle()
|
||||
: null;
|
||||
String title2 =
|
||||
doc2.getDocumentInformation() != null
|
||||
? doc2.getDocumentInformation().getTitle()
|
||||
: null;
|
||||
if (title1 == null && title2 == null) {
|
||||
return 0;
|
||||
}
|
||||
if (title1 == null) {
|
||||
return 1;
|
||||
}
|
||||
if (title2 == null) {
|
||||
return -1;
|
||||
}
|
||||
return title1.compareToIgnoreCase(title2);
|
||||
} catch (IOException e) {
|
||||
return 0;
|
||||
}
|
||||
@ -153,6 +179,55 @@ public class MergeController {
|
||||
}
|
||||
}
|
||||
|
||||
private long getPdfDateTimeSafe(MultipartFile file) {
|
||||
try {
|
||||
try (PDDocument doc = pdfDocumentFactory.load(file)) {
|
||||
PDDocumentInformation info = doc.getDocumentInformation();
|
||||
if (info != null) {
|
||||
if (info.getModificationDate() != null) {
|
||||
return info.getModificationDate().getTimeInMillis();
|
||||
}
|
||||
if (info.getCreationDate() != null) {
|
||||
return info.getCreationDate().getTimeInMillis();
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to XMP metadata if Info dates are missing
|
||||
PDMetadata metadata = doc.getDocumentCatalog().getMetadata();
|
||||
if (metadata != null) {
|
||||
try (InputStream is = metadata.createInputStream()) {
|
||||
DomXmpParser parser = new DomXmpParser();
|
||||
XMPMetadata xmp = parser.parse(is);
|
||||
XMPBasicSchema basic = xmp.getXMPBasicSchema();
|
||||
if (basic != null) {
|
||||
if (basic.getModifyDate() != null) {
|
||||
return basic.getModifyDate().getTimeInMillis();
|
||||
}
|
||||
if (basic.getCreateDate() != null) {
|
||||
return basic.getCreateDate().getTimeInMillis();
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.debug(
|
||||
"Unable to read XMP metadata dates from uploaded file: {}",
|
||||
e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.debug("Unable to read PDF dates from uploaded file: {}", e.getMessage());
|
||||
}
|
||||
return 0L;
|
||||
}
|
||||
|
||||
private static int indexOfByOriginalFilename(List<MultipartFile> list, String name) {
|
||||
for (int i = 0; i < list.size(); i++) {
|
||||
MultipartFile f = list.get(i);
|
||||
if (name.equals(f.getOriginalFilename())) return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/merge-pdfs")
|
||||
@Operation(
|
||||
summary = "Merge multiple PDF files into one",
|
||||
@ -160,22 +235,34 @@ public class MergeController {
|
||||
"This endpoint merges multiple PDF files into a single PDF file. The merged"
|
||||
+ " file will contain all pages from the input files in the order they were"
|
||||
+ " provided. Input:PDF Output:PDF Type:MISO")
|
||||
public ResponseEntity<StreamingResponseBody> mergePdfs(@ModelAttribute MergePdfsRequest request)
|
||||
public ResponseEntity<StreamingResponseBody> mergePdfs(
|
||||
@ModelAttribute MergePdfsRequest request,
|
||||
@RequestParam(value = "fileOrder", required = false) String fileOrder)
|
||||
throws IOException {
|
||||
List<File> filesToDelete = new ArrayList<>(); // List of temporary files to delete
|
||||
TempFile mergedTempFile = null;
|
||||
TempFile outputTempFile = null;
|
||||
PDDocument mergedDocument = null;
|
||||
|
||||
boolean removeCertSign = Boolean.TRUE.equals(request.getRemoveCertSign());
|
||||
boolean generateToc = request.isGenerateToc();
|
||||
|
||||
try {
|
||||
MultipartFile[] files = request.getFileInput();
|
||||
MultipartFile[] files = request.getFileInput();
|
||||
if (files == null) {
|
||||
files = new MultipartFile[0];
|
||||
}
|
||||
|
||||
// If front-end provided explicit visible order, honor it and override backend sorting
|
||||
if (fileOrder != null && !fileOrder.isBlank()) {
|
||||
files = reorderFilesByProvidedOrder(files, fileOrder);
|
||||
} else {
|
||||
Arrays.sort(
|
||||
files,
|
||||
getSortComparator(
|
||||
request.getSortType())); // Sort files based on the given sort type
|
||||
request.getSortType())); // Sort files based on requested sort type
|
||||
}
|
||||
|
||||
ResponseEntity<StreamingResponseBody> response;
|
||||
|
||||
try (TempFile mt = new TempFile(tempFileManager, ".pdf")) {
|
||||
|
||||
PDFMergerUtility mergerUtility = new PDFMergerUtility();
|
||||
long totalSize = 0;
|
||||
@ -188,8 +275,7 @@ public class MergeController {
|
||||
mergerUtility.addSource(tempFile); // Add source file to the merger utility
|
||||
}
|
||||
|
||||
mergedTempFile = new TempFile(tempFileManager, ".pdf");
|
||||
mergerUtility.setDestinationFileName(mergedTempFile.getFile().getAbsolutePath());
|
||||
mergerUtility.setDestinationFileName(mt.getFile().getAbsolutePath());
|
||||
|
||||
try {
|
||||
mergerUtility.mergeDocuments(
|
||||
@ -203,42 +289,35 @@ public class MergeController {
|
||||
throw e;
|
||||
}
|
||||
|
||||
// Load the merged PDF document
|
||||
mergedDocument = pdfDocumentFactory.load(mergedTempFile.getFile());
|
||||
// Load the merged PDF document and operate on it inside try-with-resources
|
||||
try (PDDocument mergedDocument = pdfDocumentFactory.load(mt.getFile())) {
|
||||
// Remove signatures if removeCertSign is true
|
||||
if (removeCertSign) {
|
||||
PDDocumentCatalog catalog = mergedDocument.getDocumentCatalog();
|
||||
PDAcroForm acroForm = catalog.getAcroForm();
|
||||
if (acroForm != null) {
|
||||
List<PDField> fieldsToRemove =
|
||||
acroForm.getFields().stream()
|
||||
.filter(PDSignatureField.class::isInstance)
|
||||
.toList();
|
||||
|
||||
// Remove signatures if removeCertSign is true
|
||||
if (removeCertSign) {
|
||||
PDDocumentCatalog catalog = mergedDocument.getDocumentCatalog();
|
||||
PDAcroForm acroForm = catalog.getAcroForm();
|
||||
if (acroForm != null) {
|
||||
List<PDField> fieldsToRemove =
|
||||
acroForm.getFields().stream()
|
||||
.filter(PDSignatureField.class::isInstance)
|
||||
.toList();
|
||||
|
||||
if (!fieldsToRemove.isEmpty()) {
|
||||
acroForm.flatten(
|
||||
fieldsToRemove,
|
||||
false); // Flatten the fields, effectively removing them
|
||||
if (!fieldsToRemove.isEmpty()) {
|
||||
acroForm.flatten(
|
||||
fieldsToRemove,
|
||||
false); // Flatten the fields, effectively removing them
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add table of contents if generateToc is true
|
||||
if (generateToc && files.length > 0) {
|
||||
addTableOfContents(mergedDocument, files);
|
||||
}
|
||||
|
||||
// Save the modified document to a temporary file
|
||||
outputTempFile = new TempFile(tempFileManager, ".pdf");
|
||||
mergedDocument.save(outputTempFile.getFile());
|
||||
}
|
||||
|
||||
// Add table of contents if generateToc is true
|
||||
if (generateToc && files.length > 0) {
|
||||
addTableOfContents(mergedDocument, files);
|
||||
}
|
||||
|
||||
// Save the modified document to a temporary file
|
||||
outputTempFile = new TempFile(tempFileManager, ".pdf");
|
||||
mergedDocument.save(outputTempFile.getFile());
|
||||
|
||||
String mergedFileName =
|
||||
GeneralUtils.generateFilename(
|
||||
files[0].getOriginalFilename(), "_merged_unsigned.pdf");
|
||||
return WebResponseUtils.pdfFileToWebResponse(
|
||||
outputTempFile, mergedFileName); // Return the modified PDF
|
||||
|
||||
} catch (Exception ex) {
|
||||
if (ex instanceof IOException && PdfErrorUtils.isCorruptedPdfError((IOException) ex)) {
|
||||
log.warn("Corrupted PDF detected in merge pdf process: {}", ex.getMessage());
|
||||
@ -247,15 +326,16 @@ public class MergeController {
|
||||
}
|
||||
throw ex;
|
||||
} finally {
|
||||
if (mergedDocument != null) {
|
||||
mergedDocument.close(); // Close the merged document
|
||||
}
|
||||
for (File file : filesToDelete) {
|
||||
tempFileManager.deleteTempFile(file); // Delete temporary files
|
||||
}
|
||||
if (mergedTempFile != null) {
|
||||
mergedTempFile.close();
|
||||
}
|
||||
}
|
||||
|
||||
String firstFilename = files.length > 0 ? files[0].getOriginalFilename() : null;
|
||||
String mergedFileName =
|
||||
GeneralUtils.generateFilename(firstFilename, "_merged_unsigned.pdf");
|
||||
|
||||
response = WebResponseUtils.pdfFileToWebResponse(outputTempFile, mergedFileName);
|
||||
return response;
|
||||
}
|
||||
}
|
||||
|
||||
@ -175,6 +175,13 @@ function updateFiles() {
|
||||
}
|
||||
}
|
||||
document.getElementById("fileInput-input").files = dataTransfer.files;
|
||||
|
||||
// Also populate hidden fileOrder to preserve visible order
|
||||
const order = Array.from(liElements)
|
||||
.map((li) => li.querySelector(".filename").innerText)
|
||||
.join("\n");
|
||||
const orderInput = document.getElementById("fileOrder");
|
||||
if (orderInput) orderInput.value = order;
|
||||
}
|
||||
|
||||
document.querySelector("#resetFileInputBtn").addEventListener("click", ()=>{
|
||||
|
||||
@ -21,6 +21,7 @@
|
||||
<span class="tool-header-text" th:text="#{merge.header}"></span>
|
||||
</div>
|
||||
<form th:action="@{'api/v1/general/merge-pdfs'}" method="post" enctype="multipart/form-data">
|
||||
<input id="fileOrder" name="fileOrder" type="hidden" />
|
||||
<div class="mb-3">
|
||||
<label th:text="#{multiPdfDropPrompt}" for="fileInput-input"></label>
|
||||
<div
|
||||
|
||||
Loading…
Reference in New Issue
Block a user