feat(merge): use metadata for sortByDate (#4461)

# Description of Changes

Follow-up for:
https://github.com/Stirling-Tools/Stirling-PDF/issues/4233#issuecomment-3299735631

Please read thread there.

### Quick summary:
- Using Paths/BasicFileAttributes to get the creation/modification date ALWAYS
resulted in 0
- The Comparator, therefore, always merged in the order the files were handed to it
- -\> when calling from the UI this was not an issue because the front-end had
already arranged the files in the right order for sortByDate
- -\> when calling from the API, however, sorting by date did not work because of this

### Fix:
- Rely on PDF/XMP metadata on the back-end for merging; while that is
also not completely reliable, it is still better

Closes: #4233
<!--
Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)
-->

---

## Checklist

### General

- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [x] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.

---------

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
Balázs Szücs 2025-10-04 12:07:02 +02:00 committed by GitHub
parent b661eb8ff0
commit fd95876d8f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 167 additions and 79 deletions

View File

@ -2,9 +2,7 @@ package stirling.software.SPDF.controller.api;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.attribute.BasicFileAttributes;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
@ -13,17 +11,23 @@ import java.util.List;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.XMPBasicSchema;
import org.apache.xmpbox.xml.DomXmpParser;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody;
@ -64,49 +68,71 @@ public class MergeController {
return mergedDoc;
}
/**
 * Re-orders the uploaded files to match the explicit order provided by the front-end.
 *
 * <p>{@code fileOrder} is a line-delimited list of original filenames in the desired order.
 * Names that match no remaining upload are ignored; uploads that are not listed are appended
 * afterwards in their original relative order. When several uploads share a filename, each
 * occurrence of the name in {@code fileOrder} consumes the first remaining match.
 *
 * @param files uploaded files in the order they were received; must not be {@code null}
 * @param fileOrder filenames separated by line breaks (LF, CRLF or lone CR); must not be
 *     {@code null}
 * @return a new array containing every element of {@code files} exactly once
 */
private static MultipartFile[] reorderFilesByProvidedOrder(
        MultipartFile[] files, String fileOrder) {
    // Browsers normalize line breaks in multipart/form-data text values to CRLF, so splitting
    // on "\n" alone would leave a trailing '\r' on every name but the last and break matching.
    String[] desired = fileOrder.split("\\r\\n|\\r|\\n", -1);
    List<MultipartFile> remaining = new ArrayList<>(Arrays.asList(files));
    List<MultipartFile> ordered = new ArrayList<>(files.length);
    for (String name : desired) {
        if (name.isEmpty()) {
            continue; // blank line (e.g. trailing separator) carries no filename
        }
        int idx = indexOfByOriginalFilename(remaining, name);
        if (idx >= 0) {
            // Remove the match so a duplicate filename resolves to the next upload
            ordered.add(remaining.remove(idx));
        }
    }
    // Append any files not explicitly listed, preserving their relative order
    ordered.addAll(remaining);
    return ordered.toArray(new MultipartFile[0]);
}
// Returns a comparator for sorting MultipartFile arrays based on the given sort type
private Comparator<MultipartFile> getSortComparator(String sortType) {
return switch (sortType) {
case "byFileName" -> Comparator.comparing(MultipartFile::getOriginalFilename);
case "byFileName" ->
Comparator.comparing(
(MultipartFile mf) -> {
String name = mf.getOriginalFilename();
return name == null ? "" : name;
},
String.CASE_INSENSITIVE_ORDER);
case "byDateModified" ->
(file1, file2) -> {
try {
BasicFileAttributes attr1 =
Files.readAttributes(
Paths.get(file1.getOriginalFilename()),
BasicFileAttributes.class);
BasicFileAttributes attr2 =
Files.readAttributes(
Paths.get(file2.getOriginalFilename()),
BasicFileAttributes.class);
return attr1.lastModifiedTime().compareTo(attr2.lastModifiedTime());
} catch (IOException e) {
return 0; // If there's an error, treat them as equal
}
long t1 = getPdfDateTimeSafe(file1);
long t2 = getPdfDateTimeSafe(file2);
return Long.compare(t2, t1);
};
case "byDateCreated" ->
(file1, file2) -> {
try {
BasicFileAttributes attr1 =
Files.readAttributes(
Paths.get(file1.getOriginalFilename()),
BasicFileAttributes.class);
BasicFileAttributes attr2 =
Files.readAttributes(
Paths.get(file2.getOriginalFilename()),
BasicFileAttributes.class);
return attr1.creationTime().compareTo(attr2.creationTime());
} catch (IOException e) {
return 0; // If there's an error, treat them as equal
}
long t1 = getPdfDateTimeSafe(file1);
long t2 = getPdfDateTimeSafe(file2);
return Long.compare(t2, t1);
};
case "byPDFTitle" ->
(file1, file2) -> {
try (PDDocument doc1 = pdfDocumentFactory.load(file1);
PDDocument doc2 = pdfDocumentFactory.load(file2)) {
String title1 = doc1.getDocumentInformation().getTitle();
String title2 = doc2.getDocumentInformation().getTitle();
return title1.compareTo(title2);
String title1 =
doc1.getDocumentInformation() != null
? doc1.getDocumentInformation().getTitle()
: null;
String title2 =
doc2.getDocumentInformation() != null
? doc2.getDocumentInformation().getTitle()
: null;
if (title1 == null && title2 == null) {
return 0;
}
if (title1 == null) {
return 1;
}
if (title2 == null) {
return -1;
}
return title1.compareToIgnoreCase(title2);
} catch (IOException e) {
return 0;
}
@ -153,6 +179,55 @@ public class MergeController {
}
}
/**
 * Best-effort lookup of a timestamp (epoch milliseconds) stored inside an uploaded PDF.
 *
 * <p>Sources are consulted in this order, and the first non-null value wins: the document
 * information modification date, the document information creation date, the XMP basic schema
 * modify date, the XMP basic schema create date.
 *
 * @param file uploaded PDF to inspect
 * @return the first date found, in milliseconds, or {@code 0} when no date is present or the
 *     document / its XMP packet could not be read (failures are logged at debug level)
 */
private long getPdfDateTimeSafe(MultipartFile file) {
    try (PDDocument document = pdfDocumentFactory.load(file)) {
        PDDocumentInformation docInfo = document.getDocumentInformation();
        if (docInfo != null) {
            if (docInfo.getModificationDate() != null) {
                return docInfo.getModificationDate().getTimeInMillis();
            }
            if (docInfo.getCreationDate() != null) {
                return docInfo.getCreationDate().getTimeInMillis();
            }
        }

        // The Info dictionary had no usable date; try the XMP packet next
        PDMetadata xmpPacket = document.getDocumentCatalog().getMetadata();
        if (xmpPacket == null) {
            return 0L;
        }
        try (InputStream xmpStream = xmpPacket.createInputStream()) {
            XMPMetadata parsed = new DomXmpParser().parse(xmpStream);
            XMPBasicSchema basicSchema = parsed.getXMPBasicSchema();
            if (basicSchema != null) {
                if (basicSchema.getModifyDate() != null) {
                    return basicSchema.getModifyDate().getTimeInMillis();
                }
                if (basicSchema.getCreateDate() != null) {
                    return basicSchema.getCreateDate().getTimeInMillis();
                }
            }
        } catch (Exception e) {
            // Any XMP problem is non-fatal: fall through to the default value
            log.debug(
                    "Unable to read XMP metadata dates from uploaded file: {}",
                    e.getMessage());
        }
    } catch (IOException e) {
        log.debug("Unable to read PDF dates from uploaded file: {}", e.getMessage());
    }
    return 0L;
}
/**
 * Finds the position of the first file whose original filename equals {@code name}.
 *
 * @param list files to search, in order
 * @param name filename to look for (compared with {@code String.equals}, so case-sensitive)
 * @return the zero-based index of the first match, or {@code -1} if none matches
 */
private static int indexOfByOriginalFilename(List<MultipartFile> list, String name) {
    int position = 0;
    for (MultipartFile candidate : list) {
        if (name.equals(candidate.getOriginalFilename())) {
            return position;
        }
        position++;
    }
    return -1;
}
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/merge-pdfs")
@Operation(
summary = "Merge multiple PDF files into one",
@ -160,22 +235,34 @@ public class MergeController {
"This endpoint merges multiple PDF files into a single PDF file. The merged"
+ " file will contain all pages from the input files in the order they were"
+ " provided. Input:PDF Output:PDF Type:MISO")
public ResponseEntity<StreamingResponseBody> mergePdfs(@ModelAttribute MergePdfsRequest request)
public ResponseEntity<StreamingResponseBody> mergePdfs(
@ModelAttribute MergePdfsRequest request,
@RequestParam(value = "fileOrder", required = false) String fileOrder)
throws IOException {
List<File> filesToDelete = new ArrayList<>(); // List of temporary files to delete
TempFile mergedTempFile = null;
TempFile outputTempFile = null;
PDDocument mergedDocument = null;
boolean removeCertSign = Boolean.TRUE.equals(request.getRemoveCertSign());
boolean generateToc = request.isGenerateToc();
try {
MultipartFile[] files = request.getFileInput();
MultipartFile[] files = request.getFileInput();
if (files == null) {
files = new MultipartFile[0];
}
// If front-end provided explicit visible order, honor it and override backend sorting
if (fileOrder != null && !fileOrder.isBlank()) {
files = reorderFilesByProvidedOrder(files, fileOrder);
} else {
Arrays.sort(
files,
getSortComparator(
request.getSortType())); // Sort files based on the given sort type
request.getSortType())); // Sort files based on requested sort type
}
ResponseEntity<StreamingResponseBody> response;
try (TempFile mt = new TempFile(tempFileManager, ".pdf")) {
PDFMergerUtility mergerUtility = new PDFMergerUtility();
long totalSize = 0;
@ -188,8 +275,7 @@ public class MergeController {
mergerUtility.addSource(tempFile); // Add source file to the merger utility
}
mergedTempFile = new TempFile(tempFileManager, ".pdf");
mergerUtility.setDestinationFileName(mergedTempFile.getFile().getAbsolutePath());
mergerUtility.setDestinationFileName(mt.getFile().getAbsolutePath());
try {
mergerUtility.mergeDocuments(
@ -203,42 +289,35 @@ public class MergeController {
throw e;
}
// Load the merged PDF document
mergedDocument = pdfDocumentFactory.load(mergedTempFile.getFile());
// Load the merged PDF document and operate on it inside try-with-resources
try (PDDocument mergedDocument = pdfDocumentFactory.load(mt.getFile())) {
// Remove signatures if removeCertSign is true
if (removeCertSign) {
PDDocumentCatalog catalog = mergedDocument.getDocumentCatalog();
PDAcroForm acroForm = catalog.getAcroForm();
if (acroForm != null) {
List<PDField> fieldsToRemove =
acroForm.getFields().stream()
.filter(PDSignatureField.class::isInstance)
.toList();
// Remove signatures if removeCertSign is true
if (removeCertSign) {
PDDocumentCatalog catalog = mergedDocument.getDocumentCatalog();
PDAcroForm acroForm = catalog.getAcroForm();
if (acroForm != null) {
List<PDField> fieldsToRemove =
acroForm.getFields().stream()
.filter(PDSignatureField.class::isInstance)
.toList();
if (!fieldsToRemove.isEmpty()) {
acroForm.flatten(
fieldsToRemove,
false); // Flatten the fields, effectively removing them
if (!fieldsToRemove.isEmpty()) {
acroForm.flatten(
fieldsToRemove,
false); // Flatten the fields, effectively removing them
}
}
}
// Add table of contents if generateToc is true
if (generateToc && files.length > 0) {
addTableOfContents(mergedDocument, files);
}
// Save the modified document to a temporary file
outputTempFile = new TempFile(tempFileManager, ".pdf");
mergedDocument.save(outputTempFile.getFile());
}
// Add table of contents if generateToc is true
if (generateToc && files.length > 0) {
addTableOfContents(mergedDocument, files);
}
// Save the modified document to a temporary file
outputTempFile = new TempFile(tempFileManager, ".pdf");
mergedDocument.save(outputTempFile.getFile());
String mergedFileName =
GeneralUtils.generateFilename(
files[0].getOriginalFilename(), "_merged_unsigned.pdf");
return WebResponseUtils.pdfFileToWebResponse(
outputTempFile, mergedFileName); // Return the modified PDF
} catch (Exception ex) {
if (ex instanceof IOException && PdfErrorUtils.isCorruptedPdfError((IOException) ex)) {
log.warn("Corrupted PDF detected in merge pdf process: {}", ex.getMessage());
@ -247,15 +326,16 @@ public class MergeController {
}
throw ex;
} finally {
if (mergedDocument != null) {
mergedDocument.close(); // Close the merged document
}
for (File file : filesToDelete) {
tempFileManager.deleteTempFile(file); // Delete temporary files
}
if (mergedTempFile != null) {
mergedTempFile.close();
}
}
String firstFilename = files.length > 0 ? files[0].getOriginalFilename() : null;
String mergedFileName =
GeneralUtils.generateFilename(firstFilename, "_merged_unsigned.pdf");
response = WebResponseUtils.pdfFileToWebResponse(outputTempFile, mergedFileName);
return response;
}
}

View File

@ -175,6 +175,13 @@ function updateFiles() {
}
}
document.getElementById("fileInput-input").files = dataTransfer.files;
// Also populate hidden fileOrder to preserve visible order
const order = Array.from(liElements)
.map((li) => li.querySelector(".filename").innerText)
.join("\n");
const orderInput = document.getElementById("fileOrder");
if (orderInput) orderInput.value = order;
}
document.querySelector("#resetFileInputBtn").addEventListener("click", ()=>{

View File

@ -21,6 +21,7 @@
<span class="tool-header-text" th:text="#{merge.header}"></span>
</div>
<form th:action="@{'api/v1/general/merge-pdfs'}" method="post" enctype="multipart/form-data">
<input id="fileOrder" name="fileOrder" type="hidden" />
<div class="mb-3">
<label th:text="#{multiPdfDropPrompt}" for="fileInput-input"></label>
<div