From dd0e8543e1c5e3912dc5165e074bc40d98f12598 Mon Sep 17 00:00:00 2001
From: Ludy
Date: Thu, 16 Oct 2025 23:36:33 +0200
Subject: [PATCH] fix(multitool): avoid duplicate resource embedding on export to prevent huge PDFs (#4684)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# Description of Changes

**What was changed**

- Reworked Multi-Tool export to **group page copies per source PDF** and reuse them during assembly.
- Eliminates repeated embedding of identical resources (fonts, XObjects, images) that previously occurred when copying pages one-by-one.
- Keeps original vector content whenever possible instead of rasterizing unchanged pages.
- Minor internal refactor to reduce repeated `copyPages` calls and add pages from a pre-copied cache.

**Why the change was made**

- Users reported that exporting a PDF via Multi-Tool—without making any edits—could inflate file size by 5x+.
- Root cause: exporting pages individually caused duplicated resources in the output document, dramatically increasing size.
- Grouping and reusing copied pages avoids redundant embeddings and yields output sizes much closer to the original.

Closes #4681

---

## Checklist

### General

- [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable)
- [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed)
- [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details.
--- .../static/js/multitool/PdfContainer.js | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/app/core/src/main/resources/static/js/multitool/PdfContainer.js b/app/core/src/main/resources/static/js/multitool/PdfContainer.js index ade68f0d9..ce4706793 100644 --- a/app/core/src/main/resources/static/js/multitool/PdfContainer.js +++ b/app/core/src/main/resources/static/js/multitool/PdfContainer.js @@ -703,14 +703,40 @@ class PdfContainer { async exportPdf(selected) { const pdfDoc = await PDFLib.PDFDocument.create(); const pageContainers = this.pagesContainer.querySelectorAll('.page-container'); // Select all .page-container elements + + const docPageMap = new Map(); + + pageContainers.forEach((container, index) => { + if (selected && !window.selectedPages.includes(index + 1)) { + return; + } + + const img = container.querySelector('img'); + if (!img?.doc) { + return; + } + + let entry = docPageMap.get(img.doc); + if (!entry) { + entry = { indices: [], copiedPages: [], cursor: 0 }; + docPageMap.set(img.doc, entry); + } + + entry.indices.push(img.pageIdx); + }); + + for (const [doc, entry] of docPageMap.entries()) { + entry.copiedPages = await pdfDoc.copyPages(doc, entry.indices); + } + for (var i = 0; i < pageContainers.length; i++) { if (!selected || window.selectedPages.includes(i + 1)) { const img = pageContainers[i].querySelector('img'); // Find the img element within each .page-container if (!img) continue; let page; if (img.doc) { - const pages = await pdfDoc.copyPages(img.doc, [img.pageIdx]); - page = pages[0]; + const entry = docPageMap.get(img.doc); + page = entry.copiedPages[entry.cursor++]; pdfDoc.addPage(page); } else { page = pdfDoc.addPage([img.naturalWidth, img.naturalHeight]);