From dd0e8543e1c5e3912dc5165e074bc40d98f12598 Mon Sep 17 00:00:00 2001
From: Ludy <Ludy87@users.noreply.github.com>
Date: Thu, 16 Oct 2025 23:36:33 +0200
Subject: [PATCH] fix(multitool): avoid duplicate resource embedding on export
 to prevent huge PDFs (#4684)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# Description of Changes

**What was changed**
- Reworked Multi-Tool export to **group page copies per source PDF** and
reuse them during assembly.
- Eliminates the repeated embedding of identical resources (fonts, XObjects,
images) that previously occurred when pages were copied one by one.
- Keeps original vector content whenever possible instead of rasterizing
unchanged pages.
- Minor internal refactor: `copyPages` is now called once per source
document, and pages are added from the resulting pre-copied cache.

**Why the change was made**
- Users reported that exporting a PDF via Multi-Tool—without making any
edits—could inflate the file size by 5x or more.
- Root cause: exporting pages individually caused duplicated resources
in the output document, dramatically increasing size.
- Grouping and reusing copied pages avoids redundant embeddings and
yields output sizes much closer to the original.

Closes #4681

---

## Checklist

### General

- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.
---
 .../static/js/multitool/PdfContainer.js       | 30 +++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/app/core/src/main/resources/static/js/multitool/PdfContainer.js b/app/core/src/main/resources/static/js/multitool/PdfContainer.js
index ade68f0d9..ce4706793 100644
--- a/app/core/src/main/resources/static/js/multitool/PdfContainer.js
+++ b/app/core/src/main/resources/static/js/multitool/PdfContainer.js
@@ -703,14 +703,40 @@ class PdfContainer {
   async exportPdf(selected) {
     const pdfDoc = await PDFLib.PDFDocument.create();
     const pageContainers = this.pagesContainer.querySelectorAll('.page-container'); // Select all .page-container elements
+
+    const docPageMap = new Map();
+
+    pageContainers.forEach((container, index) => {
+      if (selected && !window.selectedPages.includes(index + 1)) {
+        return;
+      }
+
+      const img = container.querySelector('img');
+      if (!img?.doc) {
+        return;
+      }
+
+      let entry = docPageMap.get(img.doc);
+      if (!entry) {
+        entry = { indices: [], copiedPages: [], cursor: 0 };
+        docPageMap.set(img.doc, entry);
+      }
+
+      entry.indices.push(img.pageIdx);
+    });
+
+    for (const [doc, entry] of docPageMap.entries()) {
+      entry.copiedPages = await pdfDoc.copyPages(doc, entry.indices);
+    }
+
     for (var i = 0; i < pageContainers.length; i++) {
       if (!selected || window.selectedPages.includes(i + 1)) {
         const img = pageContainers[i].querySelector('img'); // Find the img element within each .page-container
         if (!img) continue;
         let page;
         if (img.doc) {
-          const pages = await pdfDoc.copyPages(img.doc, [img.pageIdx]);
-          page = pages[0];
+          const entry = docPageMap.get(img.doc);
+          page = entry.copiedPages[entry.cursor++];
           pdfDoc.addPage(page);
         } else {
           page = pdfDoc.addPage([img.naturalWidth, img.naturalHeight]);