From fddbec2408595bf70ddd51fd38101caaaa694293 Mon Sep 17 00:00:00 2001
From: Felix Kaspar <ich@felixkaspar.com>
Date: Tue, 17 Oct 2023 03:40:54 +0200
Subject: [PATCH] extract, rotate, split

---
 public/exampleWorkflows.js       | 48 +++++++++++++++++++--
 public/functions/extractPages.js | 25 +++++++++++
 public/functions/rotatePDF.js    | 18 ++++++++
 public/functions/splitPDF.js     | 26 ++++++++++++
 public/index.js                  |  2 +-
 public/traverseOperations.js     | 71 ++++++++++++++++++++++++++++++++
 6 files changed, 185 insertions(+), 5 deletions(-)
 create mode 100644 public/functions/extractPages.js
 create mode 100644 public/functions/rotatePDF.js
 create mode 100644 public/functions/splitPDF.js

diff --git a/public/exampleWorkflows.js b/public/exampleWorkflows.js
index 61563483..1d84885e 100644
--- a/public/exampleWorkflows.js
+++ b/public/exampleWorkflows.js
@@ -3,8 +3,7 @@
 // https://cdn.discordapp.com/attachments/1099390571493195898/1118192753759764520/image.png?ex=6537dba7&is=652566a7&hm=dc46820ef7c34bc37424794966c5f66f93ba0e15a740742c364d47d31ea119a9&
 export const discordWorkflow = {
     outputOptions: {
-        zip: false,
-        awaitAllDone: true
+        zip: false
     },
     operations: [
         {
@@ -72,8 +71,7 @@ export const discordWorkflow = {
 // This will merge all input files into one giant document
 export const mergeOnly = {
     outputOptions: {
-        zip: false,
-        awaitAllDone: true
+        zip: false
     },
     operations: [
         {
@@ -82,4 +80,46 @@ export const mergeOnly = {
             operations: []
         }
     ]
+}
+
+// Extract Pages and store them in a new document.
+// Runs a single "extract" operation with no follow-up operations.
+// The indices are zero-based, so [0, 2] keeps the first and third page.
+export const extractOnly = {
+    outputOptions: { zip: false },
+    operations: [
+        {
+            type: "extract",
+            values: { "pagesToExtractArray": [0, 2] },
+            operations: []
+        }
+    ]
+}
+
+// Split a document up into multiple documents.
+// A cut is made after each listed zero-based page index: with [2, 10] a long
+// enough document becomes pages 0-2, pages 3-10 and the remaining pages.
+export const splitOnly = {
+    outputOptions: { zip: false },
+    operations: [
+        {
+            type: "split",
+            values: { "pagesToSplitAfterArray": [2, 10] },
+            operations: []
+        }
+    ]
+}
+
+// Rotate every page of a document.
+// The "rotation" value is an angle in degrees that is applied to all pages.
+// This is the workflow public/index.js runs after this patch.
+export const rotateOnly = {
+    outputOptions: { zip: false },
+    operations: [
+        {
+            type: "rotate",
+            values: { "rotation": -90 },
+            operations: []
+        }
+    ]
 }
\ No newline at end of file
diff --git a/public/functions/extractPages.js b/public/functions/extractPages.js
new file mode 100644
index 00000000..a160335c
--- /dev/null
+++ b/public/functions/extractPages.js
@@ -0,0 +1,25 @@
+const { PDFDocument, ParseSpeeds } = PDFLib;
+
+// Loads the PDF in `snapshot` and returns a new saved document holding only the pages at the zero-based indices given.
+export const extractPages = async (snapshot, pagesToExtractArray) => {
+    // TODO: invent a better format for pagesToExtractArray and convert it.
+    const sourceDocument = await PDFDocument.load(snapshot);
+    return createSubDocument(sourceDocument, pagesToExtractArray);
+};
+
+// Copies the pages of pdfDoc at the zero-based indices in pagesToExtractArray
+// into a freshly created PDF and returns the result of saving that PDF.
+// Throws an Error when the largest requested index is not below the page count.
+export async function createSubDocument(pdfDoc, pagesToExtractArray) {
+    const newDocument = await PDFDocument.create();
+    const highestIndex = Math.max(...pagesToExtractArray);
+    const pageCount = pdfDoc.getPageCount();
+
+    if (highestIndex >= pageCount) {
+        throw new Error(`The PDF document only has ${pageCount} pages and you tried to extract page ${highestIndex}`);
+    }
+
+    const copiedPages = await newDocument.copyPages(pdfDoc, pagesToExtractArray);
+    copiedPages.forEach((page) => newDocument.addPage(page));
+    return newDocument.save();
+}
\ No newline at end of file
diff --git a/public/functions/rotatePDF.js b/public/functions/rotatePDF.js
new file mode 100644
index 00000000..5cb6cb74
--- /dev/null
+++ b/public/functions/rotatePDF.js
@@ -0,0 +1,18 @@
+const { PDFDocument, ParseSpeeds, degrees } = PDFLib;
+
+// Rotates every page of the PDF in `snapshot` and returns the result of saving the document.
+// `rotation` is an angle in degrees; it is wrapped with degrees() before being passed to setRotation.
+// NOTE(review): setRotation presumably replaces a page's existing rotation instead of adding to it — confirm.
+export const rotatePages = async (snapshot, rotation) => {
+    // Load the original PDF file
+    const loadOptions = { parseSpeed: ParseSpeeds.Fastest };
+    const pdfDoc = await PDFDocument.load(snapshot, loadOptions);
+
+    // Apply the same rotation to each page
+    for (const page of pdfDoc.getPages()) {
+        page.setRotation(degrees(rotation));
+    }
+
+    // Serialize the modified document
+    return pdfDoc.save();
+};
\ No newline at end of file
diff --git a/public/functions/splitPDF.js b/public/functions/splitPDF.js
new file mode 100644
index 00000000..f2ec0f88
--- /dev/null
+++ b/public/functions/splitPDF.js
@@ -0,0 +1,26 @@
+import { createSubDocument } from "./extractPages.js";
+
+const { PDFDocument, ParseSpeeds } = PDFLib;
+
+// Splits the PDF in `snapshot` after each zero-based page index in splitAfterPageArray; returns one createSubDocument result per part.
+export const splitPDF = async (snapshot, splitAfterPageArray) => {
+    const pdfDoc = await PDFDocument.load(snapshot);
+    const numberOfPages = pdfDoc.getPages().length;
+    // Consume a copy: the caller may pass the same array for several files, so it must not be emptied here.
+    const remainingSplitPoints = [...splitAfterPageArray];
+    let splitAfter = remainingSplitPoints.shift();
+    let pagesArray = [];
+    const subDocuments = [];
+
+    for (let i = 0; i < numberOfPages; i++) {
+        // Once the split points run out, splitAfter is undefined and this comparison stays false.
+        if (i > splitAfter && pagesArray.length > 0) {
+            subDocuments.push(await createSubDocument(pdfDoc, pagesArray));
+            splitAfter = remainingSplitPoints.shift();
+            pagesArray = [];
+        }
+        pagesArray.push(i);
+    }
+    subDocuments.push(await createSubDocument(pdfDoc, pagesArray));
+    return subDocuments;
+};
\ No newline at end of file
diff --git a/public/index.js b/public/index.js
index d109efd2..c4a68ab6 100644
--- a/public/index.js
+++ b/public/index.js
@@ -55,4 +55,4 @@ import { traverseOperations } from "./traverseOperations.js";
         //     }
         // }
     });
-})(exampleWorkflows.mergeOnly);
+})(exampleWorkflows.rotateOnly);
diff --git a/public/traverseOperations.js b/public/traverseOperations.js
index b02edf42..e664a3a7 100644
--- a/public/traverseOperations.js
+++ b/public/traverseOperations.js
@@ -1,4 +1,7 @@
+import { extractPages } from "./functions/extractPages.js";
 import { mergePDFs } from "./functions/mergePDFs.js";
+import { rotatePages } from "./functions/rotatePDF.js";
+import { splitPDF } from "./functions/splitPDF.js";
 import { organizeWaitOperations } from "./organizeWaitOperations.js";
 
 export async function traverseOperations(operations, input) {
@@ -34,6 +37,8 @@ export async function traverseOperations(operations, input) {
                 }
                 break;
             case "removeObjects":
+                console.warn("RemoveObjects not implemented yet.")
+
                 if(Array.isArray(input)) {
                     for (let i = 0; i < input.length; i++) {
                         // TODO: modfiy input
@@ -52,16 +57,47 @@ export async function traverseOperations(operations, input) {
                     for (let i = 0; i < input.length; i++) {
                         // TODO: modfiy input
                         input[i].fileName += "_extractedPages";
+                        input[i].buffer = await extractPages(input[i].buffer, operation.values["pagesToExtractArray"]);
                         await nextOperation(operation.operations, input[i]);
                     }
                 }
                 else {
                     // TODO: modfiy input
                     input.fileName += "_extractedPages";
+                    input.buffer = await extractPages(input.buffer, operation.values["pagesToExtractArray"]);
                     await nextOperation(operation.operations, input);
                 }
                 break;
+            case "split":
+                // TODO: When a split goes into a wait function it might break the done condition, as it will count multiple times.
+                if(Array.isArray(input)) {
+                    for (let i = 0; i < input.length; i++) {
+                        const splits = await splitPDF(input[i].buffer, operation.values["pagesToSplitAfterArray"]);
+
+                        for (let j = 0; j < splits.length; j++) {
+                            const split = {};
+                            split.originalFileName = input[i].originalFileName;
+                            split.fileName = input[i].fileName + "_split";
+                            split.buffer = splits[j];
+                            await nextOperation(operation.operations, split);
+                        }
+                    }
+                }
+                else {
+                    const splits = await splitPDF(input.buffer, operation.values["pagesToSplitAfterArray"]);
+
+                    for (let j = 0; j < splits.length; j++) {
+                        const split = {};
+                        split.originalFileName = input.originalFileName;
+                        split.fileName = input.fileName + "_split";
+                        split.buffer = splits[j];
+                        await nextOperation(operation.operations, split);
+                    }
+                }
+                break;
             case "fillField":
+                console.warn("FillField not implemented yet.")
+
                 if(Array.isArray(input)) {
                     for (let i = 0; i < input.length; i++) {
                         // TODO: modfiy input
@@ -76,6 +112,8 @@ export async function traverseOperations(operations, input) {
                 }
                 break;
             case "extractImages":
+                console.warn("ExtractImages not implemented yet.")
+
                 if(Array.isArray(input)) {
                     for (let i = 0; i < input.length; i++) {
                         // TODO: modfiy input
@@ -105,6 +143,7 @@ export async function traverseOperations(operations, input) {
                 await nextOperation(operation.operations, input);
                 break;
             case "transform": {
+                console.warn("Transform not implemented yet.")
                 if(Array.isArray(input)) {
                     for (let i = 0; i < input.length; i++) {
                         // TODO: modfiy input
@@ -119,6 +158,38 @@ export async function traverseOperations(operations, input) {
                 }
                 break;
             }
+            case "extract":
+                if(Array.isArray(input)) {
+                    for (let i = 0; i < input.length; i++) {
+                        // TODO: modfiy input
+                        input[i].fileName += "_extractedPages";
+                        input[i].buffer = await extractPages(input[i].buffer, operation.values["pagesToExtractArray"]);
+                        await nextOperation(operation.operations, input[i]);
+                    }
+                }
+                else {
+                    // TODO: modfiy input
+                    input.fileName += "_extractedPages";
+                    input.buffer = await extractPages(input.buffer, operation.values["pagesToExtractArray"]);
+                    await nextOperation(operation.operations, input);
+                }
+                break;
+            case "rotate":
+                if(Array.isArray(input)) {
+                    for (let i = 0; i < input.length; i++) {
+                        // TODO: modfiy input
+                        input[i].fileName += "_turned";
+                        input[i].buffer = await rotatePages(input[i].buffer, operation.values["rotation"]);
+                        await nextOperation(operation.operations, input[i]);
+                    }
+                }
+                else {
+                    // TODO: modfiy input
+                    input.fileName += "_turned";
+                    input.buffer = await rotatePages(input.buffer, operation.values["rotation"]);
+                    await nextOperation(operation.operations, input);
+                }
+                break;
             default:
                 console.log("operation type unknown: ", operation.type);
                 break;