From fddbec2408595bf70ddd51fd38101caaaa694293 Mon Sep 17 00:00:00 2001 From: Felix Kaspar Date: Tue, 17 Oct 2023 03:40:54 +0200 Subject: [PATCH] extract, rotate, split --- public/exampleWorkflows.js | 48 +++++++++++++++++++-- public/functions/extractPages.js | 25 +++++++++++ public/functions/rotatePDF.js | 18 ++++++++ public/functions/splitPDF.js | 26 ++++++++++++ public/index.js | 2 +- public/traverseOperations.js | 71 ++++++++++++++++++++++++++++++++ 6 files changed, 185 insertions(+), 5 deletions(-) create mode 100644 public/functions/extractPages.js create mode 100644 public/functions/rotatePDF.js create mode 100644 public/functions/splitPDF.js diff --git a/public/exampleWorkflows.js b/public/exampleWorkflows.js index 61563483..1d84885e 100644 --- a/public/exampleWorkflows.js +++ b/public/exampleWorkflows.js @@ -3,8 +3,7 @@ // https://cdn.discordapp.com/attachments/1099390571493195898/1118192753759764520/image.png?ex=6537dba7&is=652566a7&hm=dc46820ef7c34bc37424794966c5f66f93ba0e15a740742c364d47d31ea119a9& export const discordWorkflow = { outputOptions: { - zip: false, - awaitAllDone: true + zip: false }, operations: [ { @@ -72,8 +71,7 @@ export const discordWorkflow = { // This will merge all input files into one giant document export const mergeOnly = { outputOptions: { - zip: false, - awaitAllDone: true + zip: false }, operations: [ { @@ -82,4 +80,46 @@ export const mergeOnly = { operations: [] } ] +} + +// Extract Pages and store them in a new document +export const extractOnly = { + outputOptions: { + zip: false + }, + operations: [ + { + type: "extract", + values: { "pagesToExtractArray": [0, 2] }, + operations: [] + } + ] +} + +// Split a document up into multiple documents +export const splitOnly = { + outputOptions: { + zip: false + }, + operations: [ + { + type: "split", + values: { "pagesToSplitAfterArray": [2, 10] }, + operations: [] + } + ] +} + +// Rotate every page of a document by the given angle +export const rotateOnly = { + outputOptions: { + zip: 
false + }, + operations: [ + { + type: "rotate", + values: { "rotation": -90 }, + operations: [] + } + ] } \ No newline at end of file diff --git a/public/functions/extractPages.js b/public/functions/extractPages.js new file mode 100644 index 00000000..a160335c --- /dev/null +++ b/public/functions/extractPages.js @@ -0,0 +1,25 @@ +const { PDFDocument, ParseSpeeds } = PDFLib; + +export const extractPages = async (snapshot, pagesToExtractArray) => { + const pdfDoc = await PDFDocument.load(snapshot) + + // TODO: invent a better format for pagesToExtractArray and convert it. + return createSubDocument(pdfDoc, pagesToExtractArray); +}; + +export async function createSubDocument(pdfDoc, pagesToExtractArray) { + const subDocument = await PDFDocument.create(); + + // Check that the largest requested page index is smaller than the PDF's page count + if(Math.max(...pagesToExtractArray) >= pdfDoc.getPageCount()) { + throw new Error(`The PDF document only has ${pdfDoc.getPageCount()} pages and you tried to extract page ${Math.max(...pagesToExtractArray)}`); + } + + const copiedPages = await subDocument.copyPages(pdfDoc, pagesToExtractArray); + + for (let i = 0; i < copiedPages.length; i++) { + subDocument.addPage(copiedPages[i]); + } + + return subDocument.save(); +} \ No newline at end of file diff --git a/public/functions/rotatePDF.js b/public/functions/rotatePDF.js new file mode 100644 index 00000000..5cb6cb74 --- /dev/null +++ b/public/functions/rotatePDF.js @@ -0,0 +1,18 @@ +const { PDFDocument, ParseSpeeds, degrees } = PDFLib; + +export const rotatePages = async (snapshot, rotation) => { + // Load the original PDF file + const pdfDoc = await PDFDocument.load(snapshot, { + parseSpeed: ParseSpeeds.Fastest, + }); + + const pages = pdfDoc.getPages(); + + pages.forEach(page => { + // Set the page rotation + page.setRotation(degrees(rotation)) + }); + + // Serialize the modified document + return pdfDoc.save(); +}; \ No newline at end of file diff --git a/public/functions/splitPDF.js 
b/public/functions/splitPDF.js new file mode 100644 index 00000000..f2ec0f88 --- /dev/null +++ b/public/functions/splitPDF.js @@ -0,0 +1,26 @@ +import { createSubDocument } from "./extractPages.js"; + +const { PDFDocument, ParseSpeeds } = PDFLib; + +export const splitPDF = async (snapshot, splitAfterPageArray) => { + const pdfDoc = await PDFDocument.load(snapshot) + + const numberOfPages = pdfDoc.getPages().length; + + let pagesArray = []; + let splitAfter = splitAfterPageArray.shift(); + const subDocuments = []; + + for (let i = 0; i < numberOfPages; i++) { + if(i > splitAfter && pagesArray.length > 0) { + subDocuments.push(await createSubDocument(pdfDoc, pagesArray)); + splitAfter = splitAfterPageArray.shift(); + pagesArray = []; + } + pagesArray.push(i); + } + subDocuments.push(await createSubDocument(pdfDoc, pagesArray)); + pagesArray = []; + + return subDocuments; +}; \ No newline at end of file diff --git a/public/index.js b/public/index.js index d109efd2..c4a68ab6 100644 --- a/public/index.js +++ b/public/index.js @@ -55,4 +55,4 @@ import { traverseOperations } from "./traverseOperations.js"; // } // } }); -})(exampleWorkflows.mergeOnly); +})(exampleWorkflows.rotateOnly); diff --git a/public/traverseOperations.js b/public/traverseOperations.js index b02edf42..e664a3a7 100644 --- a/public/traverseOperations.js +++ b/public/traverseOperations.js @@ -1,4 +1,7 @@ +import { extractPages } from "./functions/extractPages.js"; import { mergePDFs } from "./functions/mergePDFs.js"; +import { rotatePages } from "./functions/rotatePDF.js"; +import { splitPDF } from "./functions/splitPDF.js"; import { organizeWaitOperations } from "./organizeWaitOperations.js"; export async function traverseOperations(operations, input) { @@ -34,6 +37,8 @@ export async function traverseOperations(operations, input) { } break; case "removeObjects": + console.warn("RemoveObjects not implemented yet.") + if(Array.isArray(input)) { for (let i = 0; i < input.length; i++) { // TODO: modfiy 
input @@ -52,16 +57,47 @@ export async function traverseOperations(operations, input) { for (let i = 0; i < input.length; i++) { // TODO: modfiy input input[i].fileName += "_extractedPages"; + input[i].buffer = await extractPages(input[i].buffer, operation.values["pagesToExtractArray"]); await nextOperation(operation.operations, input[i]); } } else { // TODO: modfiy input input.fileName += "_extractedPages"; + input.buffer = await extractPages(input.buffer, operation.values["pagesToExtractArray"]); await nextOperation(operation.operations, input); } break; + case "split": + // TODO: When a split goes into a wait function it might break the done condition, as it will count multiple times. + if(Array.isArray(input)) { + for (let i = 0; i < input.length; i++) { + const splits = await splitPDF(input[i].buffer, operation.values["pagesToSplitAfterArray"]); + + for (let j = 0; j < splits.length; j++) { + const split = {}; + split.originalFileName = input[i].originalFileName; + split.fileName = input[i].fileName + "_split"; + split.buffer = splits[j]; + await nextOperation(operation.operations, split); + } + } + } + else { + const splits = await splitPDF(input.buffer, operation.values["pagesToSplitAfterArray"]); + + for (let j = 0; j < splits.length; j++) { + const split = {}; + split.originalFileName = input.originalFileName; + split.fileName = input.fileName + "_split"; + split.buffer = splits[j]; + await nextOperation(operation.operations, split); + } + } + break; case "fillField": + console.warn("FillField not implemented yet.") + if(Array.isArray(input)) { for (let i = 0; i < input.length; i++) { // TODO: modfiy input @@ -76,6 +112,8 @@ export async function traverseOperations(operations, input) { } break; case "extractImages": + console.warn("ExtractImages not implemented yet.") + if(Array.isArray(input)) { for (let i = 0; i < input.length; i++) { // TODO: modfiy input @@ -105,6 +143,7 @@ export async function traverseOperations(operations, input) { await 
nextOperation(operation.operations, input); break; case "transform": { + console.warn("Transform not implemented yet.") if(Array.isArray(input)) { for (let i = 0; i < input.length; i++) { // TODO: modfiy input @@ -119,6 +158,38 @@ export async function traverseOperations(operations, input) { } break; } + case "extract": + if(Array.isArray(input)) { + for (let i = 0; i < input.length; i++) { + // TODO: modify input + input[i].fileName += "_extractedPages"; + input[i].buffer = await extractPages(input[i].buffer, operation.values["pagesToExtractArray"]); + await nextOperation(operation.operations, input[i]); + } + } + else { + // TODO: modify input + input.fileName += "_extractedPages"; + input.buffer = await extractPages(input.buffer, operation.values["pagesToExtractArray"]); + await nextOperation(operation.operations, input); + } + break; + case "rotate": + if(Array.isArray(input)) { + for (let i = 0; i < input.length; i++) { + // TODO: modify input + input[i].fileName += "_turned"; + input[i].buffer = await rotatePages(input[i].buffer, operation.values["rotation"]); + await nextOperation(operation.operations, input[i]); + } + } + else { + // TODO: modify input + input.fileName += "_turned"; + input.buffer = await rotatePages(input.buffer, operation.values["rotation"]); + await nextOperation(operation.operations, input); + } + break; default: console.log("operation type unknown: ", operation.type); break;