From 26cdb1d04fbe7dc2a0ed115bf20b58856131baa9 Mon Sep 17 00:00:00 2001 From: Felix Kaspar Date: Sun, 22 Oct 2023 00:55:28 +0200 Subject: [PATCH] Removed duplicate code of node backend Frontend traverser needs to be updated --- CONTRIBUTE.md | 13 ++++- README.md | 2 +- functions/extractPages.js | 25 ---------- functions/impose.js | 15 ------ functions/index.js | 38 +++++++++++++++ functions/mergePDFs.js | 15 ------ functions/rotatePDF.js | 18 ------- functions/scaleContent.js | 29 ------------ functions/scalePage.js | 31 ------------ functions/splitPDF.js | 24 ---------- public/functions/extractPages.js | 12 ++--- public/functions/impose.js | 4 +- public/functions/mergePDFs.js | 8 ++-- public/functions/rotatePDF.js | 18 ------- public/functions/rotatePages.js | 16 +++++++ public/functions/scaleContent.js | 14 +++--- public/functions/scalePage.js | 10 ++-- public/functions/splitPDF.js | 10 ++-- traverseOperations.js | 81 +++++++++++++++----------------- 19 files changed, 128 insertions(+), 255 deletions(-) delete mode 100644 functions/extractPages.js delete mode 100644 functions/impose.js create mode 100644 functions/index.js delete mode 100644 functions/mergePDFs.js delete mode 100644 functions/rotatePDF.js delete mode 100644 functions/scaleContent.js delete mode 100644 functions/scalePage.js delete mode 100644 functions/splitPDF.js delete mode 100644 public/functions/rotatePDF.js create mode 100644 public/functions/rotatePages.js diff --git a/CONTRIBUTE.md b/CONTRIBUTE.md index 87644c63..3ead06e7 100644 --- a/CONTRIBUTE.md +++ b/CONTRIBUTE.md @@ -4,4 +4,15 @@ This file should introduce you with the concepts and tools used in this project. ## PDF Library Docs - [pdf-lib](https://pdf-lib.js.org) - js -- [pdfcpu](https://pdfcpu.io) - go-wasm \ No newline at end of file +- [pdfcpu](https://pdfcpu.io) - go-wasm + +## Adding a PDF Function + +In order to add a PDF-Function there are several files that need to be changed. 
If the function is on the backend only, or only on the frontend, you just need to add it to one of the locations. If it is available on both, you need to update both locations. +Dependency Injection is used to accommodate different imports across platforms. + +### Backend + +Backend functions can have different implementations than their frontend counterparts if necessary. Otherwise they can just link to their frontend implementation. + +### Frontend \ No newline at end of file diff --git a/README.md b/README.md index 477811cf..0b819ccc 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ You can also nest workflows like this: { "type": "impose", "values": { - "nup": 2, // 2 pages of the input docuemtn will be put on one page of the output document. + "nup": 2, // 2 pages of the input document will be put on one page of the output document. "format": "A4L" // A4L -> The page size of the Ouput will be an A4 in Landscape. You can also use other paper formats and "P" for portrait output. }, "operations": [] diff --git a/functions/extractPages.js b/functions/extractPages.js deleted file mode 100644 index 215c6fdd..00000000 --- a/functions/extractPages.js +++ /dev/null @@ -1,25 +0,0 @@ -import { PDFDocument, ParseSpeeds } from 'pdf-lib' - -export const extractPages = async (snapshot, pagesToExtractArray) => { - const pdfDoc = await PDFDocument.load(snapshot) - - // TODO: invent a better format for pagesToExtractArray and convert it. 
- return createSubDocument(pdfDoc, pagesToExtractArray); -}; - -export async function createSubDocument(pdfDoc, pagesToExtractArray) { - const subDocument = await PDFDocument.create(); - - // Check that array max number is not larger pdf pages number - if(Math.max(...pagesToExtractArray) >= pdfDoc.getPageCount()) { - throw new Error(`The PDF document only has ${pdfDoc.getPageCount()} pages and you tried to extract page ${Math.max(...pagesToExtractArray)}`); - } - - const copiedPages = await subDocument.copyPages(pdfDoc, pagesToExtractArray); - - for (let i = 0; i < copiedPages.length; i++) { - subDocument.addPage(copiedPages[i]); - } - - return subDocument.save(); -} \ No newline at end of file diff --git a/functions/impose.js b/functions/impose.js deleted file mode 100644 index 7379c5af..00000000 --- a/functions/impose.js +++ /dev/null @@ -1,15 +0,0 @@ -import * as pdfcpuWraopper from "../public/wasm/pdfcpu-wrapper-node.js"; - -export async function impose(snapshot, nup, format) { - return await pdfcpuWraopper.oneToOne([ - "pdfcpu.wasm", - "nup", - "-c", - "disable", - 'f:' + format, - "/output.pdf", - String(nup), - "input.pdf", - ], snapshot); -} - diff --git a/functions/index.js b/functions/index.js new file mode 100644 index 00000000..8960f958 --- /dev/null +++ b/functions/index.js @@ -0,0 +1,38 @@ +import PDFLib from 'pdf-lib'; +import * as pdfcpuWraopper from "../public/wasm/pdfcpu-wrapper-node.js"; + +import { extractPages as dependantExtractPages } from "../public/functions/extractPages.js"; +import { impose as dependantImpose } from '../public/functions/impose.js'; +import { mergePDFs as dependantMergePDFs } from '../public/functions/mergePDFs.js'; +import { rotatePages as dependantRotatePages } from '../public/functions/rotatePages.js'; +import { scaleContent as dependantScaleContent} from '../public/functions/scaleContent.js'; +import { scalePage as dependantScalePage } from '../public/functions/scalePage.js'; +import { splitPDF as dependantSplitPDF } 
from '../public/functions/splitPDF.js'; + +export async function extractPages(snapshot, pagesToExtractArray) { + return dependantExtractPages(snapshot, pagesToExtractArray, PDFLib); +} + +export async function impose(snapshot, nup, format) { + return dependantImpose(snapshot, nup, format, pdfcpuWraopper); +} + +export async function mergePDFs(snapshots) { + return dependantMergePDFs(snapshots, PDFLib); +} + +export async function rotatePages(snapshot, rotation) { + return dependantRotatePages(snapshot, rotation, PDFLib); +} + +export async function scaleContent(snapshot, scaleFactor) { + return dependantScaleContent(snapshot, scaleFactor, PDFLib); +} + +export async function scalePage(snapshot, pageSize) { + return dependantScalePage(snapshot, pageSize, PDFLib); +} + +export async function splitPDF(snapshot, splitAfterPageArray) { + return dependantSplitPDF(snapshot, splitAfterPageArray, PDFLib); +} \ No newline at end of file diff --git a/functions/mergePDFs.js b/functions/mergePDFs.js deleted file mode 100644 index c3f319ad..00000000 --- a/functions/mergePDFs.js +++ /dev/null @@ -1,15 +0,0 @@ -import { PDFDocument, ParseSpeeds } from 'pdf-lib' - -export const mergePDFs = async (snapshots) => { - - const mergedPdf = await PDFDocument.create(); - - for (let i = 0; i < snapshots.length; i++) { - const pdfToMerge = await PDFDocument.load(snapshots[i]); - - const copiedPages = await mergedPdf.copyPages(pdfToMerge, pdfToMerge.getPageIndices()); - copiedPages.forEach((page) => mergedPdf.addPage(page)); - } - - return mergedPdf.save(); -}; \ No newline at end of file diff --git a/functions/rotatePDF.js b/functions/rotatePDF.js deleted file mode 100644 index c7b70e22..00000000 --- a/functions/rotatePDF.js +++ /dev/null @@ -1,18 +0,0 @@ -import { PDFDocument, ParseSpeeds } from 'pdf-lib' - -export const rotatePages = async (snapshot, rotation) => { - // Load the original PDF file - const pdfDoc = await PDFDocument.load(snapshot, { - parseSpeed: ParseSpeeds.Fastest, - }); - 
- const pages = pdfDoc.getPages(); - - pages.forEach(page => { - // Change page size - page.setRotation(degrees(rotation)) - }); - - // Serialize the modified document - return pdfDoc.save(); -}; \ No newline at end of file diff --git a/functions/scaleContent.js b/functions/scaleContent.js deleted file mode 100644 index bdfab7de..00000000 --- a/functions/scaleContent.js +++ /dev/null @@ -1,29 +0,0 @@ -import { PDFDocument, ParseSpeeds } from 'pdf-lib' - -export const scaleContent = async (snapshot, scale_factor) => { - // Load the original PDF file - const pdfDoc = await PDFDocument.load(snapshot, { - parseSpeed: ParseSpeeds.Fastest, - }); - - const pages = pdfDoc.getPages(); - - pages.forEach(page => { - const width = page.getWidth(); - const height = page.getHeight(); - - // Scale content - page.scaleContent(scale_factor, scale_factor); - const scaled_diff = { - width: Math.round(width - scale_factor * width), - height: Math.round(height - scale_factor * height), - }; - - // Center content in new page format - page.translateContent(Math.round(scaled_diff.width / 2), Math.round(scaled_diff.height / 2)); - - }); - - // Serialize the modified document - return pdfDoc.save(); -}; \ No newline at end of file diff --git a/functions/scalePage.js b/functions/scalePage.js deleted file mode 100644 index 10f1c5ca..00000000 --- a/functions/scalePage.js +++ /dev/null @@ -1,31 +0,0 @@ -import { PDFDocument, ParseSpeeds } from 'pdf-lib' - -export const scalePage = async (snapshot, page_size) => { - // Load the original PDF file - const pdfDoc = await PDFDocument.load(snapshot, { - parseSpeed: ParseSpeeds.Fastest, - }); - - const new_size = page_size; - - const pages = pdfDoc.getPages(); - - pages.forEach(page => { - // Change page size - page.setSize(new_size.width, new_size.height); - }); - - // Serialize the modified document - return pdfDoc.save(); -}; - -export const PageSize = { - a4: { - width: 594.96, - height: 841.92 - }, - letter: { - width: 612, - height: 792 - } -}; 
\ No newline at end of file diff --git a/functions/splitPDF.js b/functions/splitPDF.js deleted file mode 100644 index ab68b4fe..00000000 --- a/functions/splitPDF.js +++ /dev/null @@ -1,24 +0,0 @@ -import { PDFDocument, ParseSpeeds } from 'pdf-lib' -import { createSubDocument } from "./extractPages.js"; - -export const splitPDF = async (snapshot, splitAfterPageArray) => { - const pdfDoc = await PDFDocument.load(snapshot) - - const numberOfPages = pdfDoc.getPages().length; - - let pagesArray = []; - let splitAfter = splitAfterPageArray.shift(); - const subDocuments = []; - - for (let i = 0; i < numberOfPages; i++) { - if(i > splitAfter && pagesArray.length > 0) { - subDocuments.push(await createSubDocument(pdfDoc, pagesArray)); - splitAfter = splitAfterPageArray.shift(); - pagesArray = []; - } - pagesArray.push(i); - } - subDocuments.push(await createSubDocument(pdfDoc, pagesArray)); - pagesArray = []; - return subDocuments; -}; \ No newline at end of file diff --git a/public/functions/extractPages.js b/public/functions/extractPages.js index a160335c..20dee150 100644 --- a/public/functions/extractPages.js +++ b/public/functions/extractPages.js @@ -1,14 +1,12 @@ -const { PDFDocument, ParseSpeeds } = PDFLib; - -export const extractPages = async (snapshot, pagesToExtractArray) => { - const pdfDoc = await PDFDocument.load(snapshot) +export async function extractPages(snapshot, pagesToExtractArray, PDFLib) { + const pdfDoc = await PDFLib.PDFDocument.load(snapshot) // TODO: invent a better format for pagesToExtractArray and convert it. 
- return createSubDocument(pdfDoc, pagesToExtractArray); + return createSubDocument(pdfDoc, pagesToExtractArray, PDFLib); }; -export async function createSubDocument(pdfDoc, pagesToExtractArray) { - const subDocument = await PDFDocument.create(); +export async function createSubDocument(pdfDoc, pagesToExtractArray, PDFLib) { + const subDocument = await PDFLib.PDFDocument.create(); // Check that array max number is not larger pdf pages number if(Math.max(...pagesToExtractArray) >= pdfDoc.getPageCount()) { diff --git a/public/functions/impose.js b/public/functions/impose.js index edf1490e..894e44bd 100644 --- a/public/functions/impose.js +++ b/public/functions/impose.js @@ -1,6 +1,4 @@ -import * as pdfcpuWraopper from "../wasm/pdfcpu-wrapper-browser.js"; - -export async function impose(snapshot, nup, format) { +export async function impose(snapshot, nup, format, pdfcpuWraopper) { return await pdfcpuWraopper.oneToOne([ "pdfcpu.wasm", "nup", diff --git a/public/functions/mergePDFs.js b/public/functions/mergePDFs.js index f243451a..109efc66 100644 --- a/public/functions/mergePDFs.js +++ b/public/functions/mergePDFs.js @@ -1,11 +1,9 @@ -const { PDFDocument, ParseSpeeds } = PDFLib; +export const mergePDFs = async (snapshots, PDFLib) => { -export const mergePDFs = async (snapshots) => { - - const mergedPdf = await PDFDocument.create(); + const mergedPdf = await PDFLib.PDFDocument.create(); for (let i = 0; i < snapshots.length; i++) { - const pdfToMerge = await PDFDocument.load(snapshots[i]); + const pdfToMerge = await PDFLib.PDFDocument.load(snapshots[i]); const copiedPages = await mergedPdf.copyPages(pdfToMerge, pdfToMerge.getPageIndices()); copiedPages.forEach((page) => mergedPdf.addPage(page)); diff --git a/public/functions/rotatePDF.js b/public/functions/rotatePDF.js deleted file mode 100644 index 5cb6cb74..00000000 --- a/public/functions/rotatePDF.js +++ /dev/null @@ -1,18 +0,0 @@ -const { PDFDocument, ParseSpeeds, degrees } = PDFLib; - -export const rotatePages = 
async (snapshot, rotation) => { - // Load the original PDF file - const pdfDoc = await PDFDocument.load(snapshot, { - parseSpeed: ParseSpeeds.Fastest, - }); - - const pages = pdfDoc.getPages(); - - pages.forEach(page => { - // Change page size - page.setRotation(degrees(rotation)) - }); - - // Serialize the modified document - return pdfDoc.save(); -}; \ No newline at end of file diff --git a/public/functions/rotatePages.js b/public/functions/rotatePages.js new file mode 100644 index 00000000..c42b2886 --- /dev/null +++ b/public/functions/rotatePages.js @@ -0,0 +1,16 @@ +export async function rotatePages (snapshot, rotation, PDFLib) { + // Load the original PDF file + const pdfDoc = await PDFLib.PDFDocument.load(snapshot, { + parseSpeed: PDFLib.ParseSpeeds.Fastest, + }); + + const pages = pdfDoc.getPages(); + + pages.forEach(page => { + // Change page size + page.setRotation(PDFLib.degrees(rotation)) + }); + + // Serialize the modified document + return pdfDoc.save(); +}; \ No newline at end of file diff --git a/public/functions/scaleContent.js b/public/functions/scaleContent.js index 97d56c59..52847af9 100644 --- a/public/functions/scaleContent.js +++ b/public/functions/scaleContent.js @@ -1,9 +1,7 @@ -const { PDFDocument, ParseSpeeds } = PDFLib; - -export const scaleContent = async (snapshot, scale_factor) => { +export async function scaleContent(snapshot, scaleFactor, PDFLib) { // Load the original PDF file - const pdfDoc = await PDFDocument.load(snapshot, { - parseSpeed: ParseSpeeds.Fastest, + const pdfDoc = await PDFLib.PDFDocument.load(snapshot, { + parseSpeed: PDFLib.ParseSpeeds.Fastest, }); const pages = pdfDoc.getPages(); @@ -13,10 +11,10 @@ export const scaleContent = async (snapshot, scale_factor) => { const height = page.getHeight(); // Scale content - page.scaleContent(scale_factor, scale_factor); + page.scaleContent(scaleFactor, scaleFactor); const scaled_diff = { - width: Math.round(width - scale_factor * width), - height: Math.round(height - 
scale_factor * height), + width: Math.round(width - scaleFactor * width), + height: Math.round(height - scaleFactor * height), }; // Center content in new page format diff --git a/public/functions/scalePage.js b/public/functions/scalePage.js index ffa6ee9a..3696280f 100644 --- a/public/functions/scalePage.js +++ b/public/functions/scalePage.js @@ -1,12 +1,10 @@ -const { PDFDocument, ParseSpeeds } = PDFLib; - -export const scalePage = async (snapshot, page_size) => { +export async function scalePage(snapshot, pageSize, PDFLib) { // Load the original PDF file - const pdfDoc = await PDFDocument.load(snapshot, { - parseSpeed: ParseSpeeds.Fastest, + const pdfDoc = await PDFLib.PDFDocument.load(snapshot, { + parseSpeed: PDFLib.ParseSpeeds.Fastest, }); - const new_size = page_size; + const new_size = pageSize; const pages = pdfDoc.getPages(); diff --git a/public/functions/splitPDF.js b/public/functions/splitPDF.js index f2ec0f88..dac378ce 100644 --- a/public/functions/splitPDF.js +++ b/public/functions/splitPDF.js @@ -1,9 +1,7 @@ import { createSubDocument } from "./extractPages.js"; -const { PDFDocument, ParseSpeeds } = PDFLib; - -export const splitPDF = async (snapshot, splitAfterPageArray) => { - const pdfDoc = await PDFDocument.load(snapshot) +export async function splitPDF(snapshot, splitAfterPageArray, PDFLib) { + const pdfDoc = await PDFLib.PDFDocument.load(snapshot) const numberOfPages = pdfDoc.getPages().length; @@ -13,13 +11,13 @@ export const splitPDF = async (snapshot, splitAfterPageArray) => { for (let i = 0; i < numberOfPages; i++) { if(i > splitAfter && pagesArray.length > 0) { - subDocuments.push(await createSubDocument(pdfDoc, pagesArray)); + subDocuments.push(await createSubDocument(pdfDoc, pagesArray, PDFLib)); splitAfter = splitAfterPageArray.shift(); pagesArray = []; } pagesArray.push(i); } - subDocuments.push(await createSubDocument(pdfDoc, pagesArray)); + subDocuments.push(await createSubDocument(pdfDoc, pagesArray, PDFLib)); pagesArray = []; return 
subDocuments; diff --git a/traverseOperations.js b/traverseOperations.js index 709623ee..a7325846 100644 --- a/traverseOperations.js +++ b/traverseOperations.js @@ -1,21 +1,17 @@ -import { extractPages } from "./functions/extractPages.js"; -import { impose } from "./functions/impose.js"; -import { mergePDFs } from "./functions/mergePDFs.js"; -import { rotatePages } from "./functions/rotatePDF.js"; -import { splitPDF } from "./functions/splitPDF.js"; +import * as Functions from "./functions/index.js"; import { organizeWaitOperations } from "./public/organizeWaitOperations.js"; export async function * traverseOperations(operations, input) { const waitOperations = organizeWaitOperations(operations); let results = []; - yield* nextOperation(operations, input) + yield* nextOperation(operations, input); + console.log("Done2"); return results; async function * nextOperation(operations, input) { - console.log(Array.isArray(operations) && operations.length == 0); if(Array.isArray(operations) && operations.length == 0) { // isEmpty if(Array.isArray(input)) { - console.log("operation done: " + input[0].fileName + "+"); + console.log("operation done: " + input[0].fileName + input.length > 1 ? "+" : ""); results = results.concat(input); return; } @@ -34,6 +30,8 @@ export async function * traverseOperations(operations, input) { async function * computeOperation(operation, input) { yield "Starting: " + operation.type; switch (operation.type) { + case "done": // Skip this, because it is a valid node. 
+ break; case "wait": const waitOperation = waitOperations[operation.values.id]; @@ -50,18 +48,37 @@ export async function * traverseOperations(operations, input) { } break; case "extract": - yield * nToN(input, operation, async (input) => { + yield* nToN(input, operation, async (input) => { input.fileName += "_extractedPages"; - input.buffer = await extractPages(input.buffer, operation.values["pagesToExtractArray"]); + input.buffer = await Functions.extractPages(input.buffer, operation.values["pagesToExtractArray"]); + }); + break; + case "impose": + yield* nToN(input, operation, async (input) => { + input.fileName += "_imposed"; + input.buffer = await Functions.impose(input.buffer, operation.values["nup"], operation.values["format"]); + }); + break; + case "merge": + yield* nToOne(input, operation, async (inputs) => { + return { + originalFileName: inputs.map(input => input.originalFileName).join("_and_"), + fileName: inputs.map(input => input.fileName).join("_and_") + "_merged", + buffer: await Functions.mergePDFs(inputs.map(input => input.buffer)) + } + }); + break; + case "rotate": + yield* nToN(input, operation, async (input) => { + input.fileName += "_turned"; + input.buffer = await Functions.rotatePages(input.buffer, operation.values["rotation"]); }); - break; case "split": // TODO: A split might break the done condition, it may count multiple times. Needs further testing! 
- - yield * oneToN(input, operation, async (input) => { - const splitResult = await splitPDF(input.buffer, operation.values["pagesToSplitAfterArray"]); - + yield* oneToN(input, operation, async (input) => { + const splitResult = await Functions.splitPDF(input.buffer, operation.values["pagesToSplitAfterArray"]); + const splits = []; for (let j = 0; j < splitResult.length; j++) { splits.push({ @@ -70,45 +87,23 @@ export async function * traverseOperations(operations, input) { buffer: splitResult[j] }) } - + input = splits; }); break; - case "merge": - yield * nToOne(input, operation, async (input) => { - const inputs = input; - input = { - originalFileName: inputs.map(input => input.originalFileName).join("_and_"), - fileName: inputs.map(input => input.fileName).join("_and_") + "_merged", - buffer: await mergePDFs(inputs.map(input => input.buffer)) - } - }); - break; - case "rotate": - yield * nToN(input, operation, async (input) => { - input.fileName += "_turned"; - input.buffer = await rotatePages(input.buffer, operation.values["rotation"]); - }); - break; - case "impose": - yield * nToN(input, operation, async (input) => { - input.fileName += "_imposed"; - input.buffer = await impose(input.buffer, operation.values["nup"], operation.values["format"]); - }); - break; default: throw new Error(`${operation.type} not implemented yet.`); break; } } - async function * nToOne(input, operation, callback) { - if(!Array.isArray(input)) { - input = [input]; + async function * nToOne(inputs, operation, callback) { + if(!Array.isArray(inputs)) { + inputs = [inputs]; } - await callback(input); - yield* nextOperation(operation.operations, input); + inputs = await callback(inputs); + yield* nextOperation(operation.operations, inputs); } async function * oneToN(input, operation, callback) {