diff --git a/package-lock.json b/package-lock.json index 0210a23a..a98c8b51 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4470,6 +4470,15 @@ "integrity": "sha512-/pyBZWSLD2n0dcHE3hq8s8ZvcETHtEuF+3E7XVt0Ig2nvsVQXdghHVcEkIWjy9A0wKfTn97a/PSDYohKIlnP/w==", "dev": true }, + "node_modules/@types/multer": { + "version": "1.4.10", + "resolved": "https://registry.npmjs.org/@types/multer/-/multer-1.4.10.tgz", + "integrity": "sha512-6l9mYMhUe8wbnz/67YIjc7ZJyQNZoKq7fRXVf7nMdgWgalD0KyzJ2ywI7hoATUSXSbTu9q2HBiEwzy0tNN1v2w==", + "dev": true, + "dependencies": { + "@types/express": "*" + } + }, "node_modules/@types/node": { "version": "18.18.7", "resolved": "https://registry.npmjs.org/@types/node/-/node-18.18.7.tgz", @@ -14543,6 +14552,7 @@ }, "devDependencies": { "@types/express": "^4.17.21", + "@types/multer": "^1.4.10", "ts-node-dev": "^2.0.0", "typescript": "^5.2.2" } diff --git a/server-node/nodemon.json b/server-node/nodemon.json index cffbb28a..55fcade2 100644 --- a/server-node/nodemon.json +++ b/server-node/nodemon.json @@ -1,5 +1,5 @@ { - "watch": ["src"], + "watch": ["src", "../shared-operations/src"], "ext": "ts,json", "ignore": ["src/**/*.spec.ts"], "exec": "node --trace-warnings --experimental-specifier-resolution=node --loader ts-node/esm ./src/index.ts" diff --git a/server-node/package.json b/server-node/package.json index f9aabe37..9ee1af42 100644 --- a/server-node/package.json +++ b/server-node/package.json @@ -25,6 +25,7 @@ }, "devDependencies": { "@types/express": "^4.17.21", + "@types/multer": "^1.4.10", "ts-node-dev": "^2.0.0", "typescript": "^5.2.2" }, diff --git a/server-node/src/routes/api/workflow-controller.ts b/server-node/src/routes/api/workflow-controller.ts index 810455f1..2c9a52e5 100644 --- a/server-node/src/routes/api/workflow-controller.ts +++ b/server-node/src/routes/api/workflow-controller.ts @@ -7,6 +7,7 @@ const upload = multer(); import Operations from "../../utils/pdf-operations"; import { traverseOperations } from "@stirling-pdf/shared-operations/src/workflow/traverseOperations"; +import { PdfFile, RepresentationType } from '@stirling-pdf/shared-operations/src/wrappers/PdfFile'; const activeWorkflows: any = {}; @@ -24,82 +25,78 @@ router.post("/:workflowUuid?", [ // TODO: Validate input further (json may be invalid or not be in workflow format) const workflow = JSON.parse(req.body.workflow); + // TODO: Replace with static multer function of pdffile const inputs = await Promise.all((req.files as Express.Multer.File[]).map(async file => { - console.log(file); - return { - originalFileName: file.originalname.replace(/\.[^/.]+$/, ""), - fileName: file.originalname.replace(/\.[^/.]+$/, ""), - buffer: new Uint8Array(await file.buffer) - } + return new PdfFile(file.originalname.replace(/\.[^/.]+$/, ""), new Uint8Array(await file.buffer), RepresentationType.Uint8Array, file.originalname.replace(/\.[^/.]+$/, "")); })); // TODO: Enable if traverse & organize migration is done. - // // Allow option to do it synchronously and just make a long request - // if(req.body.async === "false") { - // console.log("Don't do async"); + // Allow option to do it synchronously and just make a long request + if(req.body.async === "false") { + console.log("Don't do async"); - // const traverse = traverseOperations(workflow.operations, inputs, Operations); + const traverse = traverseOperations(workflow.operations, inputs, Operations); - // let pdfResults; - // let iteration; - // while (true) { - // iteration = await traverse.next(); - // if (iteration.done) { - // pdfResults = iteration.value; - // console.log("Done"); - // break; - // } - // console.log(iteration.value); - // } + let pdfResults; + let iteration; + while (true) { + iteration = await traverse.next(); + if (iteration.done) { + pdfResults = iteration.value; + console.log("Done"); + break; + } + console.log(iteration.value); + } - // console.log("Download"); - // downloadHandler(res, pdfResults); - // } - // else { - // console.log("Start Aync Workflow"); - // // TODO: UUID collision checks - // let workflowID = req.params.workflowUuid - // if(!workflowID) - // workflowID = generateWorkflowID(); + console.log("Download"); + await downloadHandler(res, pdfResults); + } + else { + console.log("Start Aync Workflow"); + // TODO: UUID collision checks + let workflowID = req.params.workflowUuid + if(!workflowID) + workflowID = generateWorkflowID(); - // activeWorkflows[workflowID] = { - // createdAt: Date.now(), - // finished: false, - // eventStream: null, - // result: null, - // // TODO: When auth is implemented: owner - // } - // const activeWorkflow = activeWorkflows[workflowID]; + activeWorkflows[workflowID] = { + createdAt: Date.now(), + finished: false, + eventStream: null, + result: null, + // TODO: When auth is implemented: owner + } + const activeWorkflow = activeWorkflows[workflowID]; - // res.status(200).json({ - // "workflowID": workflowID, - // "data-recieved": { - // "fileCount": filesArr.length, - // "workflow": workflow - // } - // }); + res.status(200).json({ + "workflowID": workflowID, + "data-recieved": { + "fileCount": inputs.length, + "workflow": workflow + } + }); - // const traverse = traverseOperations(workflow.operations, inputs, Operations); + const traverse = traverseOperations(workflow.operations, inputs, Operations); - // let pdfResults; - // let iteration; - // while (true) { - // iteration = await traverse.next(); - // if (iteration.done) { - // pdfResults = iteration.value; - // if(activeWorkflow.eventStream) { - // activeWorkflow.eventStream.write(`data: processing done\n\n`); - // activeWorkflow.eventStream.end(); - // } - // break; - // } - // if(activeWorkflow.eventStream) - // activeWorkflow.eventStream.write(`data: ${iteration.value}\n\n`); - // } + let pdfResults; + let iteration; + while (true) { + iteration = await traverse.next(); + if (iteration.done) { + pdfResults = iteration.value; + if(activeWorkflow.eventStream) { + activeWorkflow.eventStream.write(`data: processing done\n\n`); + activeWorkflow.eventStream.end(); + } + break; + } + if(activeWorkflow.eventStream) + activeWorkflow.eventStream.write(`data: ${iteration.value}\n\n`); + } - // activeWorkflow.result = pdfResults; - // activeWorkflow.finished = true; - // } + activeWorkflow.result = pdfResults; + activeWorkflow.finished = true; + } } ]); @@ -146,7 +143,7 @@ router.get("/progress-stream/:workflowUuid", (req: Request, res: Response) => { }); }); -router.get("/result/:workflowUuid", (req: Request, res: Response) => { +router.get("/result/:workflowUuid", async (req: Request, res: Response) => { if(!req.params.workflowUuid) { res.status(400).json({"error": "No workflowUuid weres provided."}); return; @@ -167,7 +164,7 @@ router.get("/result/:workflowUuid", (req: Request, res: Response) => { return } - downloadHandler(res, workflow.result); + await downloadHandler(res, workflow.result); // Delete workflow / results when done. delete activeWorkflows[req.params.workflowUuid]; }); @@ -190,7 +187,7 @@ function generateWorkflowID() { return crypto.randomUUID(); } -function downloadHandler(res: Response, pdfResults: any) { +async function downloadHandler(res: Response, pdfResults: PdfFile[]) { if(pdfResults.length == 0) { res.status(500).json({"warning": "The workflow had no outputs."}); } @@ -211,7 +208,7 @@ function downloadHandler(res: Response, pdfResults: any) { for (let i = 0; i < pdfResults.length; i++) { // TODO: Implement other file types (mostly fro image & text extraction) // TODO: Check for name collisions - zip.append(Buffer.from(pdfResults[i].buffer), { name: pdfResults[i].fileName + ".pdf" }); + zip.append(Buffer.from(await pdfResults[i].uint8Array), { name: pdfResults[i].filename + ".pdf" }); } zip.finalize(); @@ -219,10 +216,10 @@ function downloadHandler(res: Response, pdfResults: any) { } else { const readStream = new stream.PassThrough(); - readStream.end(pdfResults[0].buffer); + readStream.end(pdfResults[0].uint8Array); // TODO: Implement other file types (mostly fro image & text extraction) - res.set("Content-disposition", 'attachment; filename=' + pdfResults[0].fileName + ".pdf"); + res.set("Content-disposition", 'attachment; filename=' + pdfResults[0].filename + ".pdf"); res.set("Content-Type", "application/pdf"); readStream.pipe(res); diff --git a/server-node/src/utils/libre-office-utils.ts b/server-node/src/utils/libre-office-utils.ts index 3f58e499..736c56f6 100644 --- a/server-node/src/utils/libre-office-utils.ts +++ b/server-node/src/utils/libre-office-utils.ts @@ -3,7 +3,7 @@ import fs from 'fs'; import os from 'os'; import path from 'path'; import { exec, spawn } from 'child_process' -import { PdfFile, fromUint8Array } from '@stirling-pdf/shared-operations/src/wrappers/PdfFile' +import { PdfFile, RepresentationType } from '@stirling-pdf/shared-operations/src/wrappers/PdfFile' export async function fileToPdf(byteArray: Uint8Array, filename: string): Promise { const parentDir = path.join(os.tmpdir(), "StirlingPDF"); @@ -22,7 +22,7 @@ export async function fileToPdf(byteArray: Uint8Array, filename: string): Promis fs.rmdirSync(tempDir); - return fromUint8Array(outputBytes, outputFileName); + return new PdfFile(outputFileName, outputBytes, RepresentationType.Uint8Array); } export function isLibreOfficeInstalled() { diff --git a/shared-operations/src/functions/common/detectEmptyPages.ts b/shared-operations/src/functions/common/detectEmptyPages.ts index 3497e0c5..44958501 100644 --- a/shared-operations/src/functions/common/detectEmptyPages.ts +++ b/shared-operations/src/functions/common/detectEmptyPages.ts @@ -6,7 +6,7 @@ import { Image } from 'image-js'; import { getImagesOnPage } from "./getImagesOnPage.js"; export async function detectEmptyPages(file: PdfFile, whiteThreashold: number): Promise { - const pdfDoc = await file.pdfjsDocuemnt; + const pdfDoc = await file.pdfjsDocument; const emptyPages: number[] = []; for (let i = 1; i <= pdfDoc.numPages; i++) { diff --git a/shared-operations/src/functions/mergePDFs.ts b/shared-operations/src/functions/mergePDFs.ts index 45e1f907..c979c910 100644 --- a/shared-operations/src/functions/mergePDFs.ts +++ b/shared-operations/src/functions/mergePDFs.ts @@ -1,6 +1,6 @@ import { PDFDocument } from 'pdf-lib'; -import { PdfFile } from '../wrappers/PdfFile'; +import { PdfFile, RepresentationType } from '../wrappers/PdfFile'; export type MergeParamsType = { files: PdfFile[]; @@ -15,5 +15,5 @@ export async function mergePDFs(params: MergeParamsType): Promise { copiedPages.forEach((page) => mergedPdf.addPage(page)); } - return new PdfFile("mergedPDF", mergedPdf); + return new PdfFile("mergedPDF", mergedPdf, RepresentationType.PDFLibDocument); }; \ No newline at end of file diff --git a/shared-operations/src/functions/splitOn.ts b/shared-operations/src/functions/splitOn.ts index 04e3ead9..1fa5f7b8 100644 --- a/shared-operations/src/functions/splitOn.ts +++ b/shared-operations/src/functions/splitOn.ts @@ -16,6 +16,8 @@ export async function splitOn(params: SplitOnParamsType) { const { file, type, whiteThreashold } = params; let splitAtPages: number[] = []; + + console.log("File: ", file); switch (type) { case "BAR_CODE": @@ -36,6 +38,8 @@ export async function splitOn(params: SplitOnParamsType) { console.log("Split At Pages: ", splitAtPages); + console.log("File: ", file); + // Remove detected Pages & Split const pdfDoc = await file.pdflibDocument; const numberOfPages = pdfDoc.getPageCount(); @@ -66,7 +70,9 @@ export async function splitOn(params: SplitOnParamsType) { return subDocuments; async function getPagesWithQRCode(file: PdfFile) { - const pdfDoc = await file.pdfjsDocuemnt; + console.log("FileInQRPrev: ", file); + const pdfDoc = await file.pdfjsDocument; + console.log("FileInQRAfter: ", file); const pagesWithQR: number[] = []; for (let i = 0; i < pdfDoc.numPages; i++) { @@ -74,7 +80,7 @@ export async function splitOn(params: SplitOnParamsType) { const page = await pdfDoc.getPage(i + 1); const images = await getImagesOnPage(page); - console.log("images:", images); + // console.log("images:", images); for (const image of images) { const data = await checkForQROnImage(image); if(data == "https://github.com/Frooodle/Stirling-PDF") { diff --git a/shared-operations/src/functions/subDocumentFunctions.ts b/shared-operations/src/functions/subDocumentFunctions.ts index bfd66c18..ed927947 100644 --- a/shared-operations/src/functions/subDocumentFunctions.ts +++ b/shared-operations/src/functions/subDocumentFunctions.ts @@ -1,6 +1,6 @@ import { PDFDocument } from 'pdf-lib'; -import { PdfFile, fromPdfLib } from '../wrappers/PdfFile.js'; +import { PdfFile, RepresentationType } from '../wrappers/PdfFile.js'; import { detectEmptyPages } from "./common/detectEmptyPages.js"; @@ -21,12 +21,11 @@ export async function sortPagesWithPreset(params: SortPagesWithPresetParamsType) throw new Error("Operation not supported"); } - const byteFile = await file.convertToPdfLibFile(); - if (!byteFile?.pdfLib) return byteFile; + const pdflibDocument = await file.pdflibDocument; - const pageCount = byteFile.pdfLib.getPageCount(); + const pageCount = pdflibDocument.getPageCount(); const sortIndecies = sortFunction(pageCount); - return selectPages({file:byteFile, pagesToExtractArray:sortIndecies}); + return selectPages({file: file, pagesToExtractArray: sortIndecies}); } export type RearrangePagesParamsType = { @@ -37,11 +36,10 @@ export type RearrangePagesParamsType = { export async function rearrangePages(params: RearrangePagesParamsType): Promise { const { file, fancyPageSelector } = params; - const byteFile = await file.convertToPdfLibFile(); - if (!byteFile?.pdfLib) return byteFile; + const pdflibDocument = await file.pdflibDocument; - const pagesToExtractArray = parseFancyPageSelector(fancyPageSelector, byteFile.pdfLib.getPageCount()); - const newDocument = selectPages({file:byteFile, pagesToExtractArray}); + const pagesToExtractArray = parseFancyPageSelector(fancyPageSelector, pdflibDocument.getPageCount()); + const newDocument = selectPages({file: file, pagesToExtractArray}); return newDocument; }; @@ -52,23 +50,22 @@ export type SelectPagesParamsType = { export async function selectPages(params: SelectPagesParamsType): Promise { const { file, pagesToExtractArray } = params; - const byteFile = await file.convertToPdfLibFile(); - if (!byteFile?.pdfLib) return byteFile; + const pdflibDocument = await file.pdflibDocument; const subDocument = await PDFDocument.create(); // Check that array max number is not larger pdf pages number - if(Math.max(...pagesToExtractArray) >= byteFile.pdfLib.getPageCount()) { - throw new Error(`The PDF document only has ${byteFile.pdfLib.getPageCount()} pages and you tried to extract page ${Math.max(...pagesToExtractArray)}`); + if(Math.max(...pagesToExtractArray) >= pdflibDocument.getPageCount()) { + throw new Error(`The PDF document only has ${pdflibDocument.getPageCount()} pages and you tried to extract page ${Math.max(...pagesToExtractArray)}`); } - const copiedPages = await subDocument.copyPages(byteFile.pdfLib, pagesToExtractArray); + const copiedPages = await subDocument.copyPages(pdflibDocument, pagesToExtractArray); for (let i = 0; i < copiedPages.length; i++) { subDocument.addPage(copiedPages[i]); } - return fromPdfLib(subDocument, file.filename); + return new PdfFile(file.originalFilename, subDocument, RepresentationType.PDFLibDocument, file.filename); } export type RemovePagesParamsType = { @@ -78,11 +75,10 @@ export type RemovePagesParamsType = { export async function removePages(params: RemovePagesParamsType): Promise { const { file, pagesToRemoveArray } = params; - const byteFile = await file.convertToPdfLibFile(); - if (!byteFile?.pdfLib) return byteFile; + const pdflibDocument = await file.pdflibDocument; - const pagesToExtractArray = invertSelection(pagesToRemoveArray, byteFile.pdfLib.getPageIndices()) - return selectPages({file:byteFile, pagesToExtractArray}); + const pagesToExtractArray = invertSelection(pagesToRemoveArray, pdflibDocument.getPageIndices()) + return selectPages({file: file, pagesToExtractArray}); } export type RemoveBlankPagesParamsType = { diff --git a/shared-operations/src/functions/updateMetadata.ts b/shared-operations/src/functions/updateMetadata.ts index d3a24639..738a0762 100644 --- a/shared-operations/src/functions/updateMetadata.ts +++ b/shared-operations/src/functions/updateMetadata.ts @@ -1,5 +1,5 @@ -import { PdfFile, fromPdfLib } from '../wrappers/PdfFile'; +import { PdfFile } from '../wrappers/PdfFile'; export type UpdateMetadataParams = { file: PdfFile, @@ -17,7 +17,7 @@ export type UpdateMetadataParams = { } export async function updateMetadata(params: UpdateMetadataParams): Promise { - const pdfDoc = await params.file.getAsPdfLib(); + const pdfDoc = await params.file.pdflibDocument; if (params.deleteAll) { pdfDoc.setAuthor(""); @@ -49,5 +49,5 @@ export async function updateMetadata(params: UpdateMetadataParams): Promise { - if(Array.isArray(actions) && actions.length == 0) { // isEmpty + async function * nextOperation(actions: Action[] | undefined, input: PdfFile[] | PdfFile): AsyncGenerator { + console.log("Next Operation"); + if(actions === undefined || (Array.isArray(actions) && actions.length == 0)) { // isEmpty + console.log("Last Operation"); if(Array.isArray(input)) { console.log("operation done: " + input[0].filename + (input.length > 1 ? "+" : "")); results = results.concat(input); @@ -24,11 +26,12 @@ export async function * traverseOperations(operations: Action[], input: PdfFile[ } for (let i = 0; i < actions.length; i++) { - yield* computeOperation(actions[i], structuredClone(input)); + yield* computeOperation(actions[i], input); // TODO: structuredClone doesn't work in ts need to find another solution to pass by value. } } async function * computeOperation(action: Action, input: PdfFile|PdfFile[]): AsyncGenerator { + yield "Starting: " + action.type; switch (action.type) { case "done": // Skip this, because it is a valid node. @@ -132,9 +135,7 @@ export async function * traverseOperations(operations: Action[], input: PdfFile[ const input = Array.isArray(inputs) ? inputs : [inputs]; // Convert single values to array, keep arrays as is. const newInputs = await callback(input); - if (action.actions) { - yield* nextOperation(action.actions, newInputs); - } + yield* nextOperation(action.actions, newInputs); } /** @@ -149,15 +150,11 @@ export async function * traverseOperations(operations: Action[], input: PdfFile[ for (let i = 0; i < input.length; i++) { output = output.concat(await callback(input[i])); } - if (action.actions) { - yield* nextOperation(action.actions, output); - } + yield* nextOperation(action.actions, output); } else { const nextInput = await callback(input); - if (action.actions) { - yield* nextOperation(action.actions, nextInput); - } + yield* nextOperation(action.actions, nextInput); } } @@ -167,15 +164,11 @@ export async function * traverseOperations(operations: Action[], input: PdfFile[ for (let i = 0; i < input.length; i++) { nextInputs.concat(await callback(input[i])); } - if (action.actions) { - yield* nextOperation(action.actions, nextInputs); - } + yield* nextOperation(action.actions, nextInputs); } else { const nextInput = await callback(input); - if (action.actions) { - yield* nextOperation(action.actions, nextInput); - } + yield* nextOperation(action.actions, nextInput); } } } \ No newline at end of file diff --git a/shared-operations/src/wrappers/PdfFile.ts b/shared-operations/src/wrappers/PdfFile.ts index b4c0a2dd..0000bd77 100644 --- a/shared-operations/src/wrappers/PdfFile.ts +++ b/shared-operations/src/wrappers/PdfFile.ts @@ -1,75 +1,100 @@ import * as PDFJS from 'pdfjs-dist'; -import { PDFDocumentProxy as PDFJSDocument } from 'pdfjs-dist/types/src/display/api'; +import type { PDFDocumentProxy as PDFJSDocument } from 'pdfjs-dist/types/src/display/api'; import { PDFDocument as PDFLibDocument } from 'pdf-lib'; import Joi from 'joi'; +export enum RepresentationType { + Uint8Array, + PDFLibDocument, + PDFJSDocument +} + export class PdfFile { private representation: Uint8Array | PDFLibDocument | PDFJSDocument; + private representationType: RepresentationType; originalFilename: string; filename: string; get uint8Array() : Promise { - switch (this.representation.constructor) { - case Uint8Array: + switch (this.representationType) { + case RepresentationType.Uint8Array: return new Promise((resolve, reject) => { resolve(this.representation as Uint8Array); }); - case PDFLibDocument: - return (this.representation as PDFLibDocument).save(); - case PDFJSDocument: - return (this.representation as PDFJSDocument).getData(); + case RepresentationType.PDFLibDocument: + return new Promise(async (resolve, reject) => { + var uint8Array = await (this.representation as PDFLibDocument).save(); + this.uint8Array = uint8Array; + resolve(uint8Array); + }); + case RepresentationType.PDFJSDocument: + return new Promise(async (resolve, reject) => { + var uint8Array = await (this.representation as PDFJSDocument).getData(); + this.uint8Array = uint8Array; + resolve(uint8Array); + }); default: + console.error("unhandeled PDF type: " + typeof this.representation as string); throw Error("unhandeled PDF type"); } } set uint8Array(value: Uint8Array) { this.representation = value; + this.representationType = RepresentationType.Uint8Array; } get pdflibDocument() : Promise { - switch (this.representation.constructor) { - case PDFLibDocument: // PDFLib + switch (this.representationType) { + case RepresentationType.PDFLibDocument: return new Promise((resolve, reject) => { resolve(this.representation as PDFLibDocument); }); default: return new Promise(async (resolve, reject) => { - resolve(PDFLibDocument.load(await this.uint8Array, { + var uint8Array = await this.uint8Array; + var pdfLibDoc = await PDFLibDocument.load(uint8Array, { updateMetadata: false, - })); + }); + this.pdflibDocument = pdfLibDoc; + resolve(pdfLibDoc); }); } } set pdflibDocument(value: PDFLibDocument) { this.representation = value; + this.representationType = RepresentationType.PDFLibDocument; } - get pdfjsDocuemnt() : Promise { - switch (this.representation.constructor) { - case PDFJSDocument: + get pdfjsDocument() : Promise { + switch (this.representationType) { + case RepresentationType.PDFJSDocument: return new Promise((resolve, reject) => { resolve(this.representation as PDFJSDocument); }); default: return new Promise(async (resolve, reject) => { - resolve(await PDFJS.getDocument(await this.uint8Array).promise); + const pdfjsDoc = await PDFJS.getDocument(await this.uint8Array).promise; + this.pdfjsDocument = pdfjsDoc; + resolve(pdfjsDoc); }); } } - set pdfjsDocuemnt(value: PDFJSDocument) { + set pdfjsDocument(value: PDFJSDocument) { this.representation = value; + this.representationType = RepresentationType.PDFJSDocument; } - constructor(originalFilename: string, representation: Uint8Array | PDFLibDocument | PDFJSDocument, filename?: string) { + constructor(originalFilename: string, representation: Uint8Array | PDFLibDocument | PDFJSDocument, representationType: RepresentationType, filename?: string) { this.originalFilename = originalFilename; this.filename = filename ? filename : originalFilename; this.representation = representation; + this.representationType = representationType; } static fromMulterFile(value: Express.Multer.File): PdfFile { - return new PdfFile(value.originalname, value.buffer as Uint8Array) + return new PdfFile(value.originalname, value.buffer as Uint8Array, RepresentationType.Uint8Array); } static fromMulterFiles(values: Express.Multer.File[]): PdfFile[] { return values.map(v => PdfFile.fromMulterFile(v));