diff --git a/CONTRIBUTE.md b/CONTRIBUTE.md
new file mode 100644
index 000000000..87644c63f
--- /dev/null
+++ b/CONTRIBUTE.md
@@ -0,0 +1,7 @@
+# Contribute
+
+This file should introduce you to the concepts and tools used in this project.
+
+## PDF Library Docs
+- [pdf-lib](https://pdf-lib.js.org) - js
+- [pdfcpu](https://pdfcpu.io) - go-wasm
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 000000000..b8ec2d7b0
--- /dev/null
+++ b/README.md
@@ -0,0 +1,14 @@
+# StirlingPDF rewrite
+
+This is the development repository for the new StirlingPDF backend. With the power of JS, WASM & GO this will provide almost all functionality SPDF can do currently directly on the client. For automation purposes this will still provide an API to automate your workflows.
+
+## New/Planned Features
+
+- Proper auth using passportjs
+- Workflows & Node based editing of them.
+- Client side PDF-Manipulation
+- Stateful UI
+
+## Contribute
+
+For initial instructions look at [CONTRIBUTE.md](./CONTRIBUTE.md)
\ No newline at end of file
diff --git a/api/index.js b/api/index.js
new file mode 100644
index 000000000..191c2b04a
--- /dev/null
+++ b/api/index.js
@@ -0,0 +1 @@
+// TODO: Make API endpoints available
\ No newline at end of file
diff --git a/functions/extractPages.js b/functions/extractPages.js
new file mode 100644
index 000000000..215c6fdd0
--- /dev/null
+++ b/functions/extractPages.js
@@ -0,0 +1,43 @@
+import { PDFDocument, ParseSpeeds } from 'pdf-lib';
+
+/**
+ * Extracts the given pages of a PDF into a new document.
+ *
+ * @param {Uint8Array|ArrayBuffer} snapshot - Bytes of the source PDF.
+ * @param {number[]} pagesToExtractArray - Zero-based page indices, in output order.
+ * @returns {Promise<Uint8Array>} Bytes of the new PDF.
+ * @throws {Error} If an index is outside the document's page range.
+ */
+export const extractPages = async (snapshot, pagesToExtractArray) => {
+    const pdfDoc = await PDFDocument.load(snapshot);
+
+    // TODO: invent a better format for pagesToExtractArray and convert it.
+    return createSubDocument(pdfDoc, pagesToExtractArray);
+};
+
+/**
+ * Copies the given pages of an already loaded document into a new PDF.
+ *
+ * @param {PDFDocument} pdfDoc - Loaded source document.
+ * @param {number[]} pagesToExtractArray - Zero-based page indices, in output order.
+ * @returns {Promise<Uint8Array>} Bytes of the new PDF.
+ * @throws {Error} If an index is outside the document's page range.
+ */
+export async function createSubDocument(pdfDoc, pagesToExtractArray) {
+    const pageCount = pdfDoc.getPageCount();
+
+    // Reject anything outside 0..pageCount-1 (negative and NaN included)
+    // before copying, so the caller gets a readable error.
+    const invalidAt = pagesToExtractArray.findIndex(
+        (index) => !(index >= 0 && index < pageCount)
+    );
+    if (invalidAt !== -1) {
+        throw new Error(`The PDF document only has ${pageCount} pages and you tried to extract page ${pagesToExtractArray[invalidAt]}`);
+    }
+
+    const subDocument = await PDFDocument.create();
+    const copiedPages = await subDocument.copyPages(pdfDoc, pagesToExtractArray);
+    copiedPages.forEach((page) => subDocument.addPage(page));
+
+    return subDocument.save();
+}
\ No newline at end of file
diff --git a/functions/impose.js b/functions/impose.js
new file mode 100644
index 000000000..7379c5af0
--- /dev/null
+++ b/functions/impose.js
@@ -0,0 +1,23 @@
+import * as pdfcpuWrapper from "../public/wasm/pdfcpu-wrapper-node.js";
+
+/**
+ * Runs pdfcpu's `nup` command on a PDF (Node.js variant).
+ *
+ * @param {Uint8Array|ArrayBuffer} snapshot - Bytes of the source PDF.
+ * @param {number|string} nup - Passed to pdfcpu as the n-up value.
+ * @param {string} format - Passed to pdfcpu as `f:<format>`.
+ * @returns {Promise<Uint8Array|undefined>} Bytes of the imposed PDF, or undefined if pdfcpu fails.
+ */
+export async function impose(snapshot, nup, format) {
+    return await pdfcpuWrapper.oneToOne([
+        "pdfcpu.wasm",
+        "nup",
+        "-c",
+        "disable",
+        'f:' + format,
+        "/output.pdf",
+        String(nup),
+        "input.pdf",
+    ], snapshot);
+}
+
diff --git a/functions/mergePDFs.js b/functions/mergePDFs.js
new file mode 100644
index 000000000..c3f319ad1
--- /dev/null
+++ b/functions/mergePDFs.js
@@ -0,0 +1,20 @@
+import { PDFDocument, ParseSpeeds } from 'pdf-lib';
+
+/**
+ * Concatenates several PDFs into one document, in the order given.
+ *
+ * @param {Array<Uint8Array|ArrayBuffer>} snapshots - Bytes of each source PDF.
+ * @returns {Promise<Uint8Array>} Bytes of the merged PDF.
+ */
+export const mergePDFs = async (snapshots) => {
+    const mergedPdf = await PDFDocument.create();
+
+    // Load sequentially so pages are appended in input order.
+    for (const snapshot of snapshots) {
+        const pdfToMerge = await PDFDocument.load(snapshot);
+        const copiedPages = await mergedPdf.copyPages(pdfToMerge, pdfToMerge.getPageIndices());
+        copiedPages.forEach((page) => mergedPdf.addPage(page));
+    }
+
+    return mergedPdf.save();
+};
\ No newline at end of file
diff --git a/functions/rotatePDF.js b/functions/rotatePDF.js
new file mode 100644
index 000000000..c7b70e220
--- /dev/null
+++ b/functions/rotatePDF.js
@@ -0,0 +1,23 @@
+import { PDFDocument, ParseSpeeds, degrees } from 'pdf-lib';
+
+/**
+ * Sets the rotation of every page of a PDF.
+ *
+ * @param {Uint8Array|ArrayBuffer} snapshot - Bytes of the source PDF.
+ * @param {number} rotation - Rotation in degrees, applied to each page.
+ * @returns {Promise<Uint8Array>} Bytes of the rotated PDF.
+ */
+export const rotatePages = async (snapshot, rotation) => {
+    // Load the original PDF file
+    const pdfDoc = await PDFDocument.load(snapshot, {
+        parseSpeed: ParseSpeeds.Fastest,
+    });
+
+    pdfDoc.getPages().forEach((page) => {
+        // `degrees` wraps the number in the rotation object pdf-lib expects
+        page.setRotation(degrees(rotation));
+    });
+
+    // Serialize the modified document
+    return pdfDoc.save();
+};
\ No newline at end of file
diff --git a/functions/scaleContent.js b/functions/scaleContent.js
new file mode 100644
index 000000000..bdfab7de2
--- /dev/null
+++ b/functions/scaleContent.js
@@ -0,0 +1,36 @@
+import { PDFDocument, ParseSpeeds } from 'pdf-lib';
+
+/**
+ * Scales the content of every page by the same factor on both axes and
+ * re-centers it; the page size itself is not changed.
+ *
+ * @param {Uint8Array|ArrayBuffer} snapshot - Bytes of the source PDF.
+ * @param {number} scale_factor - Factor applied to both axes.
+ * @returns {Promise<Uint8Array>} Bytes of the modified PDF.
+ */
+export const scaleContent = async (snapshot, scale_factor) => {
+    // Load the original PDF file
+    const pdfDoc = await PDFDocument.load(snapshot, {
+        parseSpeed: ParseSpeeds.Fastest,
+    });
+
+    pdfDoc.getPages().forEach((page) => {
+        const width = page.getWidth();
+        const height = page.getHeight();
+
+        // Scale content
+        page.scaleContent(scale_factor, scale_factor);
+
+        // Difference between the page and the scaled content, rounded
+        const scaled_diff = {
+            width: Math.round(width - scale_factor * width),
+            height: Math.round(height - scale_factor * height),
+        };
+
+        // Shift by half the difference so the content stays centered
+        page.translateContent(Math.round(scaled_diff.width / 2), Math.round(scaled_diff.height / 2));
+    });
+
+    // Serialize the modified document
+    return pdfDoc.save();
+};
\ No newline at end of file
diff --git a/functions/scalePage.js b/functions/scalePage.js
new file mode 100644
index 000000000..10f1c5cae
--- /dev/null
+++ b/functions/scalePage.js
@@ -0,0 +1,37 @@
+import { PDFDocument, ParseSpeeds } from 'pdf-lib';
+
+/**
+ * Resizes every page of a PDF to the given size.
+ *
+ * @param {Uint8Array|ArrayBuffer} snapshot - Bytes of the source PDF.
+ * @param {{width: number, height: number}} page_size - Target size, e.g. an entry of `PageSize`.
+ * @returns {Promise<Uint8Array>} Bytes of the resized PDF.
+ */
+export const scalePage = async (snapshot, page_size) => {
+    // Load the original PDF file
+    const pdfDoc = await PDFDocument.load(snapshot, {
+        parseSpeed: ParseSpeeds.Fastest,
+    });
+
+    const { width, height } = page_size;
+
+    pdfDoc.getPages().forEach((page) => {
+        // Change page size
+        page.setSize(width, height);
+    });
+
+    // Serialize the modified document
+    return pdfDoc.save();
+};
+
+/** Predefined page sizes that can be passed to `scalePage`. */
+export const PageSize = {
+    a4: {
+        width: 594.96,
+        height: 841.92,
+    },
+    letter: {
+        width: 612,
+        height: 792,
+    },
+};
\ No newline at end of file
diff --git a/functions/splitPDF.js b/functions/splitPDF.js
new file mode 100644
index 000000000..01bb66366
--- /dev/null
+++ b/functions/splitPDF.js
@@ -0,0 +1,37 @@
+import { PDFDocument, ParseSpeeds } from 'pdf-lib';
+import { createSubDocument } from "./extractPages.js";
+
+/**
+ * Splits a PDF into consecutive sub-documents.
+ *
+ * @param {Uint8Array|ArrayBuffer} snapshot - Bytes of the source PDF.
+ * @param {number[]} splitAfterPageArray - Zero-based page indices after which a
+ *     new document starts; consumed front to back, so pass them ascending. Not modified.
+ * @returns {Promise<Uint8Array[]>} Bytes of each sub-document, in page order.
+ */
+export const splitPDF = async (snapshot, splitAfterPageArray) => {
+    const pdfDoc = await PDFDocument.load(snapshot);
+
+    const numberOfPages = pdfDoc.getPages().length;
+
+    // Work on a copy so the caller's array is not emptied by shift().
+    const remainingSplits = [...splitAfterPageArray];
+    let splitAfter = remainingSplits.shift();
+    let pagesArray = [];
+    const subDocuments = [];
+
+    for (let i = 0; i < numberOfPages; i++) {
+        // Once the split point is passed, close the current chunk.
+        // When no split points remain, splitAfter is undefined and the
+        // comparison is false, so the rest lands in the final chunk.
+        if (i > splitAfter && pagesArray.length > 0) {
+            subDocuments.push(await createSubDocument(pdfDoc, pagesArray));
+            splitAfter = remainingSplits.shift();
+            pagesArray = [];
+        }
+        pagesArray.push(i);
+    }
+    subDocuments.push(await createSubDocument(pdfDoc, pagesArray));
+
+    return subDocuments;
+};
\ No newline at end of file
diff --git a/index.js b/index.js
index 497d9871d..a45c4bfec 100644
--- a/index.js
+++ b/index.js
@@ -1,15 +1,16 @@
-const express = require('express');
+import express from 'express';
 const app = express();
-const path = require('path');
 const PORT = 8080;
 
 
 // Static Middleware
-app.use(express.static(path.join(__dirname, 'public')))
+app.use(express.static('./public'));
 
-app.get('/', function (req, res, next) {
+app.get('/', function (req, res, next) { // TODO: Use EJS?
     res.render('home.ejs');
-})
+});
+
+// TODO: Import and serve /api
 
 app.listen(PORT, function (err) {
     if (err) console.log(err);
diff --git a/package.json b/package.json
index 92c1624d4..8eb41455a 100644
--- a/package.json
+++ b/package.json
@@ -10,5 +10,6 @@
   "license": "ISC",
   "dependencies": {
     "express": "^4.18.2"
-  }
+  },
+  "type": "module"
 }
diff --git a/pdfjs.code-workspace b/pdfjs.code-workspace
deleted file mode 100644
index 517e0b2a8..000000000
--- a/pdfjs.code-workspace
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-  "folders": [
-    {
-      "path": "."
-    }
-  ],
-  "settings": {}
-}
\ No newline at end of file
diff --git a/public/functions/impose.js b/public/functions/impose.js
index a2909cc9b..edf1490e7 100644
--- a/public/functions/impose.js
+++ b/public/functions/impose.js
@@ -1,5 +1,22 @@
-import * as pdfcpuWraopper from "../wasm/pdfcpu-wrapper.js";
+import * as pdfcpuWrapper from "../wasm/pdfcpu-wrapper-browser.js";
 
-export function impose(snapshot, nup, format) {
-    return pdfcpuWraopper.impose(snapshot, nup, format);
+/**
+ * Runs pdfcpu's `nup` command on a PDF (browser variant).
+ *
+ * @param {Uint8Array|ArrayBuffer} snapshot - Bytes of the source PDF.
+ * @param {number|string} nup - Passed to pdfcpu as the n-up value.
+ * @param {string} format - Passed to pdfcpu as `f:<format>`.
+ * @returns {Promise<Uint8Array>} Bytes of the imposed PDF.
+ */
+export async function impose(snapshot, nup, format) {
+    return await pdfcpuWrapper.oneToOne([
+        "pdfcpu.wasm",
+        "nup",
+        "-c",
+        "disable",
+        'f:' + format,
+        "/output.pdf",
+        String(nup),
+        "input.pdf",
+    ], snapshot);
 }
\ No newline at end of file
diff --git a/public/wasm/pdfcpu-wrapper.js b/public/wasm/pdfcpu-wrapper-browser.js
similarity index 79%
rename from public/wasm/pdfcpu-wrapper.js
rename to public/wasm/pdfcpu-wrapper-browser.js
index 8d53b095d..44a94023f 100644
--- a/public/wasm/pdfcpu-wrapper.js
+++ b/public/wasm/pdfcpu-wrapper-browser.js
@@ -21,14 +21,12 @@ function configureFs() {
             fs = BrowserFS.BFSRequire("fs");
             Buffer = BrowserFS.BFSRequire("buffer").Buffer;
 
-            // TODO: Find a way to remove these globals:
             window.fs = fs;
             window.Buffer = Buffer;
         }
     );
 }
 
-// TODO: This needs to be changed in order to run on node
 function loadWasm() {
     const script = document.createElement("script");
     script.src = wasmLocation + "/wasm_exec.js";
@@ -55,7 +53,8 @@ const runWasm = async (param) => {
 async function loadFileAsync(data) {
     console.log(`Writing file to MemoryFS`);
     await fs.writeFile(`/input.pdf`, data);
-    let exitCode = await runWasm([
+    console.log(`Write done. Validating...`);
+    let exitcode = await runWasm([
         "pdfcpu.wasm",
         "validate",
         "-c",
@@ -63,23 +62,21 @@ async function loadFileAsync(data) {
         `/input.pdf`,
     ]);
 
-    if (exitCode !== 0)
+    if (exitcode !== 0)
         throw new Error("There was an error validating your PDFs");
+
+    console.log(`File is Valid`);
 }
 
 export async function impose(snapshot, nup, format) {
+
+};
+
+export async function oneToOne(wasmArray, snapshot) {
     await loadFileAsync(Buffer.from(snapshot));
 
-    let exitcode = await runWasm([
-        "pdfcpu.wasm",
-        "nup",
-        "-c",
-        "disable",
-        'f:' + format,
-        "output.pdf",
-        String(nup),
-        "input.pdf",
-    ]);
+    console.log("Nuping File");
+    let exitcode = await runWasm(wasmArray);
 
     if (exitcode !== 0) {
         console.error("There was an error nuping your PDFs");
@@ -91,4 +88,16 @@ export async function impose(snapshot, nup, format) {
     fs.unlink("output.pdf");
     console.log("Your File ist Ready!");
     return new Uint8Array(contents);
-};
\ No newline at end of file
+}
+
+export async function manyToOne() {
+    //TODO: Do this if necessary for some operations
+}
+
+export async function oneToMany() {
+    //TODO: Do this if necessary for some operations
+}
+
+export async function manyToMany() {
+    //TODO: Do this if necessary for some operations
+}
\ No newline at end of file
diff --git a/public/wasm/pdfcpu-wrapper-node.js b/public/wasm/pdfcpu-wrapper-node.js
new file mode 100644
index 000000000..2e8d583a9
--- /dev/null
+++ b/public/wasm/pdfcpu-wrapper-node.js
@@ -0,0 +1,173 @@
+// Node.js wrapper around the pdfcpu WebAssembly build. Files are exchanged
+// with pdfcpu through an in-memory file system (WasmFs) instead of BrowserFS.
+
+import { WasmFs } from '@wasmer/wasmfs';
+import path from "path";
+
+let webWasmLocation = "/wasm/";
+let nodeWasmLocation = "./public/wasm/";
+
+let fs;
+const wasmfs = new WasmFs();
+
+(async () => {
+    await loadWasm();
+    await configureFs();
+})();
+
+/** Points the module-level `fs` and `global.fs` at the in-memory file system. */
+async function configureFs() {
+    // Can't use BrowserFS: https://github.com/jvilk/BrowserFS/issues/271
+    fs = wasmfs.fs;
+    global.fs = fs;
+
+    console.log("InMemoryFs configured");
+}
+
+/** Loads `wasm_exec.js` (presumably what defines the global `Go` used by runWasm). */
+async function loadWasm() {
+    global.crypto = (await import("crypto")).webcrypto; // wasm dependency
+    await import("./wasm_exec.js");
+}
+
+/**
+ * Instantiates pdfcpu and runs it once with the given argv.
+ *
+ * @param {string[]} param - Command line, starting with the program name.
+ * @returns {Promise<number>} Value of `go.exitCode` after the run.
+ */
+const runWasm = async (param) => {
+    // Read the binary once and reuse the bytes for every invocation.
+    if (global.cachedWasmResponse === undefined) {
+        const buffer = (await import("fs")).readFileSync(path.join(nodeWasmLocation, "pdfcpu.wasm"));
+        global.cachedWasmResponse = buffer;
+        global.go = new Go();
+    }
+    const { instance } = await WebAssembly.instantiate(
+        global.cachedWasmResponse,
+        global.go.importObject
+    );
+    global.go.argv = param;
+    await global.go.run(instance);
+    return global.go.exitCode;
+};
+
+/**
+ * Writes the PDF to `input.pdf` and validates it with pdfcpu.
+ *
+ * @param {Buffer} data - Bytes of the PDF.
+ * @throws {Error} If pdfcpu's `validate` command exits non-zero.
+ */
+async function loadFileAsync(data) {
+    console.log(`Writing file to Disk`);
+    fs.writeFileSync(`input.pdf`, data);
+    console.log(`Write done. Validating...`);
+    const exitcode = await runWasm([
+        "pdfcpu.wasm",
+        "validate",
+        "-c",
+        "disable",
+        `input.pdf`,
+    ]);
+    if (exitcode !== 0) {
+        throw new Error("There was an error validating your PDFs");
+    }
+
+    // For debugging, the command's output can be read with wasmfs.getStdOut().
+
+    console.log(`File is Valid`);
+}
+
+/**
+ * Runs a pdfcpu command that turns one input PDF into one output PDF.
+ *
+ * @param {string[]} wasmArray - pdfcpu argv; it is expected to read
+ *     `input.pdf` and write `/output.pdf`.
+ * @param {Uint8Array|ArrayBuffer} snapshot - Bytes of the source PDF.
+ * @returns {Promise<Uint8Array|undefined>} Bytes of the result, or undefined
+ *     if pdfcpu exits non-zero (the failure is only logged).
+ * @throws {Error} If the input is invalid or the output is missing or empty.
+ */
+export async function oneToOne(wasmArray, snapshot) {
+    await loadFileAsync(Buffer.from(snapshot));
+
+    console.log("Nuping File");
+
+    const exitcode = await runWasm(wasmArray);
+    if (exitcode !== 0) {
+        console.error("There was an error nuping your PDFs");
+        return;
+    }
+    console.log("Nuping Done");
+
+    await checkExistsWithTimeout("/output.pdf", 1000);
+    console.log("Write started...");
+    // We need to wait for the file write in memfs to finish in node for some reason
+    await new Promise((resolve) => setTimeout(resolve, 100));
+
+    fs.unlinkSync("input.pdf");
+
+    const data = fs.readFileSync("/output.pdf");
+    if (data.length === 0) {
+        throw Error("File Size 0 that should not happen");
+    }
+    fs.unlinkSync("output.pdf");
+    console.log("Your File ist Ready!");
+    return new Uint8Array(data);
+}
+
+export async function manyToOne() {
+    //TODO: Do this if necessary for some operations
+}
+
+export async function oneToMany() {
+    //TODO: Do this if necessary for some operations
+}
+
+export async function manyToMany() {
+    //TODO: Do this if necessary for some operations
+}
+
+// THX: https://stackoverflow.com/questions/26165725/nodejs-check-file-exists-if-not-wait-till-it-exist
+/**
+ * Resolves once `filePath` is readable or any change is reported in its directory.
+ *
+ * @param {string} filePath - File to wait for.
+ * @param {number} timeout - Maximum wait in milliseconds.
+ * @returns {Promise<void>} Rejects if neither happened before the timeout.
+ */
+function checkExistsWithTimeout(filePath, timeout) {
+    return new Promise(function (resolve, reject) {
+        let settled = false;
+        let timer;
+        let watcher;
+
+        // Stops the timer and the watcher exactly once, then settles the promise.
+        const settle = (done) => {
+            if (settled) {
+                return;
+            }
+            settled = true;
+            clearTimeout(timer);
+            watcher.close();
+            done();
+        };
+
+        // Start watching before arming the timer: if fs.watch throws, no timer
+        // is left behind that would later call close() on a missing watcher.
+        watcher = fs.watch(path.dirname(filePath), function () {
+            settle(resolve);
+        });
+
+        timer = setTimeout(function () {
+            settle(() => reject(new Error('File did not exists and was not created during the timeout.')));
+        }, timeout);
+
+        // The file may already exist, in which case no watch event follows.
+        fs.access(filePath, fs.constants.R_OK, function (err) {
+            if (!err) {
+                settle(resolve);
+            }
+        });
+    });
+}
\ No newline at end of file