pdfcpu & wasm update, server side functions init, README.md, CONTRIBUTE.md

This commit is contained in:
Felix Kaspar 2023-10-18 23:56:56 +02:00
parent 21e0385a31
commit c60de02e14
16 changed files with 360 additions and 32 deletions

7
CONTRIBUTE.md Normal file
View File

@ -0,0 +1,7 @@
# Contribute
This file introduces you to the concepts and tools used in this project.
## PDF Library Docs
- [pdf-lib](https://pdf-lib.js.org) - js
- [pdfcpu](https://pdfcpu.io) - go-wasm

14
README.md Normal file
View File

@ -0,0 +1,14 @@
# StirlingPDF rewrite
This is the development repository for the new StirlingPDF backend. With the power of JS, WASM & Go, it will provide almost all of SPDF's current functionality directly on the client. For automation purposes, it will still provide an API to automate your workflows.
## New/Planned Features
- Proper auth using passportjs
- Workflows & Node based editing of them.
- Client side PDF-Manipulation
- Stateful UI
## Contribute
For initial instructions look at [CONTRIBUTE.md](./CONTRIBUTE.md)

1
api/index.js Normal file
View File

@ -0,0 +1 @@
// TODO: Make API endpoints available

25
functions/extractPages.js Normal file
View File

@ -0,0 +1,25 @@
import { PDFDocument, ParseSpeeds } from 'pdf-lib'
/**
 * Loads a PDF and builds a new PDF containing only the requested pages.
 *
 * @param snapshot - PDF data in any form accepted by `PDFDocument.load`.
 * @param pagesToExtractArray - Zero-based page indices to copy into the result.
 * @returns Promise resolving to whatever `createSubDocument` resolves to
 *          (the saved sub-document).
 */
export const extractPages = async (snapshot, pagesToExtractArray) => {
    // TODO: invent a better format for pagesToExtractArray and convert it.
    const sourceDocument = await PDFDocument.load(snapshot);
    return createSubDocument(sourceDocument, pagesToExtractArray);
};
/**
 * Creates a new PDF made of the given pages of an already loaded document.
 *
 * The indices are validated before any new document is allocated, so invalid
 * requests fail fast with a descriptive error.
 *
 * @param pdfDoc - Loaded source document (must provide `getPageCount()`).
 * @param pagesToExtractArray - Zero-based page indices, in output order.
 * @returns Promise resolving to the result of `save()` on the new document.
 * @throws {Error} If an index is at or beyond the page count, or is negative / not a number.
 */
export async function createSubDocument(pdfDoc, pagesToExtractArray) {
    const pageCount = pdfDoc.getPageCount();

    // Folding with Math.max avoids spreading a potentially huge array into arguments.
    let highestIndex = -Infinity;
    for (const pageIndex of pagesToExtractArray) {
        highestIndex = Math.max(highestIndex, pageIndex);
    }
    if (highestIndex >= pageCount) {
        throw new Error(`The PDF document only has ${pageCount} pages and you tried to extract page ${highestIndex}`);
    }

    // `!(x >= 0)` also catches NaN and undefined, which would otherwise reach copyPages.
    for (const pageIndex of pagesToExtractArray) {
        if (!(pageIndex >= 0)) {
            throw new Error(`Invalid page index ${pageIndex}: page indices must be zero or greater`);
        }
    }

    const subDocument = await PDFDocument.create();
    const copiedPages = await subDocument.copyPages(pdfDoc, pagesToExtractArray);
    for (const copiedPage of copiedPages) {
        subDocument.addPage(copiedPage);
    }

    return subDocument.save();
}

15
functions/impose.js Normal file
View File

@ -0,0 +1,15 @@
import * as pdfcpuWraopper from "../public/wasm/pdfcpu-wrapper-node.js";
/**
 * Runs pdfcpu's `nup` command on a single PDF through the node wasm wrapper.
 *
 * @param snapshot - PDF data handed to the wrapper as the input file.
 * @param nup - N-up value; converted to a string for the command line.
 * @param format - Page format name, passed to pdfcpu as `f:<format>`.
 * @returns Promise resolving to whatever `oneToOne` resolves to.
 */
export async function impose(snapshot, nup, format) {
    const formatDescription = 'f:' + format;
    const pdfcpuArguments = [
        "pdfcpu.wasm",
        "nup",
        "-c",
        "disable",
        formatDescription,
        "/output.pdf",
        String(nup),
        "input.pdf",
    ];
    const result = await pdfcpuWraopper.oneToOne(pdfcpuArguments, snapshot);
    return result;
}

15
functions/mergePDFs.js Normal file
View File

@ -0,0 +1,15 @@
import { PDFDocument, ParseSpeeds } from 'pdf-lib'
/**
 * Concatenates several PDFs into one document, in the order given.
 *
 * @param snapshots - Array of PDF data, each accepted by `PDFDocument.load`.
 * @returns Promise resolving to the result of `save()` on the merged document.
 */
export const mergePDFs = async (snapshots) => {
    const mergedPdf = await PDFDocument.create();

    for (const snapshot of snapshots) {
        const sourcePdf = await PDFDocument.load(snapshot);
        const allIndices = sourcePdf.getPageIndices();
        const pagesFromSource = await mergedPdf.copyPages(sourcePdf, allIndices);
        for (const page of pagesFromSource) {
            mergedPdf.addPage(page);
        }
    }

    return mergedPdf.save();
};

18
functions/rotatePDF.js Normal file
View File

@ -0,0 +1,18 @@
import { PDFDocument, ParseSpeeds, degrees } from 'pdf-lib'
/**
 * Applies the same rotation to every page of a PDF.
 *
 * @param snapshot - PDF data in any form accepted by `PDFDocument.load`.
 * @param rotation - Rotation angle in degrees; wrapped with pdf-lib's `degrees()`.
 * @returns Promise resolving to the result of `save()` on the modified document.
 */
export const rotatePages = async (snapshot, rotation) => {
    // Load the original PDF file
    const pdfDoc = await PDFDocument.load(snapshot, {
        parseSpeed: ParseSpeeds.Fastest,
    });

    // `degrees` is pdf-lib's helper that wraps a number as a rotation value;
    // it must be imported from 'pdf-lib' alongside PDFDocument.
    for (const page of pdfDoc.getPages()) {
        // Set the page rotation
        page.setRotation(degrees(rotation));
    }

    // Serialize the modified document
    return pdfDoc.save();
};

29
functions/scaleContent.js Normal file
View File

@ -0,0 +1,29 @@
import { PDFDocument, ParseSpeeds } from 'pdf-lib'
/**
 * Scales the content of every page by a uniform factor and shifts it so it
 * stays centered; the page dimensions themselves are left untouched.
 *
 * @param snapshot - PDF data in any form accepted by `PDFDocument.load`.
 * @param scale_factor - Factor applied to both axes of the page content.
 * @returns Promise resolving to the result of `save()` on the modified document.
 */
export const scaleContent = async (snapshot, scale_factor) => {
    const pdfDoc = await PDFDocument.load(snapshot, { parseSpeed: ParseSpeeds.Fastest });

    for (const page of pdfDoc.getPages()) {
        const originalWidth = page.getWidth();
        const originalHeight = page.getHeight();

        page.scaleContent(scale_factor, scale_factor);

        // Space gained (or lost) on each axis by scaling, rounded to whole units.
        const widthDifference = Math.round(originalWidth - scale_factor * originalWidth);
        const heightDifference = Math.round(originalHeight - scale_factor * originalHeight);

        // Move the content by half the difference to center it on the page.
        const offsetX = Math.round(widthDifference / 2);
        const offsetY = Math.round(heightDifference / 2);
        page.translateContent(offsetX, offsetY);
    }

    return pdfDoc.save();
};

31
functions/scalePage.js Normal file
View File

@ -0,0 +1,31 @@
import { PDFDocument, ParseSpeeds } from 'pdf-lib'
/**
 * Sets every page of a PDF to the given size.
 *
 * @param snapshot - PDF data in any form accepted by `PDFDocument.load`.
 * @param page_size - Object with `width` and `height`, e.g. an entry of `PageSize`.
 * @returns Promise resolving to the result of `save()` on the modified document.
 */
export const scalePage = async (snapshot, page_size) => {
    const pdfDoc = await PDFDocument.load(snapshot, { parseSpeed: ParseSpeeds.Fastest });

    for (const page of pdfDoc.getPages()) {
        // Resize the page itself
        page.setSize(page_size.width, page_size.height);
    }

    return pdfDoc.save();
};
/**
 * Predefined page formats usable as the `page_size` argument of `scalePage`.
 * Each entry carries a `width` and a `height` (presumably in PDF points — TODO confirm).
 */
export const PageSize = {
    a4: { width: 594.96, height: 841.92 },
    letter: { width: 612, height: 792 },
};

25
functions/splitPDF.js Normal file
View File

@ -0,0 +1,25 @@
import { PDFDocument, ParseSpeeds } from 'pdf-lib'
import { createSubDocument } from "./extractPages.js";
/**
 * Splits a PDF into several documents.
 *
 * Pages are collected in order; once the running page index exceeds the
 * current split point, the collected pages are written out as one
 * sub-document and the next split point is taken. Remaining pages form the
 * last sub-document.
 *
 * @param snapshot - PDF data in any form accepted by `PDFDocument.load`.
 * @param splitAfterPageArray - Zero-based page indices after which to split.
 *        The array is not modified.
 * @returns Promise resolving to an array of sub-documents as produced by
 *          `createSubDocument`.
 */
export const splitPDF = async (snapshot, splitAfterPageArray) => {
    const pdfDoc = await PDFDocument.load(snapshot);
    const numberOfPages = pdfDoc.getPages().length;

    // Consume a private copy so the caller's array is not emptied.
    const pendingSplits = Array.from(splitAfterPageArray);
    let splitAfter = pendingSplits.shift();

    const subDocuments = [];
    let pagesArray = [];

    for (let i = 0; i < numberOfPages; i++) {
        // When no split points are left, `splitAfter` is undefined and this
        // comparison is always false, so the rest ends up in the last document.
        if (i > splitAfter && pagesArray.length > 0) {
            subDocuments.push(await createSubDocument(pdfDoc, pagesArray));
            splitAfter = pendingSplits.shift();
            pagesArray = [];
        }
        pagesArray.push(i);
    }
    subDocuments.push(await createSubDocument(pdfDoc, pagesArray));

    return subDocuments;
};

View File

@ -1,15 +1,16 @@
const express = require('express');
import express from 'express';
const app = express();
const path = require('path');
const PORT = 8080;
// Static Middleware
app.use(express.static(path.join(__dirname, 'public')))
app.use(express.static('./public'));
app.get('/', function (req, res, next) {
app.get('/', function (req, res, next) { // TODO: Use EJS?
res.render('home.ejs');
})
});
// TODO: Import and serve /api
app.listen(PORT, function (err) {
if (err) console.log(err);

View File

@ -10,5 +10,6 @@
"license": "ISC",
"dependencies": {
"express": "^4.18.2"
}
},
"type": "module"
}

View File

@ -1,8 +0,0 @@
{
"folders": [
{
"path": "."
}
],
"settings": {}
}

View File

@ -1,5 +1,14 @@
import * as pdfcpuWraopper from "../wasm/pdfcpu-wrapper.js";
import * as pdfcpuWraopper from "../wasm/pdfcpu-wrapper-browser.js";
export function impose(snapshot, nup, format) {
return pdfcpuWraopper.impose(snapshot, nup, format);
export async function impose(snapshot, nup, format) {
return await pdfcpuWraopper.oneToOne([
"pdfcpu.wasm",
"nup",
"-c",
"disable",
'f:' + format,
"/output.pdf",
String(nup),
"input.pdf",
], snapshot);
}

View File

@ -21,14 +21,12 @@ function configureFs() {
fs = BrowserFS.BFSRequire("fs");
Buffer = BrowserFS.BFSRequire("buffer").Buffer;
// TODO: Find a way to remove these globals:
window.fs = fs;
window.Buffer = Buffer;
}
);
}
// TODO: This needs to be changed in order to run on node
function loadWasm() {
const script = document.createElement("script");
script.src = wasmLocation + "/wasm_exec.js";
@ -55,7 +53,8 @@ const runWasm = async (param) => {
async function loadFileAsync(data) {
console.log(`Writing file to MemoryFS`);
await fs.writeFile(`/input.pdf`, data);
let exitCode = await runWasm([
console.log(`Write done. Validating...`);
let exitcode = await runWasm([
"pdfcpu.wasm",
"validate",
"-c",
@ -63,23 +62,21 @@ async function loadFileAsync(data) {
`/input.pdf`,
]);
if (exitCode !== 0)
if (exitcode !== 0)
throw new Error("There was an error validating your PDFs");
console.log(`File is Valid`);
}
export async function impose(snapshot, nup, format) {
};
export async function oneToOne(wasmArray, snapshot) {
await loadFileAsync(Buffer.from(snapshot));
let exitcode = await runWasm([
"pdfcpu.wasm",
"nup",
"-c",
"disable",
'f:' + format,
"output.pdf",
String(nup),
"input.pdf",
]);
console.error("Nuping File");
let exitcode = await runWasm(wasmArray);
if (exitcode !== 0) {
console.error("There was an error nuping your PDFs");
@ -91,4 +88,16 @@ export async function impose(snapshot, nup, format) {
fs.unlink("output.pdf");
console.log("Your File ist Ready!");
return new Uint8Array(contents);
};
}
// Placeholder: currently does nothing and resolves to undefined.
// Presumably meant for operations with several inputs and one output
// (by analogy with oneToOne) — TODO confirm.
export async function manyToOne() {
// TODO: Implement this if necessary for some operations
}
// Placeholder: currently does nothing and resolves to undefined.
// Presumably meant for operations with one input and several outputs
// (by analogy with oneToOne) — TODO confirm.
export async function oneToMany() {
// TODO: Implement this if necessary for some operations
}
// Placeholder: currently does nothing and resolves to undefined.
// Presumably meant for operations with several inputs and several outputs
// (by analogy with oneToOne) — TODO confirm.
export async function manyToMany() {
// TODO: Implement this if necessary for some operations
}

View File

@ -0,0 +1,136 @@
// TODO: Uses the BrowserFS import, needs to be changed for serverside
import { WasmFs } from '@wasmer/wasmfs';
import path from "path";
let webWasmLocation = "/wasm/";
let nodeWasmLocation = "./public/wasm/";
let fs;
const wasmfs = new WasmFs();
// Module start-up: load the Go wasm runtime first, then set up the in-memory
// file system. The returned promise is intentionally not awaited here.
(async function initialise() {
    await loadWasm();
    await configureFs();
})();
/**
 * Points both the module-level `fs` and `global.fs` at the in-memory file
 * system provided by the WasmFs instance.
 */
async function configureFs() {
    // Can't use BrowserFS: https://github.com/jvilk/BrowserFS/issues/271
    const inMemoryFs = wasmfs.fs;
    fs = inMemoryFs;
    global.fs = inMemoryFs;
    console.log("InMemoryFs configured");
}
/**
 * Prepares the globals needed by the Go wasm runtime and loads `wasm_exec.js`.
 *
 * The Web Crypto polyfill is only installed when no global `crypto` exists:
 * newer Node versions already provide one, and assigning to it may throw in
 * strict (ES module) code when the global is exposed as a getter-only property.
 */
async function loadWasm() {
    const { webcrypto } = await import("crypto");
    if (global.crypto === undefined) {
        global.crypto = webcrypto; // wasm dependency
    }
    await import("./wasm_exec.js");
}
/**
 * Runs the pdfcpu wasm binary once with the given command line.
 *
 * The binary is read from disk on first use and kept in
 * `global.cachedWasmResponse`; the `Go` runtime object is created at the same
 * time and stored in `global.go`.
 * NOTE(review): the same `Go` instance is reused for every run — confirm that
 * the bundled wasm_exec.js supports running an instance more than once.
 *
 * @param param - Argument vector for pdfcpu (program name first).
 * @returns Promise resolving to `global.go.exitCode` after the run.
 */
const runWasm = async (param) => {
    const isFirstRun = global.cachedWasmResponse === undefined;
    if (isFirstRun) {
        const nodeFs = await import("fs");
        const wasmBinary = nodeFs.readFileSync(nodeWasmLocation + "/pdfcpu.wasm");
        global.cachedWasmResponse = wasmBinary;
        global.go = new Go();
    }

    const instantiated = await WebAssembly.instantiate(
        global.cachedWasmResponse,
        global.go.importObject
    );

    global.go.argv = param;
    await global.go.run(instantiated.instance);

    return global.go.exitCode;
};
/**
 * Writes the given data as `input.pdf` into the configured file system and
 * validates it with `pdfcpu validate`.
 *
 * @param data - File contents passed to `fs.writeFileSync`.
 * @throws {Error} If pdfcpu exits with a non-zero code.
 */
async function loadFileAsync(data) {
    console.log(`Writing file to Disk`);
    fs.writeFileSync(`input.pdf`, data);
    console.log(`Write done. Validating...`);

    const validateCommand = [
        "pdfcpu.wasm",
        "validate",
        "-c",
        "disable",
        `input.pdf`,
    ];
    const validationExitCode = await runWasm(validateCommand);
    if (validationExitCode !== 0) {
        throw new Error("There was an error validating your PDFs");
    }

    // Debugging hint: the command's output is presumably available through
    // wasmfs.getStdOut(), which returns a promise.
    console.log(`File is Valid`);
}
/**
 * Runs a pdfcpu command that reads `input.pdf` and writes `/output.pdf`.
 *
 * The snapshot is written and validated, the command is executed, and the
 * produced file is read back. The temporary files are removed on every exit
 * path, so a failed run cannot leave a stale `/output.pdf` behind that a later
 * call would mistake for its own result.
 *
 * @param wasmArray - Argument vector for pdfcpu (program name first).
 * @param snapshot - Input PDF data; converted with `Buffer.from`.
 * @returns Promise resolving to a `Uint8Array` with the output file, or
 *          `undefined` when pdfcpu exits with a non-zero code (logged).
 * @throws {Error} If validation fails, the output never appears, or it is empty.
 */
export async function oneToOne(wasmArray, snapshot) {
    try {
        await loadFileAsync(Buffer.from(snapshot));

        console.log("Nuping File");
        const exitcode = await runWasm(wasmArray);
        if (exitcode !== 0) {
            console.error("There was an error nuping your PDFs");
            return;
        }
        console.log("Nuping Done");

        await checkExistsWithTimeout("/output.pdf", 1000);
        console.log("Write started...");

        // We need to wait for the file write in memfs to finish in node for some reason
        await new Promise((resolve) => {
            setTimeout(resolve, 100);
        });

        const data = fs.readFileSync("/output.pdf");
        if (data.length === 0) {
            throw new Error("File Size 0 that should not happen");
        }

        console.log("Your File ist Ready!");
        return new Uint8Array(data);
    } finally {
        removeFileIfPresent("input.pdf");
        removeFileIfPresent("output.pdf");
    }
}

// Deletes a file from the configured file system, ignoring files that do not exist.
function removeFileIfPresent(filePath) {
    if (fs.existsSync(filePath)) {
        fs.unlinkSync(filePath);
    }
}
// Placeholder: currently does nothing and resolves to undefined.
// Presumably meant for operations with several inputs and one output
// (by analogy with oneToOne) — TODO confirm.
export async function manyToOne() {
// TODO: Implement this if necessary for some operations
}
// Placeholder: currently does nothing and resolves to undefined.
// Presumably meant for operations with one input and several outputs
// (by analogy with oneToOne) — TODO confirm.
export async function oneToMany() {
// TODO: Implement this if necessary for some operations
}
// Placeholder: currently does nothing and resolves to undefined.
// Presumably meant for operations with several inputs and several outputs
// (by analogy with oneToOne) — TODO confirm.
export async function manyToMany() {
// TODO: Implement this if necessary for some operations
}
// THX: https://stackoverflow.com/questions/26165725/nodejs-check-file-exists-if-not-wait-till-it-exist
/**
 * Waits until a file is readable, or rejects after a timeout.
 *
 * The directory is watched first and the file is probed afterwards, so a file
 * created between the two steps is not missed. Every watch event only triggers
 * a fresh probe of `filePath`; events for other files in the same directory
 * therefore never resolve the promise on their own.
 *
 * @param filePath - Path of the file to wait for.
 * @param timeout - Maximum wait time in milliseconds.
 * @returns Promise that resolves (without a value) once the file is readable.
 *          It rejects with an Error on timeout, or if the directory cannot be watched.
 */
function checkExistsWithTimeout(filePath, timeout) {
    return new Promise((resolve, reject) => {
        let settled = false;

        // Settles the promise exactly once and releases timer and watcher.
        const settle = (error) => {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timer);
            watcher.close();
            if (error) {
                reject(error);
            } else {
                resolve();
            }
        };

        // Resolves only when the target file itself is readable.
        const probe = () => {
            fs.access(filePath, fs.constants.R_OK, (err) => {
                if (!err) {
                    settle();
                }
            });
        };

        // If watching fails, this throws before the timer exists, so the
        // promise is rejected without leaving a dangling timeout behind.
        const watcher = fs.watch(path.dirname(filePath), probe);

        const timer = setTimeout(() => {
            settle(new Error('File did not exists and was not created during the timeout.'));
        }, timeout);

        // Covers the case where the file already exists.
        probe();
    });
}