Stirling-PDF/routes/api/workflow.js
2023-10-22 01:11:17 +02:00

233 lines
7.8 KiB
JavaScript

import express from 'express';
import crypto from 'crypto';
import stream from "stream";
import Archiver from 'archiver';
import * as Functions from "../../functions.js";
import { traverseOperations } from "../../public/traverseOperations.js";
const activeWorkflows = {};
const router = express.Router();
router.post("/:workflowUuid?", [
async (req, res) => {
if(req.files == null) {
res.status(400).json({"error": "No files were uploaded."});
return;
}
if(Array.isArray(req.files.files)) {
req.files = req.files.files;
}
else {
req.files = [req.files.files];
}
const workflow = JSON.parse(req.body.workflow);
// TODO: Validate input further (json may fail or not be a valid workflow)
const inputs = await Promise.all(req.files.map(async file => {
return {
originalFileName: file.name.replace(/\.[^/.]+$/, ""),
fileName: file.name.replace(/\.[^/.]+$/, ""),
buffer: new Uint8Array(await file.data)
}
}));
// Allow option to do it synchronously and just make a long request
if(req.body.async === "false") {
console.log("Don't do async");
const traverse = traverseOperations(workflow.operations, inputs, Functions);
let pdfResults;
let iteration;
while (true) {
iteration = await traverse.next();
if (iteration.done) {
pdfResults = iteration.value;
console.log("Done");
break;
}
console.log(iteration.value);
}
console.log("Download");
downloadHandler(res, pdfResults);
}
else {
console.log("Start Aync Workflow");
// TODO: UUID collision checks
let workflowID = req.params.workflowUuid
if(!workflowID)
workflowID = generateWorkflowID();
activeWorkflows[workflowID] = {
createdAt: Date.now(),
finished: false,
eventStream: null,
result: null,
// TODO: When auth is implemented: owner
}
const activeWorkflow = activeWorkflows[workflowID];
res.status(200).json({
"workflowID": workflowID,
"data-recieved": {
"fileCount": req.files.length,
"workflow": workflow
}
});
const traverse = traverseOperations(workflow.operations, inputs);
let pdfResults;
let iteration;
while (true) {
iteration = await traverse.next();
if (iteration.done) {
pdfResults = iteration.value;
if(activeWorkflow.eventStream) {
activeWorkflow.eventStream.write(`data: processing done\n\n`);
activeWorkflow.eventStream.end();
}
break;
}
if(activeWorkflow.eventStream)
activeWorkflow.eventStream.write(`data: ${iteration.value}\n\n`);
}
activeWorkflow.result = pdfResults;
activeWorkflow.finished = true;
}
}
]);
router.get("/progress/:workflowUuid", (req, res, nex) => {
if(!req.params.workflowUuid) {
res.status(400).json({"error": "No workflowUuid weres provided."});
return;
}
if(!activeWorkflows.hasOwnProperty(req.params.workflowUuid)) {
res.status(400).json({"error": `No workflow with workflowUuid "${req.params.workflowUuid}" was found.`});
return;
}
// Return current progress
const workflow = activeWorkflows[req.params.workflowUuid];
res.status(200).json({ createdAt: workflow.createdAt, finished: workflow.finished });
});
router.get("/progress-stream/:workflowUuid", (req, res, nex) => {
if(!req.params.workflowUuid) {
res.status(400).json({"error": "No workflowUuid weres provided."});
return;
}
if(!activeWorkflows.hasOwnProperty(req.params.workflowUuid)) {
res.status(400).json({"error": `No workflow with workflowUuid "${req.params.workflowUuid}" was found.`});
return;
}
// TODO: Check if already done
// Send realtime updates
res.setHeader('Cache-Control', 'no-cache');
res.setHeader('Content-Type', 'text/event-stream');
res.setHeader('Access-Control-Allow-Origin', '*');
res.setHeader('Connection', 'keep-alive');
res.flushHeaders(); // flush the headers to establish SSE with client
const workflow = activeWorkflows[req.params.workflowUuid];
workflow.eventStream = res;
res.on('close', () => {
res.end();
// TODO: Abort if not already done?
});
});
router.get("/result/:workflowUuid", (req, res, nex) => {
if(!req.params.workflowUuid) {
res.status(400).json({"error": "No workflowUuid weres provided."});
return;
}
if(!activeWorkflows.hasOwnProperty(req.params.workflowUuid)) {
res.status(400).json({"error": `No workflow with workflowUuid "${req.params.workflowUuid}" was found.`});
return;
}
/*
* If workflow isn't done return error
* Send file, TODO: if there are multiple outputs return as zip
* If download is done, delete results / allow deletion within the next 5-60 mins
*/
const workflow = activeWorkflows[req.params.workflowUuid];
if(!workflow.finished) {
res.status(202).json({ message: "Workflow hasn't finished yet. Check progress or connect to progress-steam to get notified when its done." });
return
}
downloadHandler(res, workflow.result);
// Delete workflow / results when done.
delete activeWorkflows[req.params.workflowUuid];
});
router.post("/abort/:workflowUuid", (req, res, nex) => {
if(!req.params.workflowUuid) {
res.status(400).json({"error": "No workflowUuid weres provided."});
return;
}
if(!activeWorkflows.hasOwnProperty(req.params.workflowUuid)) {
res.status(400).json({"error": `No workflow with workflowUuid "${req.params.workflowUuid}" was found.`});
return;
}
// TODO: Abort workflow
res.status(501).json({"warning": "Abortion has not been implemented yet."});
});
function generateWorkflowID() {
return crypto.randomUUID();
}
function downloadHandler(res, pdfResults) {
if(pdfResults.length == 0) {
res.status(500).json({"warning": "The workflow had no outputs."});
}
else if(pdfResults.length > 1) {
// TODO: Also allow the user to download multiple files without zip compressen, because this is kind of slow...
res.writeHead(200, {
'Content-Type': 'application/zip',
'Content-disposition': 'attachment; filename=workflow-results.zip'
});
var zip = Archiver('zip');
// Stream the file to the user.
zip.pipe(res);
console.log("Adding Files to ZIP...");
for (let i = 0; i < pdfResults.length; i++) {
// TODO: Implement other file types (mostly fro image & text extraction)
// TODO: Check for name collisions
zip.append(Buffer.from(pdfResults[i].buffer), { name: pdfResults[i].fileName + ".pdf" });
}
zip.finalize();
console.log("Sent");
}
else {
const readStream = new stream.PassThrough();
readStream.end(pdfResults[0].buffer);
// TODO: Implement other file types (mostly fro image & text extraction)
res.set("Content-disposition", 'attachment; filename=' + pdfResults[0].fileName + ".pdf");
res.set("Content-Type", "application/pdf");
readStream.pipe(res);
}
}
export default router;