extract, rotate, split

This commit is contained in:
Felix Kaspar 2023-10-17 03:40:54 +02:00
parent 8e8c4596bf
commit fddbec2408
6 changed files with 185 additions and 5 deletions

View File

@ -3,8 +3,7 @@
// https://cdn.discordapp.com/attachments/1099390571493195898/1118192753759764520/image.png?ex=6537dba7&is=652566a7&hm=dc46820ef7c34bc37424794966c5f66f93ba0e15a740742c364d47d31ea119a9&
export const discordWorkflow = {
outputOptions: {
zip: false,
awaitAllDone: true
zip: false
},
operations: [
{
@ -72,8 +71,7 @@ export const discordWorkflow = {
// This will merge all input files into one giant document
export const mergeOnly = {
outputOptions: {
zip: false,
awaitAllDone: true
zip: false
},
operations: [
{
@ -82,4 +80,46 @@ export const mergeOnly = {
operations: []
}
]
}
/**
 * Example workflow: extract pages and store them in a new document.
 *
 * Consists of a single "extract" operation whose `pagesToExtractArray`
 * value lists the page indices 0 and 2. No follow-up operations are chained,
 * and the output is not zipped.
 */
export const extractOnly = {
    outputOptions: { zip: false },
    operations: [
        {
            type: "extract",
            values: { pagesToExtractArray: [0, 2] },
            operations: [],
        },
    ],
};
/**
 * Example workflow: split a document up into multiple documents.
 *
 * Consists of a single "split" operation whose `pagesToSplitAfterArray`
 * value names the page indices 2 and 10 as split points. No follow-up
 * operations are chained, and the output is not zipped.
 */
export const splitOnly = {
    outputOptions: { zip: false },
    operations: [
        {
            type: "split",
            values: { pagesToSplitAfterArray: [2, 10] },
            operations: [],
        },
    ],
};
/**
 * Example workflow: rotate the pages of a document.
 *
 * Consists of a single "rotate" operation carrying a `rotation` value of
 * -90. No follow-up operations are chained, and the output is not zipped.
 */
export const rotateOnly = {
    outputOptions: { zip: false },
    operations: [
        {
            type: "rotate",
            values: { rotation: -90 },
            operations: [],
        },
    ],
};

View File

@ -0,0 +1,25 @@
const { PDFDocument, ParseSpeeds } = PDFLib;
/**
 * Loads a PDF from `snapshot` and returns a new document that contains only
 * the requested pages.
 *
 * @param snapshot - PDF data, passed unchanged to `PDFDocument.load`.
 * @param pagesToExtractArray - Page indices, forwarded unchanged to
 *     `createSubDocument`.
 * @returns A promise resolving to whatever `createSubDocument` produces.
 */
export const extractPages = async (snapshot, pagesToExtractArray) => {
    const sourceDocument = await PDFDocument.load(snapshot);

    // TODO: invent a better format for pagesToExtractArray and convert it.
    const extractedDocument = createSubDocument(sourceDocument, pagesToExtractArray);
    return extractedDocument;
};
/**
 * Builds a new PDF document that contains only the requested pages of
 * `pdfDoc`, in the order given.
 *
 * @param pdfDoc - An already loaded source document (must provide
 *     `getPageCount()`).
 * @param {number[]} pagesToExtractArray - Zero-based page indices to copy.
 *     An empty array is accepted and results in no pages being copied.
 * @returns A promise resolving to the result of `subDocument.save()`.
 * @throws {Error} If an index is not a non-negative integer, or if an index
 *     is greater than or equal to the number of pages in `pdfDoc`.
 */
export async function createSubDocument(pdfDoc, pagesToExtractArray) {
    const pageCount = pdfDoc.getPageCount();

    // Validate every index up front, before a target document is created,
    // so bad input fails with a clear message instead of an opaque error
    // from inside copyPages.
    let highestIndex = -Infinity;
    for (const pageIndex of pagesToExtractArray) {
        if (!Number.isInteger(pageIndex) || pageIndex < 0) {
            throw new Error(`Invalid page index ${pageIndex}: page indices must be non-negative integers`);
        }
        highestIndex = Math.max(highestIndex, pageIndex);
    }

    // Indices are zero-based, so the largest valid index is pageCount - 1.
    if (highestIndex >= pageCount) {
        throw new Error(`The PDF document only has ${pageCount} pages and you tried to extract page ${highestIndex}`);
    }

    const subDocument = await PDFDocument.create();
    const copiedPages = await subDocument.copyPages(pdfDoc, pagesToExtractArray);
    for (const page of copiedPages) {
        subDocument.addPage(page);
    }

    return subDocument.save();
}

View File

@ -0,0 +1,18 @@
const { PDFDocument, ParseSpeeds, degrees } = PDFLib;
/**
 * Applies one rotation value to every page of a PDF.
 *
 * @param snapshot - PDF data, passed to `PDFDocument.load`.
 * @param rotation - Angle handed to `degrees()` for each page.
 * @returns A promise resolving to the result of `pdfDoc.save()`.
 *
 * NOTE(review): `setRotation` is called with the given angle as-is, so this
 * presumably replaces any rotation a page already has instead of adding to
 * it — confirm against the pdf-lib documentation.
 */
export const rotatePages = async (snapshot, rotation) => {
    // Parse the incoming PDF using the fastest parse speed setting.
    const pdfDoc = await PDFDocument.load(snapshot, {
        parseSpeed: ParseSpeeds.Fastest,
    });

    // Set the rotation on each page in turn.
    for (const page of pdfDoc.getPages()) {
        page.setRotation(degrees(rotation));
    }

    // Hand back the serialized, modified document.
    return pdfDoc.save();
};

View File

@ -0,0 +1,26 @@
import { createSubDocument } from "./extractPages.js";
const { PDFDocument, ParseSpeeds } = PDFLib;
/**
 * Splits a PDF into several documents at the given page boundaries.
 *
 * Each entry of `splitAfterPageArray` is a zero-based page index; a new
 * document starts after that page. Pages following the last split point
 * form the final document. Split points at or beyond the last page produce
 * no extra document.
 *
 * The caller's array is never modified. The same workflow configuration is
 * passed in again for every input file, so consuming it here would leave
 * later files unsplit.
 *
 * @param snapshot - PDF data, passed to `PDFDocument.load`.
 * @param {number[]} splitAfterPageArray - Page indices to split after.
 *     Order and duplicates do not matter; negative entries are ignored.
 * @returns A promise resolving to an array with one `createSubDocument`
 *     result per produced document, in page order.
 */
export const splitPDF = async (snapshot, splitAfterPageArray) => {
    const pdfDoc = await PDFDocument.load(snapshot);
    const numberOfPages = pdfDoc.getPages().length;

    // Private, ascending, duplicate-free copy of the split points. The loop
    // below relies on strictly increasing values, and working on a copy
    // keeps the caller's configuration intact.
    const splitPoints = [...new Set(splitAfterPageArray)]
        .filter((point) => point >= 0)
        .sort((a, b) => a - b);
    let nextSplit = 0;

    const subDocuments = [];
    let pagesArray = [];

    for (let i = 0; i < numberOfPages; i++) {
        // Once the split points are exhausted, splitPoints[nextSplit] is
        // undefined and the comparison is false, so remaining pages simply
        // accumulate into the last document.
        if (i > splitPoints[nextSplit] && pagesArray.length > 0) {
            subDocuments.push(await createSubDocument(pdfDoc, pagesArray));
            nextSplit++;
            pagesArray = [];
        }
        pagesArray.push(i);
    }

    // Flush whatever is left after the final split point.
    subDocuments.push(await createSubDocument(pdfDoc, pagesArray));

    return subDocuments;
};

View File

@ -55,4 +55,4 @@ import { traverseOperations } from "./traverseOperations.js";
// }
// }
});
})(exampleWorkflows.mergeOnly);
})(exampleWorkflows.rotateOnly);

View File

@ -1,4 +1,7 @@
import { extractPages } from "./functions/extractPages.js";
import { mergePDFs } from "./functions/mergePDFs.js";
import { rotatePages } from "./functions/rotatePDF.js";
import { splitPDF } from "./functions/splitPDF.js";
import { organizeWaitOperations } from "./organizeWaitOperations.js";
export async function traverseOperations(operations, input) {
@ -34,6 +37,8 @@ export async function traverseOperations(operations, input) {
}
break;
case "removeObjects":
console.warn("RemoveObjects not implemented yet.")
if(Array.isArray(input)) {
for (let i = 0; i < input.length; i++) {
// TODO: modfiy input
@ -52,16 +57,47 @@ export async function traverseOperations(operations, input) {
for (let i = 0; i < input.length; i++) {
// TODO: modfiy input
input[i].fileName += "_extractedPages";
input[i].buffer = await extractPages(input[i].buffer, operation.values["pagesToExtractArray"]);
await nextOperation(operation.operations, input[i]);
}
}
else {
// TODO: modfiy input
input.fileName += "_extractedPages";
input.buffer = await extractPages(input.buffer, operation.values["pagesToExtractArray"]);
await nextOperation(operation.operations, input);
}
break;
case "split":
// TODO: When a split goes into a wait function it might break the done condition, as it will count multiple times.
if(Array.isArray(input)) {
for (let i = 0; i < input.length; i++) {
const splits = await splitPDF(input[i].buffer, operation.values["pagesToSplitAfterArray"]);
for (let j = 0; j < splits.length; j++) {
const split = {};
split.originalFileName = input[i].originalFileName;
split.fileName = input[i].fileName + "_split";
split.buffer = splits[j];
await nextOperation(operation.operations, split);
}
}
}
else {
const splits = await splitPDF(input.buffer, operation.values["pagesToSplitAfterArray"]);
for (let j = 0; j < splits.length; j++) {
const split = {};
split.originalFileName = input.originalFileName;
split.fileName = input.fileName + "_split";
split.buffer = splits[j];
await nextOperation(operation.operations, split);
}
}
break;
case "fillField":
console.warn("FillField not implemented yet.")
if(Array.isArray(input)) {
for (let i = 0; i < input.length; i++) {
// TODO: modfiy input
@ -76,6 +112,8 @@ export async function traverseOperations(operations, input) {
}
break;
case "extractImages":
console.warn("ExtractImages not implemented yet.")
if(Array.isArray(input)) {
for (let i = 0; i < input.length; i++) {
// TODO: modfiy input
@ -105,6 +143,7 @@ export async function traverseOperations(operations, input) {
await nextOperation(operation.operations, input);
break;
case "transform": {
console.warn("Transform not implemented yet.")
if(Array.isArray(input)) {
for (let i = 0; i < input.length; i++) {
// TODO: modfiy input
@ -119,6 +158,38 @@ export async function traverseOperations(operations, input) {
}
break;
}
case "extract":
if(Array.isArray(input)) {
for (let i = 0; i < input.length; i++) {
// TODO: modfiy input
input[i].fileName += "_extractedPages";
input[i].buffer = await extractPages(input[i].buffer, operation.values["pagesToExtractArray"]);
await nextOperation(operation.operations, input[i]);
}
}
else {
// TODO: modfiy input
input.fileName += "_extractedPages";
input.buffer = await extractPages(input.buffer, operation.values["pagesToExtractArray"]);
await nextOperation(operation.operations, input);
}
break;
case "rotate":
if(Array.isArray(input)) {
for (let i = 0; i < input.length; i++) {
// TODO: modfiy input
input[i].fileName += "_turned";
input[i].buffer = await rotatePages(input[i].buffer, operation.values["rotation"]);
await nextOperation(operation.operations, input[i]);
}
}
else {
// TODO: modfiy input
input.fileName += "_turned";
input.buffer = await rotatePages(input.buffer, operation.values["rotation"]);
await nextOperation(operation.operations, input);
}
break;
default:
console.log("operation type unknown: ", operation.type);
break;