Stirling-PDF/shared-operations/functions/splitOn.ts

113 lines
4.0 KiB
TypeScript
Raw Normal View History

import { PDFDocument } from 'pdf-lib';
import PDFJS from 'pdfjs-dist';
2023-11-10 19:08:07 +01:00
import jsQR from "jsqr";
2023-10-27 02:56:13 +02:00
import { detectEmptyPages } from "./common/detectEmptyPages.js";
import { getImagesOnPage } from "./common/getImagesOnPage.js";
import { createSubDocument } from "./common/createSubDocument.js";
import { TypedArray, DocumentInitParameters } from 'pdfjs-dist/types/src/display/api.js';
/**
 * Splits a PDF into several documents, cutting at every detected separator page.
 *
 * Separator pages are located according to `type`; they are dropped from the
 * output and every run of pages between two separators becomes its own document.
 * Runs that contain no pages (e.g. two separators in a row, or a separator on
 * the first page) produce no output document.
 *
 * NOTE(review): some pdf.js versions take ownership of typed-array input passed
 * to `getDocument` — confirm that `snapshot` is still readable by
 * `PDFDocument.load` after the detection step has run.
 *
 * @param snapshot - The source PDF, handed unchanged to the detector and to `PDFDocument.load`.
 * @param type - How separator pages are detected. `"BAR_CODE"` is not implemented.
 * @param whiteThreashold - Forwarded to `detectEmptyPages`; only used for `"BLANK_PAGE"`.
 * @returns One entry per non-empty run of pages, in document order, as produced by `createSubDocument`.
 * @throws Error if `type` is `"BAR_CODE"` (not implemented) or not a known split type.
 */
export async function splitOn(
    snapshot: string | ArrayBuffer | Uint8Array,
    type: "BAR_CODE"|"QR_CODE"|"BLANK_PAGE",
    whiteThreashold: number): Promise<Uint8Array[]> {
    // Payload a QR code must carry for its page to count as a separator.
    const SEPARATOR_QR_CONTENT = "https://github.com/Frooodle/Stirling-PDF";

    let splitAtPages: number[] = [];

    switch (type) {
        case "BAR_CODE":
            // TODO: Implement
            throw new Error("This split-type has not been implemented yet");
        case "QR_CODE":
            splitAtPages = await getPagesWithQRCode(snapshot);
            break;
        case "BLANK_PAGE":
            splitAtPages = await detectEmptyPages(snapshot, whiteThreashold);
            break;
        default:
            throw new Error("An invalid split-type was provided.");
    }

    console.log("Split At Pages: ", splitAtPages);

    // Membership lookup instead of a "next pending index" cursor: a duplicate or
    // out-of-order index can then no longer stall the cursor and silently
    // disable every later split point.
    const separatorPages = new Set(splitAtPages);

    // Remove detected Pages & Split
    const pdfDoc = await PDFDocument.load(snapshot);
    const numberOfPages = pdfDoc.getPages().length;

    const subDocuments: Uint8Array[] = [];
    let pagesArray: number[] = [];

    for (let i = 0; i < numberOfPages; i++) {
        if (!separatorPages.has(i)) {
            pagesArray.push(i);
            continue;
        }

        // Separator page: it is skipped itself and closes the current run (if any).
        if (pagesArray.length > 0) {
            subDocuments.push(await createSubDocument(pdfDoc, pagesArray));
            pagesArray = [];
        }
    }

    // Pages after the last separator form the final document.
    if (pagesArray.length > 0) {
        subDocuments.push(await createSubDocument(pdfDoc, pagesArray));
    }

    return subDocuments;

    /**
     * Collects the zero-based indices of all pages that contain at least one
     * image with a QR code whose payload equals `SEPARATOR_QR_CONTENT`.
     * Each page index appears at most once in the result.
     */
    async function getPagesWithQRCode(snapshot: string | ArrayBuffer | URL | TypedArray | DocumentInitParameters): Promise<number[]> {
        const pdfDoc = await PDFJS.getDocument(snapshot).promise;

        const pagesWithQR: number[] = [];
        for (let i = 0; i < pdfDoc.numPages; i++) {
            // getPage is called with a 1-based page number; the result list is 0-based.
            const page = await pdfDoc.getPage(i + 1);
            const images = await getImagesOnPage(page);

            for (const image of images) {
                if (checkForQROnImage(image) === SEPARATOR_QR_CONTENT) {
                    pagesWithQR.push(i);
                    // One hit is enough; further images must not add the page again.
                    break;
                }
            }
        }

        if (pagesWithQR.length === 0) {
            console.warn("Could not find any QR Codes in the provided PDF.")
        }
        return pagesWithQR;
    }

    /**
     * Runs jsQR on one image and returns the decoded payload, or `null` when no
     * QR code is found. The image object passed in is left unmodified.
     */
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- image shape is defined by getImagesOnPage, which is not visible here
    function checkForQROnImage(image: any): string | null {
        // TODO: There is an issue with the jsQR package (The package expects rgba but sometimes we have rgb),
        // and the package seems to be stale, we could create a fork and fix the issue.
        // In the meanwhile we just force rgba.
        const pixelCount = image.width * image.height;
        let pixels = image.data;

        // Three bytes per pixel means rgb: expand into a fresh rgba buffer.
        if (image.data.length === pixelCount * 3) {
            const rgba = new Uint8ClampedArray(pixelCount * 4);
            for (let src = 0, dst = 0; src < image.data.length; src += 3, dst += 4) {
                rgba[dst] = image.data[src];         // Red channel
                rgba[dst + 1] = image.data[src + 1]; // Green channel
                rgba[dst + 2] = image.data[src + 2]; // Blue channel
                rgba[dst + 3] = 255;                 // Alpha channel (fully opaque)
            }
            pixels = rgba;
        }

        const code = jsQR(pixels, image.width, image.height);
        return code ? code.data : null;
    }
}