From 4c8a85726de43f966e5ca2ec08e86b0a728913ba Mon Sep 17 00:00:00 2001 From: Saud Fatayerji Date: Fri, 17 Nov 2023 15:52:44 +0300 Subject: [PATCH] Made split pdf functions conform to the new design pattern. --- .../src/functions/common/detectQRCodePages.ts | 55 ++++++++ .../splitPagesByIndex.ts} | 18 +-- shared-operations/src/functions/splitOn.ts | 121 ------------------ .../src/functions/splitPagesByPreset.ts | 44 +++++++ .../src/functions/splitPdfByIndex.ts | 25 ++++ 5 files changed, 129 insertions(+), 134 deletions(-) create mode 100644 shared-operations/src/functions/common/detectQRCodePages.ts rename shared-operations/src/functions/{splitPDF.ts => common/splitPagesByIndex.ts} (56%) delete mode 100644 shared-operations/src/functions/splitOn.ts create mode 100644 shared-operations/src/functions/splitPagesByPreset.ts create mode 100644 shared-operations/src/functions/splitPdfByIndex.ts diff --git a/shared-operations/src/functions/common/detectQRCodePages.ts b/shared-operations/src/functions/common/detectQRCodePages.ts new file mode 100644 index 00000000..0f086494 --- /dev/null +++ b/shared-operations/src/functions/common/detectQRCodePages.ts @@ -0,0 +1,55 @@ + +import jsQR from "jsqr"; + +import { PdfFile } from '../../wrappers/PdfFile.js'; +import { getImagesOnPage, PDFJSImage } from "./getImagesOnPage.js"; + +export async function detectQRCodePages(file: PdfFile) { + console.log("FileInQRPrev: ", file); + const pdfDoc = await file.pdfJsDocument; + console.log("FileInQRAfter: ", file); + + const pagesWithQR: number[] = []; + for (let i = 0; i < pdfDoc.numPages; i++) { + console.log("Page:", i, "/", pdfDoc.numPages); + const page = await pdfDoc.getPage(i + 1); + + const images = await getImagesOnPage(page); + // console.log("images:", images); + for (const image of images) { + const data = await checkForQROnImage(image); + if(data == "https://github.com/Frooodle/Stirling-PDF") { + pagesWithQR.push(i); + } + } + } + if(pagesWithQR.length == 0) { + console.warn("Could not find any QR Codes in the provided PDF.") + } + return pagesWithQR; +} + +async function checkForQROnImage(image: any) { + // TODO: There is an issue with the jsQR package (The package expects rgba but sometimes we have rgb), and the package seems to be stale, we could create a fork and fix the issue. In the meanwhile we just force rgba: + // Check for rgb and convert to rgba + + if(image.data.length == image.width * image.height * 3) { + const tmpArray = new Uint8ClampedArray(image.width * image.height * 4); + + // Iterate through the original array and add an alpha channel + for (let i = 0, j = 0; i < image.data.length; i += 3, j += 4) { + tmpArray[j] = image.data[i]; // Red channel + tmpArray[j + 1] = image.data[i + 1]; // Green channel + tmpArray[j + 2] = image.data[i + 2]; // Blue channel + tmpArray[j + 3] = 255; // Alpha channel (fully opaque) + } + + image.data = tmpArray; + } + + const code = jsQR(image.data, image.width, image.height); + if(code) + return code.data; + else + return null; +} \ No newline at end of file diff --git a/shared-operations/src/functions/splitPDF.ts b/shared-operations/src/functions/common/splitPagesByIndex.ts similarity index 56% rename from shared-operations/src/functions/splitPDF.ts rename to shared-operations/src/functions/common/splitPagesByIndex.ts index d5dfe948..03621298 100644 --- a/shared-operations/src/functions/splitPDF.ts +++ b/shared-operations/src/functions/common/splitPagesByIndex.ts @@ -1,27 +1,19 @@ -import { getPages } from "./common/getPagesByIndex"; -import { PdfFile } from '../wrappers/PdfFile'; - -export type SplitPdfParamsType = { - file: PdfFile; - splitAfterPageArray: number[]; -} - -export async function splitPDF(params: SplitPdfParamsType): Promise { - const { file, splitAfterPageArray } = params; +import { PdfFile } from '../../wrappers/PdfFile.js'; +import { getPages } from "./getPagesByIndex"; +export async function splitPagesByIndex(file: PdfFile, splitAfterPageIndexes: number[]): Promise { const pdfLibDocument = await file.pdfLibDocument; - const numberOfPages = pdfLibDocument.getPages().length; let pagesArray: number[] = []; - let splitAfter = splitAfterPageArray.shift(); + let splitAfter = splitAfterPageIndexes.shift(); const subDocuments: PdfFile[] = []; for (let i = 0; i < numberOfPages; i++) { if(splitAfter && i > splitAfter && pagesArray.length > 0) { subDocuments.push(await getPages(file, pagesArray)); - splitAfter = splitAfterPageArray.shift(); + splitAfter = splitAfterPageIndexes.shift(); pagesArray = []; } pagesArray.push(i); diff --git a/shared-operations/src/functions/splitOn.ts b/shared-operations/src/functions/splitOn.ts deleted file mode 100644 index 5f79c09d..00000000 --- a/shared-operations/src/functions/splitOn.ts +++ /dev/null @@ -1,121 +0,0 @@ - -import jsQR from "jsqr"; - -import { detectEmptyPages } from "./common/detectEmptyPages.js"; -import { getImagesOnPage } from "./common/getImagesOnPage.js"; -import { getPages } from "./common/getPagesByIndex.js"; -import { PdfFile } from '../wrappers/PdfFile.js'; - -export type SplitOnParamsType = { - file: PdfFile; - type: "BAR_CODE"|"QR_CODE"|"BLANK_PAGE"; - whiteThreashold: number; -} - -export async function splitOn(params: SplitOnParamsType) { - const { file, type, whiteThreashold } = params; - - let splitAtPages: number[] = []; - - console.log("File: ", file); - - switch (type) { - case "BAR_CODE": - // TODO: Implement - throw new Error("This split-type has not been implemented yet"); - - case "QR_CODE": - splitAtPages = await getPagesWithQRCode(file); - break; - - case "BLANK_PAGE": - splitAtPages = await detectEmptyPages(file, whiteThreashold); - break; - - default: - throw new Error("An invalid split-type was provided."); - } - - console.log("Split At Pages: ", splitAtPages); - - console.log("File: ", file); - - // Remove detected Pages & Split - const pdfDoc = await file.pdfLibDocument; - const numberOfPages = pdfDoc.getPageCount(); - - let pagesArray: number[] = []; - let splitAfter = splitAtPages.shift(); - const subDocuments: PdfFile[] = []; - - for (let i = 0; i < numberOfPages; i++) { - console.log(i); - if(i == splitAfter) { - if(pagesArray.length > 0) { - subDocuments.push(await getPages(file, pagesArray)); - pagesArray = []; - } - splitAfter = splitAtPages.shift(); - } - else { // Skip splitAtPage - console.log("PagesArray") - pagesArray.push(i); - } - } - if(pagesArray.length > 0) { - subDocuments.push(await getPages(file, pagesArray)); - } - pagesArray = []; - - return subDocuments; - - async function getPagesWithQRCode(file: PdfFile) { - console.log("FileInQRPrev: ", file); - const pdfDoc = await file.pdfJsDocument; - console.log("FileInQRAfter: ", file); - - const pagesWithQR: number[] = []; - for (let i = 0; i < pdfDoc.numPages; i++) { - console.log("Page:", i, "/", pdfDoc.numPages); - const page = await pdfDoc.getPage(i + 1); - - const images = await getImagesOnPage(page); - // console.log("images:", images); - for (const image of images) { - const data = await checkForQROnImage(image); - if(data == "https://github.com/Frooodle/Stirling-PDF") { - pagesWithQR.push(i); - } - } - } - if(pagesWithQR.length == 0) { - console.warn("Could not find any QR Codes in the provided PDF.") - } - return pagesWithQR; - } - - async function checkForQROnImage(image: any) { - // TODO: There is an issue with the jsQR package (The package expects rgba but sometimes we have rgb), and the package seems to be stale, we could create a fork and fix the issue. In the meanwhile we just force rgba: - // Check for rgb and convert to rgba - - if(image.data.length == image.width * image.height * 3) { - const tmpArray = new Uint8ClampedArray(image.width * image.height * 4); - - // Iterate through the original array and add an alpha channel - for (let i = 0, j = 0; i < image.data.length; i += 3, j += 4) { - tmpArray[j] = image.data[i]; // Red channel - tmpArray[j + 1] = image.data[i + 1]; // Green channel - tmpArray[j + 2] = image.data[i + 2]; // Blue channel - tmpArray[j + 3] = 255; // Alpha channel (fully opaque) - } - - image.data = tmpArray; - } - - const code = jsQR(image.data, image.width, image.height); - if(code) - return code.data; - else - return null; - } -}; diff --git a/shared-operations/src/functions/splitPagesByPreset.ts b/shared-operations/src/functions/splitPagesByPreset.ts new file mode 100644 index 00000000..d0892dd6 --- /dev/null +++ b/shared-operations/src/functions/splitPagesByPreset.ts @@ -0,0 +1,44 @@ + +import { PdfFile } from '../wrappers/PdfFile.js'; +import { splitPagesByIndex } from "./common/splitPagesByIndex.js"; +import { detectEmptyPages } from "./common/detectEmptyPages.js"; +import { detectQRCodePages } from "./common/detectQRCodePages.js"; + +export type SplitOnParamsType = { + file: PdfFile; + type: "BAR_CODE"|"QR_CODE"|"BLANK_PAGE"; + whiteThreashold?: number; +} +export async function splitPagesByPreset(params: SplitOnParamsType): Promise { + const { file, type, whiteThreashold } = params; + + console.log("File: ", file); + + let splitAtPages: number[]; + switch (type) { + case "BAR_CODE": + // TODO: Implement + throw new Error("This split-type has not been implemented yet"); + + case "QR_CODE": + splitAtPages = await detectQRCodePages(file); + break; + + case "BLANK_PAGE": + if (!whiteThreashold) + throw new Error("White threshold not provided"); + splitAtPages = await detectEmptyPages(file, whiteThreashold); + break; + + default: + throw new Error("An invalid split-type was provided."); + } + + console.debug("Split At Pages: ", splitAtPages); + + const newFiles = await splitPagesByIndex(file, splitAtPages); + for (let i = 0; i < newFiles.length; i++) { + newFiles[i].filename += "_split-"+i; + } + return newFiles; +}; diff --git a/shared-operations/src/functions/splitPdfByIndex.ts b/shared-operations/src/functions/splitPdfByIndex.ts new file mode 100644 index 00000000..0de29b0a --- /dev/null +++ b/shared-operations/src/functions/splitPdfByIndex.ts @@ -0,0 +1,25 @@ + +import { PdfFile } from '../wrappers/PdfFile.js'; +import { parsePageIndexSpecification } from './common/pageIndexesUtils' +import { splitPagesByIndex } from './common/splitPagesByIndex.js'; + +export type SplitPagesParamsType = { + file: PdfFile; + pageIndexes: string | number[]; +} +export async function splitPdfByIndex(params: SplitPagesParamsType): Promise { + const { file, pageIndexes } = params; + const pdfLibDocument = await file.pdfLibDocument; + + var indexes = pageIndexes; + + if (!Array.isArray(indexes)) { + indexes = parsePageIndexSpecification(indexes, pdfLibDocument.getPageCount()); + } + + const newFiles = await splitPagesByIndex(file, indexes); + for (let i = 0; i < newFiles.length; i++) { + newFiles[i].filename += "_split-"+i; + } + return newFiles; +}