Made split pdf functions conform to the new design pattern.

This commit is contained in:
Saud Fatayerji 2023-11-17 15:52:44 +03:00
parent b4251b56fe
commit 4c8a85726d
5 changed files with 129 additions and 134 deletions

View File

@ -0,0 +1,55 @@
import jsQR from "jsqr";
import { PdfFile } from '../../wrappers/PdfFile.js';
import { getImagesOnPage, PDFJSImage } from "./getImagesOnPage.js";
/**
 * Scans every page of a PDF for the Stirling-PDF marker QR code.
 *
 * Each image found on a page is decoded with `checkForQROnImage`. A page counts as a
 * match as soon as one of its images decodes to the Stirling-PDF repository URL.
 *
 * @param file PDF wrapper; its `pdfJsDocument` is awaited and iterated page by page.
 * @returns Zero-based indexes of the matching pages, in ascending order, each listed
 *          at most once. Empty (with a console warning) when no page matches.
 */
export async function detectQRCodePages(file: PdfFile): Promise<number[]> {
    // Decoded QR content that marks a page.
    const markerContent = "https://github.com/Frooodle/Stirling-PDF";

    console.log("FileInQRPrev: ", file);
    const pdfDoc = await file.pdfJsDocument;
    console.log("FileInQRAfter: ", file);

    const pagesWithQR: number[] = [];
    for (let i = 0; i < pdfDoc.numPages; i++) {
        console.log("Page:", i, "/", pdfDoc.numPages);
        // The loop index is zero-based; getPage is called with i + 1.
        const page = await pdfDoc.getPage(i + 1);
        const images = await getImagesOnPage(page);
        for (const image of images) {
            const data = await checkForQROnImage(image);
            if (data === markerContent) {
                pagesWithQR.push(i);
                // One hit is enough: recording the same page again for every further
                // matching image would put duplicate indexes into the result.
                break;
            }
        }
    }

    if (pagesWithQR.length === 0) {
        console.warn("Could not find any QR Codes in the provided PDF.")
    }
    return pagesWithQR;
}
/**
 * Tries to decode a QR code from a single raw image.
 *
 * @param image Object exposing `data` (pixel bytes), `width` and `height`. It is only
 *              read; an RGB to RGBA conversion happens on a private copy.
 * @returns The decoded QR content, or `null` when jsQR does not find a code.
 */
async function checkForQROnImage(image: any): Promise<string | null> {
    // TODO: There is an issue with the jsQR package (The package expects rgba but sometimes we have rgb), and the package seems to be stale, we could create a fork and fix the issue. In the meanwhile we just force rgba:
    const pixelCount = image.width * image.height;
    let rgba = image.data;

    // Three bytes per pixel means there is no alpha channel: expand to RGBA.
    if (image.data.length === pixelCount * 3) {
        const expanded = new Uint8ClampedArray(pixelCount * 4);
        for (let src = 0, dst = 0; src < image.data.length; src += 3, dst += 4) {
            expanded[dst] = image.data[src];         // Red channel
            expanded[dst + 1] = image.data[src + 1]; // Green channel
            expanded[dst + 2] = image.data[src + 2]; // Blue channel
            expanded[dst + 3] = 255;                 // Alpha channel (fully opaque)
        }
        // Kept local on purpose: the caller's image object must not be rewritten.
        rgba = expanded;
    }

    const code = jsQR(rgba, image.width, image.height);
    return code ? code.data : null;
}

View File

@ -1,27 +1,19 @@
import { getPages } from "./common/getPagesByIndex";
import { PdfFile } from '../wrappers/PdfFile';
/** Parameter object accepted by `splitPDF`. */
export type SplitPdfParamsType = {
/** The PDF to split. */
file: PdfFile;
/** Page indexes used as split points; `splitPDF` removes entries from this array with `shift()`. */
splitAfterPageArray: number[];
}
export async function splitPDF(params: SplitPdfParamsType): Promise<PdfFile[]> {
const { file, splitAfterPageArray } = params;
import { PdfFile } from '../../wrappers/PdfFile.js';
import { getPages } from "./getPagesByIndex";
export async function splitPagesByIndex(file: PdfFile, splitAfterPageIndexes: number[]): Promise<PdfFile[]> {
const pdfLibDocument = await file.pdfLibDocument;
const numberOfPages = pdfLibDocument.getPages().length;
let pagesArray: number[] = [];
let splitAfter = splitAfterPageArray.shift();
let splitAfter = splitAfterPageIndexes.shift();
const subDocuments: PdfFile[] = [];
for (let i = 0; i < numberOfPages; i++) {
if(splitAfter && i > splitAfter && pagesArray.length > 0) {
subDocuments.push(await getPages(file, pagesArray));
splitAfter = splitAfterPageArray.shift();
splitAfter = splitAfterPageIndexes.shift();
pagesArray = [];
}
pagesArray.push(i);

View File

@ -1,121 +0,0 @@
import jsQR from "jsqr";
import { detectEmptyPages } from "./common/detectEmptyPages.js";
import { getImagesOnPage } from "./common/getImagesOnPage.js";
import { getPages } from "./common/getPagesByIndex.js";
import { PdfFile } from '../wrappers/PdfFile.js';
/** Parameter object accepted by `splitOn`. */
export type SplitOnParamsType = {
/** The PDF to split. */
file: PdfFile;
/** Which kind of marker page to split on; `splitOn` throws for "BAR_CODE" (not implemented). */
type: "BAR_CODE"|"QR_CODE"|"BLANK_PAGE";
/** Forwarded to `detectEmptyPages` for "BLANK_PAGE"; scale and meaning are defined there (not visible here). */
whiteThreashold: number;
}
/**
 * Splits a PDF at automatically detected marker pages.
 *
 * The marker pages (pages carrying the Stirling-PDF QR code, or pages reported by
 * `detectEmptyPages`) are left out of the output. Every run of consecutive non-marker
 * pages becomes one sub-document, created with `getPages`.
 *
 * @param params.file            The PDF to split.
 * @param params.type            Marker kind: "QR_CODE" or "BLANK_PAGE" ("BAR_CODE" is not implemented).
 * @param params.whiteThreashold Passed to `detectEmptyPages` when `type` is "BLANK_PAGE".
 * @returns The sub-documents in page order; empty when every page is a marker page.
 * @throws Error for "BAR_CODE" and for any unknown `type`.
 */
export async function splitOn(params: SplitOnParamsType) {
    const { file, type, whiteThreashold } = params;

    let splitAtPages: number[] = [];
    switch (type) {
        case "BAR_CODE":
            // TODO: Implement
            throw new Error("This split-type has not been implemented yet");
        case "QR_CODE":
            splitAtPages = await getPagesWithQRCode(file);
            break;
        case "BLANK_PAGE":
            splitAtPages = await detectEmptyPages(file, whiteThreashold);
            break;
        default:
            throw new Error("An invalid split-type was provided.");
    }
    console.debug("Split At Pages: ", splitAtPages);

    // A set makes the loop below independent of duplicates and ordering in the
    // detector output; a cursor advanced with shift() stalls on a repeated index.
    const markerPages = new Set(splitAtPages);

    const pdfDoc = await file.pdfLibDocument;
    const numberOfPages = pdfDoc.getPageCount();

    const subDocuments: PdfFile[] = [];
    let pagesArray: number[] = [];
    for (let i = 0; i < numberOfPages; i++) {
        if (!markerPages.has(i)) {
            pagesArray.push(i);
            continue;
        }
        // Marker page: drop it and close the run collected so far (if any).
        if (pagesArray.length > 0) {
            subDocuments.push(await getPages(file, pagesArray));
            pagesArray = [];
        }
    }
    // Pages after the last marker page form the final sub-document.
    if (pagesArray.length > 0) {
        subDocuments.push(await getPages(file, pagesArray));
    }
    return subDocuments;

    /** Returns the zero-based indexes of pages holding the Stirling-PDF QR code, each at most once. */
    async function getPagesWithQRCode(file: PdfFile) {
        const pdfDoc = await file.pdfJsDocument;
        const pagesWithQR: number[] = [];
        for (let i = 0; i < pdfDoc.numPages; i++) {
            console.debug("Page:", i, "/", pdfDoc.numPages);
            // The loop index is zero-based; getPage is called with i + 1.
            const page = await pdfDoc.getPage(i + 1);
            const images = await getImagesOnPage(page);
            for (const image of images) {
                const data = await checkForQROnImage(image);
                if (data === "https://github.com/Frooodle/Stirling-PDF") {
                    pagesWithQR.push(i);
                    break; // one matching image is enough for this page
                }
            }
        }
        if (pagesWithQR.length === 0) {
            console.warn("Could not find any QR Codes in the provided PDF.")
        }
        return pagesWithQR;
    }

    /** Decodes a QR code from one raw image; returns its content or `null`. The image is not modified. */
    async function checkForQROnImage(image: any) {
        // TODO: There is an issue with the jsQR package (The package expects rgba but sometimes we have rgb), and the package seems to be stale, we could create a fork and fix the issue. In the meanwhile we just force rgba:
        const pixelCount = image.width * image.height;
        let rgba = image.data;
        // Three bytes per pixel means there is no alpha channel: expand to RGBA on a copy.
        if (image.data.length === pixelCount * 3) {
            const expanded = new Uint8ClampedArray(pixelCount * 4);
            for (let src = 0, dst = 0; src < image.data.length; src += 3, dst += 4) {
                expanded[dst] = image.data[src];         // Red channel
                expanded[dst + 1] = image.data[src + 1]; // Green channel
                expanded[dst + 2] = image.data[src + 2]; // Blue channel
                expanded[dst + 3] = 255;                 // Alpha channel (fully opaque)
            }
            rgba = expanded;
        }
        const code = jsQR(rgba, image.width, image.height);
        return code ? code.data : null;
    }
}

View File

@ -0,0 +1,44 @@
import { PdfFile } from '../wrappers/PdfFile.js';
import { splitPagesByIndex } from "./common/splitPagesByIndex.js";
import { detectEmptyPages } from "./common/detectEmptyPages.js";
import { detectQRCodePages } from "./common/detectQRCodePages.js";
/** Parameter object accepted by `splitPagesByPreset`. */
export type SplitOnParamsType = {
/** The PDF to split. */
file: PdfFile;
/** Which detector selects the split pages; `splitPagesByPreset` throws for "BAR_CODE" (not implemented). */
type: "BAR_CODE"|"QR_CODE"|"BLANK_PAGE";
/** Only read for "BLANK_PAGE", where it is forwarded to `detectEmptyPages`; scale and meaning are defined there (not visible here). */
whiteThreashold?: number;
}
/**
 * Splits a PDF at pages chosen by one of the built-in detectors.
 *
 * The detector picked by `type` yields page indexes, which are handed to
 * `splitPagesByIndex`. Each resulting file gets `_split-<n>` appended to its filename.
 *
 * @param params.file            The PDF to split.
 * @param params.type            "QR_CODE" or "BLANK_PAGE" ("BAR_CODE" is not implemented).
 * @param params.whiteThreashold Required for "BLANK_PAGE"; forwarded to `detectEmptyPages`.
 * @returns The files produced by `splitPagesByIndex`, renamed in place.
 * @throws Error for "BAR_CODE", for an unknown `type`, and for "BLANK_PAGE" without a threshold.
 */
export async function splitPagesByPreset(params: SplitOnParamsType): Promise<PdfFile[]> {
    const { file, type, whiteThreashold } = params;

    console.log("File: ", file);

    let splitAtPages: number[];
    switch (type) {
        case "BAR_CODE":
            // TODO: Implement
            throw new Error("This split-type has not been implemented yet");
        case "QR_CODE":
            splitAtPages = await detectQRCodePages(file);
            break;
        case "BLANK_PAGE":
            // Only a missing value is an error. A plain falsy check would also
            // reject an explicit threshold of 0.
            if (whiteThreashold == null)
                throw new Error("White threshold not provided");
            splitAtPages = await detectEmptyPages(file, whiteThreashold);
            break;
        default:
            throw new Error("An invalid split-type was provided.");
    }
    console.debug("Split At Pages: ", splitAtPages);

    const newFiles = await splitPagesByIndex(file, splitAtPages);
    newFiles.forEach((newFile, position) => {
        newFile.filename += "_split-" + position;
    });
    return newFiles;
}

View File

@ -0,0 +1,25 @@
import { PdfFile } from '../wrappers/PdfFile.js';
import { parsePageIndexSpecification } from './common/pageIndexesUtils'
import { splitPagesByIndex } from './common/splitPagesByIndex.js';
/** Parameter object accepted by `splitPdfByIndex`. */
export type SplitPagesParamsType = {
/** The PDF to split. */
file: PdfFile;
/** Either a ready-made list of page indexes, or a string that `parsePageIndexSpecification` turns into one. */
pageIndexes: string | number[];
}
/**
 * Splits a PDF at the given page indexes.
 *
 * @param params.file        The PDF to split.
 * @param params.pageIndexes Index list, or a string specification that is resolved with
 *                           `parsePageIndexSpecification` against the document's page count.
 *                           An array passed here is left untouched.
 * @returns The files produced by `splitPagesByIndex`, each with `_split-<n>` appended
 *          to its filename.
 */
export async function splitPdfByIndex(params: SplitPagesParamsType): Promise<PdfFile[]> {
    const { file, pageIndexes } = params;

    let indexes: number[];
    if (Array.isArray(pageIndexes)) {
        // Work on a copy: splitPagesByIndex consumes its index array with shift(),
        // which would otherwise empty the array owned by the caller.
        indexes = [...pageIndexes];
    } else {
        // The page count is only needed to resolve a string specification.
        const pdfLibDocument = await file.pdfLibDocument;
        indexes = parsePageIndexSpecification(pageIndexes, pdfLibDocument.getPageCount());
    }

    const newFiles = await splitPagesByIndex(file, indexes);
    newFiles.forEach((newFile, position) => {
        newFile.filename += "_split-" + position;
    });
    return newFiles;
}