mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-02-07 00:17:07 +01:00
Made split pdf functions conform to the new design pattern.
This commit is contained in:
parent
b4251b56fe
commit
4c8a85726d
55
shared-operations/src/functions/common/detectQRCodePages.ts
Normal file
55
shared-operations/src/functions/common/detectQRCodePages.ts
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
|
||||||
|
import jsQR from "jsqr";
|
||||||
|
|
||||||
|
import { PdfFile } from '../../wrappers/PdfFile.js';
|
||||||
|
import { getImagesOnPage, PDFJSImage } from "./getImagesOnPage.js";
|
||||||
|
|
||||||
|
/**
 * Finds the pages of a PDF that carry the Stirling-PDF separator QR code.
 *
 * Every image found on a page is run through the QR decoder; a page counts as a
 * match as soon as one of its images decodes to the Stirling-PDF repository URL.
 *
 * @param file PDF to scan; its `pdfJsDocument` is awaited to enumerate the pages.
 * @returns Zero-based indexes of the matching pages, in ascending order, each
 *          page listed at most once. Empty when nothing was found (a warning is
 *          logged in that case).
 */
export async function detectQRCodePages(file: PdfFile) {
    const pdfDoc = await file.pdfJsDocument;

    const pagesWithQR: number[] = [];
    for (let i = 0; i < pdfDoc.numPages; i++) {
        // getPage() is called with i + 1, while the index reported to callers is i.
        const page = await pdfDoc.getPage(i + 1);
        const images = await getImagesOnPage(page);

        for (const image of images) {
            const data = await checkForQROnImage(image);
            if (data === "https://github.com/Frooodle/Stirling-PDF") {
                pagesWithQR.push(i);
                // One hit is enough: recording the page once per matching image
                // would hand duplicate split indexes to the caller.
                break;
            }
        }
    }

    if (pagesWithQR.length === 0) {
        console.warn("Could not find any QR Codes in the provided PDF.")
    }
    return pagesWithQR;
}
|
||||||
|
|
||||||
|
/**
 * Tries to decode a QR code from a single raster image.
 *
 * @param image Object exposing `data` (pixel bytes), `width` and `height`.
 *              NOTE(review): kept as `any` because the shape of PDFJSImage is
 *              not visible from this file — tighten once confirmed.
 * @returns The decoded QR payload, or `null` when jsQR finds no code.
 */
async function checkForQROnImage(image: any) {
    // TODO: There is an issue with the jsQR package (The package expects rgba but sometimes we have rgb), and the package seems to be stale, we could create a fork and fix the issue. In the meanwhile we just force rgba:
    let pixels = image.data;

    // A buffer of exactly width * height * 3 bytes is treated as RGB and padded to RGBA.
    if (pixels.length === image.width * image.height * 3) {
        // Build the padded copy in a local buffer so the caller's image object is left untouched.
        const rgba = new Uint8ClampedArray(image.width * image.height * 4);

        for (let src = 0, dst = 0; src < pixels.length; src += 3, dst += 4) {
            rgba[dst] = pixels[src];         // Red channel
            rgba[dst + 1] = pixels[src + 1]; // Green channel
            rgba[dst + 2] = pixels[src + 2]; // Blue channel
            rgba[dst + 3] = 255;             // Alpha channel (fully opaque)
        }

        pixels = rgba;
    }

    const code = jsQR(pixels, image.width, image.height);
    return code ? code.data : null;
}
|
@ -1,27 +1,19 @@
|
|||||||
|
|
||||||
import { getPages } from "./common/getPagesByIndex";
|
import { PdfFile } from '../../wrappers/PdfFile.js';
|
||||||
import { PdfFile } from '../wrappers/PdfFile';
|
import { getPages } from "./getPagesByIndex";
|
||||||
|
|
||||||
export type SplitPdfParamsType = {
|
|
||||||
file: PdfFile;
|
|
||||||
splitAfterPageArray: number[];
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function splitPDF(params: SplitPdfParamsType): Promise<PdfFile[]> {
|
|
||||||
const { file, splitAfterPageArray } = params;
|
|
||||||
|
|
||||||
|
export async function splitPagesByIndex(file: PdfFile, splitAfterPageIndexes: number[]): Promise<PdfFile[]> {
|
||||||
const pdfLibDocument = await file.pdfLibDocument;
|
const pdfLibDocument = await file.pdfLibDocument;
|
||||||
|
|
||||||
const numberOfPages = pdfLibDocument.getPages().length;
|
const numberOfPages = pdfLibDocument.getPages().length;
|
||||||
|
|
||||||
let pagesArray: number[] = [];
|
let pagesArray: number[] = [];
|
||||||
let splitAfter = splitAfterPageArray.shift();
|
let splitAfter = splitAfterPageIndexes.shift();
|
||||||
const subDocuments: PdfFile[] = [];
|
const subDocuments: PdfFile[] = [];
|
||||||
|
|
||||||
for (let i = 0; i < numberOfPages; i++) {
|
for (let i = 0; i < numberOfPages; i++) {
|
||||||
if(splitAfter && i > splitAfter && pagesArray.length > 0) {
|
if(splitAfter && i > splitAfter && pagesArray.length > 0) {
|
||||||
subDocuments.push(await getPages(file, pagesArray));
|
subDocuments.push(await getPages(file, pagesArray));
|
||||||
splitAfter = splitAfterPageArray.shift();
|
splitAfter = splitAfterPageIndexes.shift();
|
||||||
pagesArray = [];
|
pagesArray = [];
|
||||||
}
|
}
|
||||||
pagesArray.push(i);
|
pagesArray.push(i);
|
@ -1,121 +0,0 @@
|
|||||||
|
|
||||||
import jsQR from "jsqr";
|
|
||||||
|
|
||||||
import { detectEmptyPages } from "./common/detectEmptyPages.js";
|
|
||||||
import { getImagesOnPage } from "./common/getImagesOnPage.js";
|
|
||||||
import { getPages } from "./common/getPagesByIndex.js";
|
|
||||||
import { PdfFile } from '../wrappers/PdfFile.js';
|
|
||||||
|
|
||||||
/** Parameters accepted by {@link splitOn}. */
export type SplitOnParamsType = {
    /** PDF to split. */
    file: PdfFile;
    /** Kind of separator page to look for. "BAR_CODE" is not implemented and makes splitOn throw. */
    type: "BAR_CODE"|"QR_CODE"|"BLANK_PAGE";
    /** Forwarded to detectEmptyPages when `type` is "BLANK_PAGE". */
    whiteThreashold: number;
}

/**
 * Splits a PDF at automatically detected separator pages.
 *
 * The separator pages themselves are dropped; every run of pages between two
 * separators becomes one sub-document. Empty runs produce no document.
 *
 * @param params See {@link SplitOnParamsType}.
 * @returns The sub-documents, in page order.
 * @throws Error for the unimplemented "BAR_CODE" type and for unknown types.
 */
export async function splitOn(params: SplitOnParamsType) {
    const { file, type, whiteThreashold } = params;

    let splitAtPages: number[] = [];

    switch (type) {
        case "BAR_CODE":
            // TODO: Implement
            throw new Error("This split-type has not been implemented yet");

        case "QR_CODE":
            splitAtPages = await getPagesWithQRCode(file);
            break;

        case "BLANK_PAGE":
            splitAtPages = await detectEmptyPages(file, whiteThreashold);
            break;

        default:
            throw new Error("An invalid split-type was provided.");
    }

    console.debug("Split At Pages: ", splitAtPages);

    // Remove detected Pages & Split
    const pdfDoc = await file.pdfLibDocument;
    const numberOfPages = pdfDoc.getPageCount();

    let pagesArray: number[] = [];
    let splitAfter = splitAtPages.shift();
    const subDocuments: PdfFile[] = [];

    for (let i = 0; i < numberOfPages; i++) {
        if (i === splitAfter) {
            // Separator page: close the current run (if any) and skip the page itself.
            if (pagesArray.length > 0) {
                subDocuments.push(await getPages(file, pagesArray));
                pagesArray = [];
            }
            splitAfter = splitAtPages.shift();
        }
        else {
            pagesArray.push(i);
        }
    }
    // Pages after the last separator form the final document.
    if (pagesArray.length > 0) {
        subDocuments.push(await getPages(file, pagesArray));
    }

    return subDocuments;

    /** Returns the zero-based indexes of pages holding the Stirling-PDF QR code, each page at most once. */
    async function getPagesWithQRCode(file: PdfFile) {
        const pdfDoc = await file.pdfJsDocument;

        const pagesWithQR: number[] = [];
        for (let i = 0; i < pdfDoc.numPages; i++) {
            // getPage() is called with i + 1, while the index recorded is i.
            const page = await pdfDoc.getPage(i + 1);
            const images = await getImagesOnPage(page);

            for (const image of images) {
                const data = await checkForQROnImage(image);
                if (data === "https://github.com/Frooodle/Stirling-PDF") {
                    pagesWithQR.push(i);
                    // A duplicate index would stall the split loop above on an
                    // already-passed page and hide every later separator.
                    break;
                }
            }
        }
        if (pagesWithQR.length === 0) {
            console.warn("Could not find any QR Codes in the provided PDF.")
        }
        return pagesWithQR;
    }

    /** Decodes a QR code from one image; returns its payload or `null` when none is found. */
    async function checkForQROnImage(image: any) {
        // TODO: There is an issue with the jsQR package (The package expects rgba but sometimes we have rgb), and the package seems to be stale, we could create a fork and fix the issue. In the meanwhile we just force rgba:
        let pixels = image.data;

        // A buffer of exactly width * height * 3 bytes is treated as RGB and padded to RGBA.
        if (pixels.length === image.width * image.height * 3) {
            // Padded into a local buffer so the caller's image object is not modified.
            const rgba = new Uint8ClampedArray(image.width * image.height * 4);

            for (let src = 0, dst = 0; src < pixels.length; src += 3, dst += 4) {
                rgba[dst] = pixels[src];         // Red channel
                rgba[dst + 1] = pixels[src + 1]; // Green channel
                rgba[dst + 2] = pixels[src + 2]; // Blue channel
                rgba[dst + 3] = 255;             // Alpha channel (fully opaque)
            }

            pixels = rgba;
        }

        const code = jsQR(pixels, image.width, image.height);
        return code ? code.data : null;
    }
}
|
|
44
shared-operations/src/functions/splitPagesByPreset.ts
Normal file
44
shared-operations/src/functions/splitPagesByPreset.ts
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
|
||||||
|
import { PdfFile } from '../wrappers/PdfFile.js';
|
||||||
|
import { splitPagesByIndex } from "./common/splitPagesByIndex.js";
|
||||||
|
import { detectEmptyPages } from "./common/detectEmptyPages.js";
|
||||||
|
import { detectQRCodePages } from "./common/detectQRCodePages.js";
|
||||||
|
|
||||||
|
/** Parameters accepted by {@link splitPagesByPreset}. */
export type SplitOnParamsType = {
    /** PDF to split. */
    file: PdfFile;
    /** Detection preset. "BAR_CODE" is not implemented and makes the call throw. */
    type: "BAR_CODE"|"QR_CODE"|"BLANK_PAGE";
    /** Required for "BLANK_PAGE", where it is forwarded to detectEmptyPages; unused otherwise. */
    whiteThreashold?: number;
}

/**
 * Splits a PDF at pages detected by one of the built-in presets.
 *
 * The detected page indexes are handed to splitPagesByIndex, and each resulting
 * file gets "_split-<n>" appended to its filename.
 *
 * @param params See {@link SplitOnParamsType}.
 * @returns The files produced by splitPagesByIndex, renamed as described above.
 * @throws Error for the unimplemented "BAR_CODE" preset, for an unknown preset,
 *         and for "BLANK_PAGE" without a `whiteThreashold`.
 */
export async function splitPagesByPreset(params: SplitOnParamsType): Promise<PdfFile[]> {
    const { file, type, whiteThreashold } = params;

    let splitAtPages: number[];
    switch (type) {
        case "BAR_CODE":
            // TODO: Implement
            throw new Error("This split-type has not been implemented yet");

        case "QR_CODE":
            splitAtPages = await detectQRCodePages(file);
            break;

        case "BLANK_PAGE":
            // Compare against undefined rather than testing truthiness, so an
            // explicitly supplied threshold of 0 is not reported as missing.
            if (whiteThreashold === undefined)
                throw new Error("White threshold not provided");
            splitAtPages = await detectEmptyPages(file, whiteThreashold);
            break;

        default:
            throw new Error("An invalid split-type was provided.");
    }

    console.debug("Split At Pages: ", splitAtPages);

    const newFiles = await splitPagesByIndex(file, splitAtPages);
    for (let i = 0; i < newFiles.length; i++) {
        newFiles[i].filename += "_split-"+i;
    }
    return newFiles;
}
|
25
shared-operations/src/functions/splitPdfByIndex.ts
Normal file
25
shared-operations/src/functions/splitPdfByIndex.ts
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
|
||||||
|
import { PdfFile } from '../wrappers/PdfFile.js';
|
||||||
|
import { parsePageIndexSpecification } from './common/pageIndexesUtils'
|
||||||
|
import { splitPagesByIndex } from './common/splitPagesByIndex.js';
|
||||||
|
|
||||||
|
/** Parameters accepted by {@link splitPdfByIndex}. */
export type SplitPagesParamsType = {
    /** PDF to split. */
    file: PdfFile;
    /**
     * Either a ready-made list of page indexes, or a specification string that
     * parsePageIndexSpecification turns into such a list.
     */
    pageIndexes: string | number[];
}

/**
 * Splits a PDF at the given page indexes.
 *
 * The indexes are handed to splitPagesByIndex, and each resulting file gets
 * "_split-<n>" appended to its filename.
 *
 * @param params See {@link SplitPagesParamsType}.
 * @returns The files produced by splitPagesByIndex, renamed as described above.
 */
export async function splitPdfByIndex(params: SplitPagesParamsType): Promise<PdfFile[]> {
    const { file, pageIndexes } = params;

    let indexes: number[];
    if (Array.isArray(pageIndexes)) {
        // splitPagesByIndex consumes its index list with shift(); pass a copy so
        // the caller's array is not emptied as a side effect.
        indexes = [...pageIndexes];
    } else {
        // The page count is only needed to resolve a textual specification.
        const pdfLibDocument = await file.pdfLibDocument;
        indexes = parsePageIndexSpecification(pageIndexes, pdfLibDocument.getPageCount());
    }

    const newFiles = await splitPagesByIndex(file, indexes);
    for (let i = 0; i < newFiles.length; i++) {
        newFiles[i].filename += "_split-"+i;
    }
    return newFiles;
}
|
Loading…
Reference in New Issue
Block a user