mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-02-07 00:17:07 +01:00
Made split pdf functions conform to the new design pattern.
This commit is contained in:
parent
b4251b56fe
commit
4c8a85726d
55
shared-operations/src/functions/common/detectQRCodePages.ts
Normal file
55
shared-operations/src/functions/common/detectQRCodePages.ts
Normal file
@ -0,0 +1,55 @@
|
||||
|
||||
import jsQR from "jsqr";
|
||||
|
||||
import { PdfFile } from '../../wrappers/PdfFile.js';
|
||||
import { getImagesOnPage, PDFJSImage } from "./getImagesOnPage.js";
|
||||
|
||||
/**
 * Scans every page of a PDF for embedded images that contain the
 * Stirling-PDF separator QR code.
 *
 * @param file - The PDF to scan; its `pdfJsDocument` is awaited to enumerate the pages.
 * @returns The zero-based indexes of all pages on which at least one image decodes
 *          to the separator URL. Indexes are ascending and each page appears at most
 *          once, even when several images on the same page carry the code.
 */
export async function detectQRCodePages(file: PdfFile): Promise<number[]> {
    console.log("FileInQRPrev: ", file);
    const pdfDoc = await file.pdfJsDocument;
    console.log("FileInQRAfter: ", file);

    const pagesWithQR: number[] = [];
    for (let i = 0; i < pdfDoc.numPages; i++) {
        console.log("Page:", i, "/", pdfDoc.numPages);
        // The loop index is zero-based, the page lookup is one-based.
        const page = await pdfDoc.getPage(i + 1);

        const images = await getImagesOnPage(page);
        // console.log("images:", images);
        for (const image of images) {
            const data = await checkForQROnImage(image);
            if (data === "https://github.com/Frooodle/Stirling-PDF") {
                pagesWithQR.push(i);
                // One hit is enough; continuing would record the same page
                // again for every further matching image.
                break;
            }
        }
    }

    if (pagesWithQR.length === 0) {
        console.warn("Could not find any QR Codes in the provided PDF.")
    }
    return pagesWithQR;
}
|
||||
|
||||
/**
 * Tries to decode a QR code from a raw image.
 *
 * The image is read through its `width`, `height` and `data` properties. When
 * `data` holds exactly three bytes per pixel it is treated as RGB and expanded
 * to RGBA in a new buffer before decoding; the passed-in image object is left
 * untouched.
 *
 * @param image - Image object exposing `width`, `height` and a pixel `data` array.
 * @returns The decoded QR payload, or `null` when no code was found.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- image shape comes from getImagesOnPage
async function checkForQROnImage(image: any): Promise<string | null> {
    // TODO: There is an issue with the jsQR package (The package expects rgba but sometimes we have rgb), and the package seems to be stale, we could create a fork and fix the issue. In the meanwhile we just force rgba:
    const pixelCount = image.width * image.height;
    let rgba = image.data;

    // Check for rgb and convert to rgba
    if (image.data.length === pixelCount * 3) {
        rgba = new Uint8ClampedArray(pixelCount * 4);

        // Copy each RGB triple and append an alpha channel.
        for (let src = 0, dst = 0; src < image.data.length; src += 3, dst += 4) {
            rgba[dst] = image.data[src];         // Red channel
            rgba[dst + 1] = image.data[src + 1]; // Green channel
            rgba[dst + 2] = image.data[src + 2]; // Blue channel
            rgba[dst + 3] = 255;                 // Alpha channel (fully opaque)
        }
    }

    const code = jsQR(rgba, image.width, image.height);
    return code ? code.data : null;
}
|
@ -1,27 +1,19 @@
|
||||
|
||||
import { getPages } from "./common/getPagesByIndex";
|
||||
import { PdfFile } from '../wrappers/PdfFile';
|
||||
|
||||
export type SplitPdfParamsType = {
|
||||
file: PdfFile;
|
||||
splitAfterPageArray: number[];
|
||||
}
|
||||
|
||||
export async function splitPDF(params: SplitPdfParamsType): Promise<PdfFile[]> {
|
||||
const { file, splitAfterPageArray } = params;
|
||||
import { PdfFile } from '../../wrappers/PdfFile.js';
|
||||
import { getPages } from "./getPagesByIndex";
|
||||
|
||||
export async function splitPagesByIndex(file: PdfFile, splitAfterPageIndexes: number[]): Promise<PdfFile[]> {
|
||||
const pdfLibDocument = await file.pdfLibDocument;
|
||||
|
||||
const numberOfPages = pdfLibDocument.getPages().length;
|
||||
|
||||
let pagesArray: number[] = [];
|
||||
let splitAfter = splitAfterPageArray.shift();
|
||||
let splitAfter = splitAfterPageIndexes.shift();
|
||||
const subDocuments: PdfFile[] = [];
|
||||
|
||||
for (let i = 0; i < numberOfPages; i++) {
|
||||
if(splitAfter && i > splitAfter && pagesArray.length > 0) {
|
||||
subDocuments.push(await getPages(file, pagesArray));
|
||||
splitAfter = splitAfterPageArray.shift();
|
||||
splitAfter = splitAfterPageIndexes.shift();
|
||||
pagesArray = [];
|
||||
}
|
||||
pagesArray.push(i);
|
@ -1,121 +0,0 @@
|
||||
|
||||
import jsQR from "jsqr";
|
||||
|
||||
import { detectEmptyPages } from "./common/detectEmptyPages.js";
|
||||
import { getImagesOnPage } from "./common/getImagesOnPage.js";
|
||||
import { getPages } from "./common/getPagesByIndex.js";
|
||||
import { PdfFile } from '../wrappers/PdfFile.js';
|
||||
|
||||
/** Parameters accepted by `splitOn`. */
export type SplitOnParamsType = {
    /** The PDF that should be split. */
    file: PdfFile;
    /** Which kind of separator page to look for. */
    type:
        | "BAR_CODE"
        | "QR_CODE"
        | "BLANK_PAGE";
    /** Threshold handed to `detectEmptyPages` when `type` is "BLANK_PAGE". */
    whiteThreashold: number;
};
|
||||
|
||||
/**
 * Splits a PDF into several documents at automatically detected separator pages.
 *
 * The separator pages themselves are dropped; every run of pages between two
 * separators becomes one output document. Empty runs (for example two
 * consecutive separators) produce no document.
 *
 * @param params - The file to split, the detection strategy and, for
 *                 "BLANK_PAGE", the threshold passed on to `detectEmptyPages`.
 * @returns The resulting sub-documents in page order.
 * @throws Error when `type` is "BAR_CODE" (not implemented) or not a known split type.
 */
export async function splitOn(params: SplitOnParamsType): Promise<PdfFile[]> {
    const { file, type, whiteThreashold } = params;

    let splitAtPages: number[] = [];

    console.log("File: ", file);

    switch (type) {
        case "BAR_CODE":
            // TODO: Implement
            throw new Error("This split-type has not been implemented yet");

        case "QR_CODE":
            splitAtPages = await getPagesWithQRCode(file);
            break;

        case "BLANK_PAGE":
            splitAtPages = await detectEmptyPages(file, whiteThreashold);
            break;

        default:
            throw new Error("An invalid split-type was provided.");
    }

    console.log("Split At Pages: ", splitAtPages);

    console.log("File: ", file);

    // Remove detected Pages & Split
    const pdfDoc = await file.pdfLibDocument;
    const numberOfPages = pdfDoc.getPageCount();

    // A set keeps the split correct even if a detector reports a page twice
    // or out of order.
    const separatorPages = new Set(splitAtPages);
    const subDocuments: PdfFile[] = [];
    let pagesArray: number[] = [];

    for (let i = 0; i < numberOfPages; i++) {
        if (separatorPages.has(i)) {
            // Separator page: close the current run and skip the page itself.
            if (pagesArray.length > 0) {
                subDocuments.push(await getPages(file, pagesArray));
                pagesArray = [];
            }
        }
        else {
            pagesArray.push(i);
        }
    }
    if (pagesArray.length > 0) {
        subDocuments.push(await getPages(file, pagesArray));
    }

    return subDocuments;
}

/** Collects the zero-based indexes of all pages that carry the Stirling-PDF separator QR code. */
async function getPagesWithQRCode(file: PdfFile): Promise<number[]> {
    console.log("FileInQRPrev: ", file);
    const pdfDoc = await file.pdfJsDocument;
    console.log("FileInQRAfter: ", file);

    const pagesWithQR: number[] = [];
    for (let i = 0; i < pdfDoc.numPages; i++) {
        console.log("Page:", i, "/", pdfDoc.numPages);
        // The loop index is zero-based, the page lookup is one-based.
        const page = await pdfDoc.getPage(i + 1);

        const images = await getImagesOnPage(page);
        // console.log("images:", images);
        for (const image of images) {
            const data = await checkForQROnImage(image);
            if (data === "https://github.com/Frooodle/Stirling-PDF") {
                pagesWithQR.push(i);
                // Record each page only once, however many images match.
                break;
            }
        }
    }
    if (pagesWithQR.length === 0) {
        console.warn("Could not find any QR Codes in the provided PDF.")
    }
    return pagesWithQR;
}

/** Decodes a QR code from a raw image, expanding RGB data to RGBA first; returns `null` when nothing was found. */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- image shape comes from getImagesOnPage
async function checkForQROnImage(image: any): Promise<string | null> {
    // TODO: There is an issue with the jsQR package (The package expects rgba but sometimes we have rgb), and the package seems to be stale, we could create a fork and fix the issue. In the meanwhile we just force rgba:
    const pixelCount = image.width * image.height;
    let rgba = image.data;

    // Check for rgb and convert to rgba
    if (image.data.length === pixelCount * 3) {
        rgba = new Uint8ClampedArray(pixelCount * 4);

        // Copy each RGB triple and append an alpha channel.
        for (let src = 0, dst = 0; src < image.data.length; src += 3, dst += 4) {
            rgba[dst] = image.data[src];         // Red channel
            rgba[dst + 1] = image.data[src + 1]; // Green channel
            rgba[dst + 2] = image.data[src + 2]; // Blue channel
            rgba[dst + 3] = 255;                 // Alpha channel (fully opaque)
        }
    }

    const code = jsQR(rgba, image.width, image.height);
    return code ? code.data : null;
}
|
44
shared-operations/src/functions/splitPagesByPreset.ts
Normal file
44
shared-operations/src/functions/splitPagesByPreset.ts
Normal file
@ -0,0 +1,44 @@
|
||||
|
||||
import { PdfFile } from '../wrappers/PdfFile.js';
|
||||
import { splitPagesByIndex } from "./common/splitPagesByIndex.js";
|
||||
import { detectEmptyPages } from "./common/detectEmptyPages.js";
|
||||
import { detectQRCodePages } from "./common/detectQRCodePages.js";
|
||||
|
||||
/** Parameters accepted by `splitPagesByPreset`. */
export type SplitOnParamsType = {
    /** The PDF that should be split. */
    file: PdfFile;
    /** Which kind of separator page to look for. */
    type:
        | "BAR_CODE"
        | "QR_CODE"
        | "BLANK_PAGE";
    /** Threshold handed to `detectEmptyPages`; required when `type` is "BLANK_PAGE". */
    whiteThreashold?: number;
};
|
||||
/**
 * Splits a PDF at pages found by one of the built-in detection presets.
 *
 * The detected page indexes are handed to `splitPagesByIndex`; each resulting
 * file gets `_split-<n>` appended to its filename.
 *
 * @param params - The file to split, the preset to use and, for "BLANK_PAGE",
 *                 the threshold passed on to `detectEmptyPages`.
 * @returns The files produced by the split.
 * @throws Error when `type` is "BAR_CODE" (not implemented), when "BLANK_PAGE" is
 *         requested without a usable `whiteThreashold`, or when `type` is unknown.
 */
export async function splitPagesByPreset(params: SplitOnParamsType): Promise<PdfFile[]> {
    const { file, type, whiteThreashold } = params;

    console.log("File: ", file);

    let splitAtPages: number[];
    switch (type) {
        case "BAR_CODE":
            // TODO: Implement
            throw new Error("This split-type has not been implemented yet");

        case "QR_CODE":
            splitAtPages = await detectQRCodePages(file);
            break;

        case "BLANK_PAGE":
            // Only a missing (or NaN) value counts as "not provided"; a plain
            // falsy check would also reject an explicit threshold of 0.
            if (whiteThreashold == null || Number.isNaN(whiteThreashold))
                throw new Error("White threshold not provided");
            splitAtPages = await detectEmptyPages(file, whiteThreashold);
            break;

        default:
            throw new Error("An invalid split-type was provided.");
    }

    console.debug("Split At Pages: ", splitAtPages);

    const newFiles = await splitPagesByIndex(file, splitAtPages);
    newFiles.forEach((newFile, i) => {
        newFile.filename += "_split-" + i;
    });
    return newFiles;
}
|
25
shared-operations/src/functions/splitPdfByIndex.ts
Normal file
25
shared-operations/src/functions/splitPdfByIndex.ts
Normal file
@ -0,0 +1,25 @@
|
||||
|
||||
import { PdfFile } from '../wrappers/PdfFile.js';
|
||||
import { parsePageIndexSpecification } from './common/pageIndexesUtils'
|
||||
import { splitPagesByIndex } from './common/splitPagesByIndex.js';
|
||||
|
||||
/** Parameters accepted by `splitPdfByIndex`. */
export type SplitPagesParamsType = {
    /** The PDF that should be split. */
    file: PdfFile;
    /**
     * Where to split: either a ready-made list of page indexes or a textual
     * specification that is resolved with `parsePageIndexSpecification`.
     */
    pageIndexes:
        | string
        | number[];
};
|
||||
/**
 * Splits a PDF at explicitly given page indexes.
 *
 * @param params - The file to split and the split positions, either as a list of
 *                 page indexes or as a specification string that is resolved with
 *                 `parsePageIndexSpecification` against the document's page count.
 * @returns The files produced by `splitPagesByIndex`, each with `_split-<n>`
 *          appended to its filename.
 */
export async function splitPdfByIndex(params: SplitPagesParamsType): Promise<PdfFile[]> {
    const { file, pageIndexes } = params;

    let indexes: number[];
    if (Array.isArray(pageIndexes)) {
        // Hand over a copy: the split helper consumes the array it is given,
        // which would otherwise empty the caller's list.
        indexes = [...pageIndexes];
    }
    else {
        // The page count is only needed to resolve a textual specification.
        const pdfLibDocument = await file.pdfLibDocument;
        indexes = parsePageIndexSpecification(pageIndexes, pdfLibDocument.getPageCount());
    }

    const newFiles = await splitPagesByIndex(file, indexes);
    newFiles.forEach((newFile, i) => {
        newFile.filename += "_split-" + i;
    });
    return newFiles;
}
|
Loading…
Reference in New Issue
Block a user