mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-04-22 01:16:39 +02:00
QRCode Detection working.
This commit is contained in:
parent
4e8d8e3d53
commit
dd14b3a773
@ -1,7 +1,9 @@
|
||||
import PDFLib from 'pdf-lib';
|
||||
import OpenCV from 'opencv-wasm';
|
||||
import PDFJS from "pdfjs-dist";
|
||||
|
||||
delete global.crypto; // TODO: I hate to do this, but the new node version forces me to, if anyone finds a better solution, please tell me!
|
||||
import * as pdfcpuWraopper from "./public/wasm/pdfcpu-wrapper-node.js";
|
||||
import OpenCV from 'opencv-wasm';
|
||||
|
||||
import { extractPages as dependantExtractPages } from "./public/functions/extractPages.js";
|
||||
import { impose as dependantImpose } from './public/functions/impose.js';
|
||||
@ -13,6 +15,7 @@ import { splitPDF as dependantSplitPDF } from './public/functions/splitPDF.js';
|
||||
import { editMetadata as dependantEditMetadata } from './public/functions/editMetadata.js';
|
||||
import { organizePages as dependantOrganizePages } from './public/functions/organizePages.js';
|
||||
import { removeBlankPages as dependantRemoveBlankPages} from './public/functions/removeBlankPages.js';
|
||||
import { splitOn as dependantSplitOn } from "./public/functions/splitOn.js";
|
||||
|
||||
export async function extractPages(snapshot, pagesToExtractArray) {
|
||||
return dependantExtractPages(snapshot, pagesToExtractArray, PDFLib);
|
||||
@ -52,4 +55,8 @@ export async function organizePages(snapshot, operation, customOrderString) {
|
||||
|
||||
/**
 * Thin wrapper that forwards to the shared implementation imported as
 * `dependantRemoveBlankPages`, supplying the PDFJS, OpenCV and PDFLib
 * modules imported at the top of this file.
 *
 * @param snapshot - passed through unchanged (presumably the PDF bytes — confirm against the shared implementation).
 * @param whiteThreashold - passed through unchanged (presumably the whiteness cut-off used for blank detection — confirm).
 * @returns the value the shared implementation resolves to.
 */
export async function removeBlankPages(snapshot, whiteThreashold) {
|
||||
return dependantRemoveBlankPages(snapshot, whiteThreashold, PDFJS, OpenCV, PDFLib);
|
||||
}
|
||||
|
||||
/**
 * Thin wrapper that forwards to the shared implementation imported as
 * `dependantSplitOn`, supplying the PDFJS, OpenCV and PDFLib modules
 * imported at the top of this file.
 *
 * @param snapshot - passed through unchanged (presumably the PDF bytes — confirm against the shared implementation).
 * @param type - passed through unchanged; the shared implementation documents it as a SplitType ("BAR_CODE", "QR_CODE" or "BLANK_PAGE").
 * @param whiteThreashold - passed through unchanged (presumably only relevant for blank-page detection — confirm).
 * @returns the value the shared implementation resolves to.
 */
export async function splitOn(snapshot, type, whiteThreashold) {
|
||||
return dependantSplitOn(snapshot, type, whiteThreashold, PDFJS, OpenCV, PDFLib);
|
||||
}
|
5010
package-lock.json
generated
5010
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -13,6 +13,7 @@
|
||||
"archiver": "^6.0.1",
|
||||
"express": "^4.18.2",
|
||||
"express-fileupload": "^1.4.1",
|
||||
"jsqr": "^1.4.0",
|
||||
"opencv-wasm": "^4.3.0-10",
|
||||
"pdf-lib": "^1.17.1",
|
||||
"pdfjs-dist": "^2.0.943"
|
||||
|
@ -13,6 +13,7 @@ import { splitPDF as dependantSplitPDF } from './functions/splitPDF.js';
|
||||
import { editMetadata as dependantEditMetadata} from "./functions/editMetadata.js";
|
||||
import { organizePages as dependantOrganizePages} from "./functions/organizePages.js";
|
||||
import { removeBlankPages as dependantRemoveBlankPages} from "./functions/removeBlankPages.js";
|
||||
import { splitOn as dependantSplitOn } from "./functions/splitOn.js";
|
||||
|
||||
export async function extractPages(snapshot, pagesToExtractArray) {
|
||||
return dependantExtractPages(snapshot, pagesToExtractArray, PDFLib);
|
||||
@ -52,4 +53,8 @@ export async function organizePages(snapshot, operation, customOrderString) {
|
||||
|
||||
/**
 * Thin wrapper that forwards to the shared implementation imported as
 * `dependantRemoveBlankPages`, supplying `pdfjsLib`, `OpenCV` and `PDFLib`.
 * NOTE(review): those three names are not defined in the visible part of
 * this file — presumably globals provided by the hosting page; confirm.
 *
 * @param snapshot - passed through unchanged (presumably the PDF bytes — confirm against the shared implementation).
 * @param whiteThreashold - passed through unchanged (presumably the whiteness cut-off used for blank detection — confirm).
 * @returns the value the shared implementation resolves to.
 */
export async function removeBlankPages(snapshot, whiteThreashold) {
|
||||
return dependantRemoveBlankPages(snapshot, whiteThreashold, pdfjsLib, OpenCV, PDFLib);
|
||||
}
|
||||
|
||||
/**
 * Thin wrapper that forwards to the shared implementation imported as
 * `dependantSplitOn`, supplying `pdfjsLib`, `OpenCV` and `PDFLib`.
 * NOTE(review): those three names are not defined in the visible part of
 * this file — presumably globals provided by the hosting page; confirm.
 *
 * @param snapshot - passed through unchanged (presumably the PDF bytes — confirm against the shared implementation).
 * @param type - passed through unchanged; the shared implementation documents it as a SplitType ("BAR_CODE", "QR_CODE" or "BLANK_PAGE").
 * @param whiteThreashold - passed through unchanged (presumably only relevant for blank-page detection — confirm).
 * @returns the value the shared implementation resolves to.
 */
export async function splitOn(snapshot, type, whiteThreashold) {
|
||||
return dependantSplitOn(snapshot, type, whiteThreashold, pdfjsLib, OpenCV, PDFLib);
|
||||
}
|
@ -1,3 +1,5 @@
|
||||
import { getImagesOnPage } from "./getImagesOnPage.js";
|
||||
|
||||
export async function detectEmptyPages(snapshot, whiteThreashold, PDFJS, OpenCV) {
|
||||
const pdfDoc = await PDFJS.getDocument(snapshot).promise;
|
||||
|
||||
@ -27,15 +29,10 @@ export async function detectEmptyPages(snapshot, whiteThreashold, PDFJS, OpenCV)
|
||||
}
|
||||
|
||||
/**
 * Reports whether every image painted on a page is blank.
 *
 * Relies on `PDFJS`, `getImagesOnPage` and `isImageBlank` from the
 * surrounding scope.
 *
 * @param page - PDF.js page whose images are inspected.
 * @param threshold - forwarded unchanged to `isImageBlank`.
 * @returns {Promise<boolean>} `false` as soon as one image is not blank;
 *          `true` otherwise (including when the page has no images).
 */
async function areImagesBlank(page, threshold) {
    // getImagesOnPage is async: without `await`, `images` would be a Promise,
    // which is not iterable, and the loop below would throw a TypeError.
    const images = await getImagesOnPage(page, PDFJS);
    for (const image of images) {
        // isImageBlank is defined outside this view. Awaiting is a no-op if it
        // is synchronous and required if it returns a Promise (a bare Promise
        // is always truthy, which would make every image count as blank).
        if (!(await isImageBlank(image, threshold))) {
            return false;
        }
    }
    return true;
}
|
||||
|
11
public/functions/shared/getImagesOnPage.js
Normal file
11
public/functions/shared/getImagesOnPage.js
Normal file
@ -0,0 +1,11 @@
|
||||
/**
 * Collects the image objects painted on a PDF.js page.
 *
 * Walks the page's operator list and, for every `paintJpegXObject` or
 * `paintImageXObject` operator, looks up the object named by the operator's
 * first argument in `page.objs`.
 *
 * @param page - PDF.js page; must provide `getOperatorList()` and `objs.get(name)`.
 * @param PDFJS - PDF.js module; only `PDFJS.OPS` is read.
 * @returns {Promise<Array>} the looked-up image objects, in paint order.
 *          The function is async, so callers must `await` the result.
 */
export async function getImagesOnPage(page, PDFJS) {
    const { fnArray, argsArray } = await page.getOperatorList();
    const { paintJpegXObject, paintImageXObject } = PDFJS.OPS;

    const images = [];
    for (let index = 0; index < fnArray.length; index++) {
        const operator = fnArray[index];
        if (operator === paintJpegXObject || operator === paintImageXObject) {
            // The operator's first argument is the object's name in the page's object store.
            images.push(page.objs.get(argsArray[index][0]));
        }
    }
    return images;
}
|
@ -1,4 +1,7 @@
|
||||
import { detectEmptyPages } from "./shared/detectEmptyPages";
|
||||
import { detectEmptyPages } from "./shared/detectEmptyPages.js";
|
||||
import { getImagesOnPage } from "./shared/getImagesOnPage.js";
|
||||
|
||||
// The npm package is named "jsqr" (all lowercase, as listed in package.json);
// a mixed-case specifier fails to resolve on case-sensitive file systems.
import jsQR from "jsqr";
|
||||
|
||||
/**
|
||||
* @typedef {"BAR_CODE"|"QR_CODE"|"BLANK_PAGE"} SplitType
|
||||
@ -9,24 +12,22 @@ import { detectEmptyPages } from "./shared/detectEmptyPages";
|
||||
* @param {Uint16Array} snapshot
|
||||
* @param {SplitType} type
|
||||
* @param {} PDFJS
|
||||
* @param {} OpenCV
|
||||
* @param {import('opencv-wasm')} OpenCV
|
||||
* @param {} PDFLib
|
||||
* @param {} QRCode
|
||||
* @returns
|
||||
*/
|
||||
export async function splitOn(snapshot, type, whiteThreashold, PDFJS, OpenCV, PDFLib, QRCode) {
|
||||
export async function splitOn(snapshot, type, whiteThreashold, PDFJS, OpenCV, PDFLib) {
|
||||
|
||||
let splitAtPages = [];
|
||||
|
||||
switch (type) {
|
||||
case "BAR_CODE":
|
||||
// TODO: Implement
|
||||
throw new Error("This split-type has not been implemented yet")
|
||||
throw new Error("This split-type has not been implemented yet");
|
||||
break;
|
||||
|
||||
case "QR_CODE":
|
||||
// TODO: Implement
|
||||
throw new Error("This split-type has not been implemented yet")
|
||||
splitAtPages = await getPagesWithQRCode(snapshot);
|
||||
break;
|
||||
|
||||
case "BLANK_PAGE":
|
||||
@ -45,4 +46,49 @@ export async function splitOn(snapshot, type, whiteThreashold, PDFJS, OpenCV, PD
|
||||
// TODO: Remove detected Pages & Split
|
||||
|
||||
return pdfDoc.save();
|
||||
|
||||
/**
 * Finds the pages that carry the Stirling-PDF separator QR code.
 *
 * Opens the document with `PDFJS` (from the enclosing scope), decodes every
 * image on every page via `checkForQROnImage`, and records a page when one of
 * its images decodes to the Stirling-PDF repository URL.
 *
 * @param snapshot - PDF data handed to `PDFJS.getDocument`.
 * @returns {Promise<number[]>} 1-based page numbers in ascending order, each
 *          listed at most once.
 */
async function getPagesWithQRCode(snapshot) {
    const SEPARATOR_QR_CONTENT = "https://github.com/Frooodle/Stirling-PDF";

    const pdfDoc = await PDFJS.getDocument(snapshot).promise;

    const pagesWithQR = [];
    for (let i = 1; i <= pdfDoc.numPages; i++) {
        const page = await pdfDoc.getPage(i);
        console.log("Checking page " + i);

        const images = await getImagesOnPage(page, PDFJS);

        for (const image of images) {
            const data = await checkForQROnImage(image);
            if (data === SEPARATOR_QR_CONTENT) {
                pagesWithQR.push(i);
                // One match is enough: stop here so the page is not recorded
                // twice and the remaining images are not decoded needlessly.
                break;
            }
        }
    }
    return pagesWithQR;
}
|
||||
|
||||
/**
 * Tries to decode a QR code from a single image.
 *
 * @param image - object with `data` (pixel bytes), `width` and `height`;
 *        presumably an image object obtained from PDF.js — confirm against callers.
 *        The object is not modified.
 * @returns {Promise<string|null>} the decoded QR payload, or `null` when
 *          jsQR finds no code.
 */
async function checkForQROnImage(image) {
    const { width, height } = image;
    let pixels = image.data;

    // TODO: There is an issue with the jsQR package, and the package seems to be stale; we could create a fork and
    // fix the issue (the package expects RGBA but sometimes we have RGB). In the meantime we just force RGBA.
    if (pixels.length === width * height * 3) {
        // Expand into a fresh buffer instead of overwriting image.data, so the
        // caller's image object keeps its original pixel layout.
        const rgba = new Uint8ClampedArray(width * height * 4);
        for (let src = 0, dst = 0; src < pixels.length; src += 3, dst += 4) {
            rgba[dst] = pixels[src];         // Red channel
            rgba[dst + 1] = pixels[src + 1]; // Green channel
            rgba[dst + 2] = pixels[src + 2]; // Blue channel
            rgba[dst + 3] = 255;             // Alpha channel (fully opaque)
        }
        pixels = rgba;
    }

    const code = jsQR(pixels, width, height);
    return code ? code.data : null;
}
|
||||
};
|
@ -115,6 +115,12 @@ export async function * traverseOperations(operations, input, Functions) {
|
||||
input.buffer = await Functions.removeBlankPages(input.buffer, operation.values["whiteThreashold"]);
|
||||
});
|
||||
break;
|
||||
case "splitOn":
|
||||
yield* oneToN(input, operation, async (input) => {
|
||||
input.fileName += "_split";
|
||||
input.buffer = await Functions.splitOn(input.buffer, operation.values["type"], operation.values["whiteThreashold"]);
|
||||
});
|
||||
break;
|
||||
default:
|
||||
throw new Error(`${operation.type} not implemented yet.`);
|
||||
break;
|
||||
|
Loading…
Reference in New Issue
Block a user