QRCode Detection working.

This commit is contained in:
Felix Kaspar 2023-10-27 02:56:13 +02:00
parent 4e8d8e3d53
commit dd14b3a773
8 changed files with 4826 additions and 291 deletions

View File

@ -1,7 +1,9 @@
import PDFLib from 'pdf-lib';
import OpenCV from 'opencv-wasm';
import PDFJS from "pdfjs-dist";
delete global.crypto; // TODO: I hate to do this, but the new node version forces me to, if anyone finds a better solution, please tell me!
import * as pdfcpuWraopper from "./public/wasm/pdfcpu-wrapper-node.js";
import OpenCV from 'opencv-wasm';
import { extractPages as dependantExtractPages } from "./public/functions/extractPages.js";
import { impose as dependantImpose } from './public/functions/impose.js';
@ -13,6 +15,7 @@ import { splitPDF as dependantSplitPDF } from './public/functions/splitPDF.js';
import { editMetadata as dependantEditMetadata } from './public/functions/editMetadata.js';
import { organizePages as dependantOrganizePages } from './public/functions/organizePages.js';
import { removeBlankPages as dependantRemoveBlankPages} from './public/functions/removeBlankPages.js';
import { splitOn as dependantSplitOn } from "./public/functions/splitOn.js";
export async function extractPages(snapshot, pagesToExtractArray) {
return dependantExtractPages(snapshot, pagesToExtractArray, PDFLib);
@ -53,3 +56,7 @@ export async function organizePages(snapshot, operation, customOrderString) {
/**
 * Forwards to the shared removeBlankPages implementation, supplying the
 * PDFJS, OpenCV and PDFLib modules that are in scope in this file.
 *
 * @param snapshot passed through unchanged as the first argument
 * @param whiteThreashold passed through unchanged as the second argument
 * @returns {Promise<*>} whatever the shared implementation resolves to
 */
export async function removeBlankPages(snapshot, whiteThreashold) {
    // The shared implementation takes its libraries as trailing arguments.
    const injectedLibraries = [PDFJS, OpenCV, PDFLib];
    return dependantRemoveBlankPages(snapshot, whiteThreashold, ...injectedLibraries);
}
/**
 * Forwards to the shared splitOn implementation, supplying the PDFJS, OpenCV
 * and PDFLib modules that are in scope in this file.
 *
 * @param snapshot passed through unchanged as the first argument
 * @param type passed through unchanged as the second argument
 * @param whiteThreashold passed through unchanged as the third argument
 * @returns {Promise<*>} whatever the shared implementation resolves to
 */
export async function splitOn(snapshot, type, whiteThreashold) {
    // The shared implementation takes its libraries as trailing arguments.
    const injectedLibraries = [PDFJS, OpenCV, PDFLib];
    return dependantSplitOn(snapshot, type, whiteThreashold, ...injectedLibraries);
}

5010
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -13,6 +13,7 @@
"archiver": "^6.0.1",
"express": "^4.18.2",
"express-fileupload": "^1.4.1",
"jsqr": "^1.4.0",
"opencv-wasm": "^4.3.0-10",
"pdf-lib": "^1.17.1",
"pdfjs-dist": "^2.0.943"

View File

@ -13,6 +13,7 @@ import { splitPDF as dependantSplitPDF } from './functions/splitPDF.js';
import { editMetadata as dependantEditMetadata} from "./functions/editMetadata.js";
import { organizePages as dependantOrganizePages} from "./functions/organizePages.js";
import { removeBlankPages as dependantRemoveBlankPages} from "./functions/removeBlankPages.js";
import { splitOn as dependantSplitOn } from "./functions/splitOn.js";
export async function extractPages(snapshot, pagesToExtractArray) {
return dependantExtractPages(snapshot, pagesToExtractArray, PDFLib);
@ -53,3 +54,7 @@ export async function organizePages(snapshot, operation, customOrderString) {
/**
 * Forwards to the shared removeBlankPages implementation, supplying the
 * pdfjsLib, OpenCV and PDFLib objects that are in scope in this file.
 *
 * @param snapshot passed through unchanged as the first argument
 * @param whiteThreashold passed through unchanged as the second argument
 * @returns {Promise<*>} whatever the shared implementation resolves to
 */
export async function removeBlankPages(snapshot, whiteThreashold) {
    // The shared implementation takes its libraries as trailing arguments.
    const injectedLibraries = [pdfjsLib, OpenCV, PDFLib];
    return dependantRemoveBlankPages(snapshot, whiteThreashold, ...injectedLibraries);
}
/**
 * Forwards to the shared splitOn implementation, supplying the pdfjsLib,
 * OpenCV and PDFLib objects that are in scope in this file.
 *
 * @param snapshot passed through unchanged as the first argument
 * @param type passed through unchanged as the second argument
 * @param whiteThreashold passed through unchanged as the third argument
 * @returns {Promise<*>} whatever the shared implementation resolves to
 */
export async function splitOn(snapshot, type, whiteThreashold) {
    // The shared implementation takes its libraries as trailing arguments.
    const injectedLibraries = [pdfjsLib, OpenCV, PDFLib];
    return dependantSplitOn(snapshot, type, whiteThreashold, ...injectedLibraries);
}

View File

@ -1,3 +1,5 @@
import { getImagesOnPage } from "./getImagesOnPage.js";
export async function detectEmptyPages(snapshot, whiteThreashold, PDFJS, OpenCV) {
const pdfDoc = await PDFJS.getDocument(snapshot).promise;
@ -27,15 +29,10 @@ export async function detectEmptyPages(snapshot, whiteThreashold, PDFJS, OpenCV)
}
/**
 * Reports whether every image painted on a page is blank.
 *
 * @param page page object, forwarded to getImagesOnPage
 * @param threshold forwarded unchanged to isImageBlank
 * @returns {Promise<boolean>} false as soon as one image is not blank;
 *     true otherwise, including when the page has no images
 */
async function areImagesBlank(page, threshold) {
    // getImagesOnPage is async: without the await the loop below would try to
    // iterate a Promise and throw a TypeError.
    const images = await getImagesOnPage(page, PDFJS);
    for (const image of images) {
        // NOTE(review): isImageBlank is defined outside this view. Awaiting is
        // harmless if it is synchronous and required if it returns a Promise.
        if (!(await isImageBlank(image, threshold))) {
            return false;
        }
    }
    return true;
}

View File

@ -0,0 +1,11 @@
/**
 * Collects the image objects painted on a page.
 *
 * Walks the page's operator list and, for every paintJpegXObject or
 * paintImageXObject operation, looks up the object named by the operation's
 * first argument in page.objs.
 *
 * @param page page object providing getOperatorList() and objs.get(name)
 * @param PDFJS library object providing the OPS operator codes
 * @returns {Promise<Array>} the looked-up image objects, in paint order
 */
export async function getImagesOnPage(page, PDFJS) {
    const ops = await page.getOperatorList();
    const { paintJpegXObject, paintImageXObject } = PDFJS.OPS;

    const images = [];
    for (let j = 0; j < ops.fnArray.length; j++) {
        const fn = ops.fnArray[j];
        if (fn === paintJpegXObject || fn === paintImageXObject) {
            // The first operator argument is the name of the image object.
            images.push(page.objs.get(ops.argsArray[j][0]));
        }
    }
    return images;
}

View File

@ -1,4 +1,7 @@
import { detectEmptyPages } from "./shared/detectEmptyPages";
import { detectEmptyPages } from "./shared/detectEmptyPages.js";
import { getImagesOnPage } from "./shared/getImagesOnPage.js";
import jsQR from "jsQR";
/**
* @typedef {"BAR_CODE"|"QR_CODE"|"BLANK_PAGE"} SplitType
@ -9,24 +12,22 @@ import { detectEmptyPages } from "./shared/detectEmptyPages";
* @param {Uint16Array} snapshot
* @param {SplitType} type
* @param {} PDFJS
* @param {} OpenCV
* @param {import('opencv-wasm')} OpenCV
* @param {} PDFLib
* @param {} QRCode
* @returns
*/
export async function splitOn(snapshot, type, whiteThreashold, PDFJS, OpenCV, PDFLib, QRCode) {
export async function splitOn(snapshot, type, whiteThreashold, PDFJS, OpenCV, PDFLib) {
let splitAtPages = [];
switch (type) {
case "BAR_CODE":
// TODO: Implement
throw new Error("This split-type has not been implemented yet")
throw new Error("This split-type has not been implemented yet");
break;
case "QR_CODE":
// TODO: Implement
throw new Error("This split-type has not been implemented yet")
splitAtPages = await getPagesWithQRCode(snapshot);
break;
case "BLANK_PAGE":
@ -45,4 +46,49 @@ export async function splitOn(snapshot, type, whiteThreashold, PDFJS, OpenCV, PD
// TODO: Remove detected Pages & Split
return pdfDoc.save();
/**
 * Finds the pages on which the expected QR code payload appears.
 *
 * Every image returned by getImagesOnPage for a page is handed to
 * checkForQROnImage; the page is reported when one of them decodes to the
 * expected payload.
 *
 * @param {Uint16Array} snapshot document data, passed as-is to PDFJS.getDocument
 * @returns {Promise<number[]>} 1-based page numbers in ascending order, each
 *     listed at most once
 */
async function getPagesWithQRCode(snapshot) {
    // Decoded QR content that marks a page as a match.
    const expectedPayload = "https://github.com/Frooodle/Stirling-PDF";

    const pdfDoc = await PDFJS.getDocument(snapshot).promise;
    const pagesWithQR = [];
    for (let i = 1; i <= pdfDoc.numPages; i++) {
        const page = await pdfDoc.getPage(i);
        console.log(`Checking page ${i}`);

        const images = await getImagesOnPage(page, PDFJS);
        for (const image of images) {
            const data = await checkForQROnImage(image);
            if (data === expectedPayload) {
                pagesWithQR.push(i);
                // One hit is enough: scanning further would list the page
                // once per matching image.
                break;
            }
        }
    }
    return pagesWithQR;
}
/**
 * Tries to decode a QR code from a single image.
 *
 * @param image object with `data`, `width` and `height`; `data` is expected to
 *     hold 3 (RGB) or 4 (RGBA) bytes per pixel
 *     (NOTE(review): other pixel layouts are passed to jsQR untouched - confirm
 *     whether they can occur)
 * @returns {Promise<string|null>} the decoded QR content, or null when no code
 *     was found or the image carries no pixel data
 */
async function checkForQROnImage(image) {
    // Images without raw pixel data cannot be scanned; skip them instead of throwing.
    if (!image || !image.data) {
        return null;
    }

    const { data, width, height } = image;
    const pixelCount = width * height;

    // TODO: jsQR expects RGBA but sometimes we get RGB, and the package seems to
    // be stale (a fork could fix this). Until then expand RGB to RGBA here. The
    // expansion goes into a fresh buffer so the caller's image object is left
    // untouched.
    let rgba = data;
    if (data.length === pixelCount * 3) {
        rgba = new Uint8ClampedArray(pixelCount * 4);
        for (let src = 0, dst = 0; src < data.length; src += 3, dst += 4) {
            rgba[dst] = data[src];         // Red channel
            rgba[dst + 1] = data[src + 1]; // Green channel
            rgba[dst + 2] = data[src + 2]; // Blue channel
            rgba[dst + 3] = 255;           // Alpha channel (fully opaque)
        }
    }

    const code = jsQR(rgba, width, height);
    return code ? code.data : null;
}
};

View File

@ -115,6 +115,12 @@ export async function * traverseOperations(operations, input, Functions) {
input.buffer = await Functions.removeBlankPages(input.buffer, operation.values["whiteThreashold"]);
});
break;
case "splitOn":
yield* oneToN(input, operation, async (input) => {
input.fileName += "_split";
input.buffer = await Functions.splitOn(input.buffer, operation.values["type"], operation.values["whiteThreashold"]);
});
break;
default:
throw new Error(`${operation.type} not implemented yet.`);
break;