OpenCV-wasm-browser, removed external dependencies

This commit is contained in:
Felix Kaspar 2023-10-24 19:03:43 +02:00
parent 9eb3ff4bb3
commit 50a1bd8082
11 changed files with 10274 additions and 7 deletions

View File

@ -51,5 +51,5 @@ export async function organizePages(snapshot, operation, customOrderString) {
} }
export async function removeBlankPages(snapshot, whiteThreashold) { export async function removeBlankPages(snapshot, whiteThreashold) {
return dependantRemoveBlankPages(snapshot, whiteThreashold, PDFJS, OpenCV); return dependantRemoveBlankPages(snapshot, whiteThreashold, PDFJS, OpenCV, PDFLib);
} }

View File

@ -0,0 +1,167 @@
//download.js v4.2, by dandavis; 2008-2016. [MIT] see http://danml.com/download.html for tests/usage
// v1 landed a FF+Chrome compat way of downloading strings to local un-named files, upgraded to use a hidden frame and optional mime
// v2 added named files via a[download], msSaveBlob, IE (10+) support, and window.URL support for larger+faster saves than dataURLs
// v3 added dataURL and Blob Input, bind-toggle arity, and legacy dataURL fallback was improved with force-download mime and base64 support. 3.1 improved safari handling.
// v4 adds AMD/UMD, commonJS, and plain browser support
// v4.1 adds url download capability via solo URL argument (same domain/CORS only)
// v4.2 adds semantic variable names, long (over 2MB) dataURL support, and hidden by default temp anchors
// https://github.com/rndme/download
/**
 * UMD wrapper: registers the `download` function as an anonymous AMD module,
 * as `module.exports` in CommonJS-like environments, or as `root.download`
 * otherwise.
 */
(function (root, factory) {
	if (typeof define === 'function' && define.amd) {
		// AMD. Register as an anonymous module.
		define([], factory);
	} else if (typeof exports === 'object') {
		// Node. Does not work with strict CommonJS, but
		// only CommonJS-like environments that support module.exports,
		// like Node.
		module.exports = factory();
	} else {
		// Browser globals (root is window)
		root.download = factory();
	}
// `this` is undefined when the file is evaluated as an ES module; fall back to the
// global object in that case so that `root.download = ...` does not throw.
}(typeof this !== "undefined" ? this : (typeof globalThis !== "undefined" ? globalThis : self), function () {

	/**
	 * Triggers a client-side download of `data`.
	 *
	 * @param {*} data - The content to save: a string, a data URL, a Blob, or any
	 *   other value accepted as a Blob part. When it is the only argument and is a
	 *   string shorter than 2048 characters that the browser resolves as a URL, it
	 *   is fetched via XMLHttpRequest and the response is downloaded instead.
	 * @param {string} [strFileName] - File name for the download; defaults to "download"
	 *   (or the last path segment of the URL in URL mode).
	 * @param {string} [strMimeType] - MIME type; defaults to "application/octet-stream".
	 * @returns {*} The XMLHttpRequest in URL mode, the result of `navigator.msSaveBlob`
	 *   where that API exists, otherwise whatever the internal saver returns
	 *   (`true` on the anchor and Safari paths) or `true`.
	 */
	return function download(data, strFileName, strMimeType) {

		var self = window, // this script is only for browsers anyway...
			defaultMime = "application/octet-stream", // this default mime also triggers iframe downloads
			mimeType = strMimeType || defaultMime,
			payload = data,
			url = !strFileName && !strMimeType && payload,
			anchor = document.createElement("a"),
			toString = function(a){return String(a);},
			myBlob = (self.Blob || self.MozBlob || self.WebKitBlob || toString),
			fileName = strFileName || "download",
			blob,
			reader;
		// Bind the chosen constructor to window when it is callable, else use the global Blob.
		myBlob = myBlob.call ? myBlob.bind(self) : Blob;

		if (String(this) === "true") { //reverse arguments, allowing download.bind(true, "text/xml", "export.xml") to act as a callback
			payload = [payload, mimeType];
			mimeType = payload[0];
			payload = payload[1];
		}

		// If no filename and no mime, assume a url was passed as the only argument.
		// Only strings can be URLs: without the type check a lone array-like argument
		// (e.g. a short Uint8Array) reached url.split() and threw a TypeError.
		if (typeof url === "string" && url.length < 2048) {
			fileName = url.split("/").pop().split("?")[0];
			anchor.href = url; // assign href prop to temp anchor
			if (anchor.href.indexOf(url) !== -1) { // if the browser determines that it's a potentially valid url path:
				var ajax = new XMLHttpRequest();
				ajax.open("GET", url, true);
				ajax.responseType = 'blob';
				ajax.onload = function(e){
					download(e.target.response, fileName, defaultMime);
				};
				setTimeout(function(){ ajax.send();}, 0); // allows setting custom ajax headers using the return:
				return ajax;
			} // end if valid url?
		} // end if url?

		//go ahead and download dataURLs right away
		if (/^data:([\w+-]+\/[\w+.-]+)?[,;]/.test(payload)) {

			if (payload.length > (1024*1024*1.999) && myBlob !== toString) {
				// long dataURLs are converted to a Blob and saved through the Blob path below
				payload = dataUrlToBlob(payload);
				mimeType = payload.type || defaultMime;
			} else {
				return navigator.msSaveBlob ? // IE10 can't do a[download], only Blobs:
					navigator.msSaveBlob(dataUrlToBlob(payload), fileName) :
					saver(payload); // everyone else can save dataURLs un-processed
			}

		} else { //not data url, is it a string with special needs?
			if (/([\x80-\xff])/.test(payload)) {
				// copy each char code into a byte array so the Blob holds those values directly
				var i = 0, tempUiArr = new Uint8Array(payload.length), mx = tempUiArr.length;
				for (i; i < mx; ++i) tempUiArr[i] = payload.charCodeAt(i);
				payload = new myBlob([tempUiArr], {type: mimeType});
			}
		}

		blob = payload instanceof myBlob ?
			payload :
			new myBlob([payload], {type: mimeType});

		/**
		 * Converts a data URL into a Blob, decoding the payload with atob when the
		 * URL is marked base64 and with decodeURIComponent otherwise.
		 */
		function dataUrlToBlob(strUrl) {
			var parts = strUrl.split(/[:;,]/),
				type = parts[1],
				decoder = parts[2] == "base64" ? atob : decodeURIComponent,
				binData = decoder(parts.pop()),
				mx = binData.length,
				i = 0,
				uiArr = new Uint8Array(mx);

			for (i; i < mx; ++i) uiArr[i] = binData.charCodeAt(i);

			return new myBlob([uiArr], {type: type});
		}

		/**
		 * Saves `url` using the best mechanism available: a hidden a[download]
		 * anchor, a new window for Safari without a[download], or a temporary iframe.
		 * `winMode === true` marks `url` as an object URL to revoke after the click.
		 */
		function saver(url, winMode) {

			if ('download' in anchor) { //html5 A[download]
				anchor.href = url;
				anchor.setAttribute("download", fileName);
				anchor.className = "download-js-link";
				anchor.innerHTML = "downloading...";
				anchor.style.display = "none";
				document.body.appendChild(anchor);
				setTimeout(function() {
					anchor.click();
					document.body.removeChild(anchor);
					if (winMode === true) { setTimeout(function(){ self.URL.revokeObjectURL(anchor.href);}, 250); }
				}, 66);
				return true;
			}

			// handle non-a[download] safari as best we can:
			if (/(Version)\/(\d+)\.(\d+)(?:\.(\d+))?.*Safari\//.test(navigator.userAgent)) {
				if (/^data:/.test(url)) url = "data:" + url.replace(/^data:([\w\/\-\+]+)/, defaultMime);
				if (!window.open(url)) { // popup blocked, offer direct download:
					if (confirm("Displaying New Document\n\nUse Save As... to download, then click back to return to this page.")) { location.href = url; }
				}
				return true;
			}

			//do iframe dataURL download (old ch+FF):
			var f = document.createElement("iframe");
			document.body.appendChild(f);

			if (!winMode && /^data:/.test(url)) { // force a mime that will download:
				url = "data:" + url.replace(/^data:([\w\/\-\+]+)/, defaultMime);
			}
			f.src = url;
			setTimeout(function(){ document.body.removeChild(f); }, 333);

		} //end saver

		if (navigator.msSaveBlob) { // IE10+ : (has Blob, but not a[download] or URL)
			return navigator.msSaveBlob(blob, fileName);
		}

		if (self.URL) { // simple fast and modern way using Blob and URL:
			saver(self.URL.createObjectURL(blob), true);
		} else {
			// handle non-Blob()+non-URL browsers:
			if (typeof blob === "string" || blob.constructor === toString) {
				try {
					return saver("data:" + mimeType + ";base64," + self.btoa(blob));
				} catch (y) {
					// btoa failed on this input; fall back to a percent-encoded dataURL
					return saver("data:" + mimeType + "," + encodeURIComponent(blob));
				}
			}

			// Blob but not URL support:
			reader = new FileReader();
			reader.onload = function(e){
				saver(this.result);
			};
			reader.readAsDataURL(blob);
		}
		return true;
	}; /* end download() */
}));

16
public/dep/pdf-lib.min.js vendored Normal file

File diff suppressed because one or more lines are too long

9995
public/dep/pdf.min.js vendored Normal file

File diff suppressed because it is too large Load Diff

22
public/dep/pdf.worker.min.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@ -136,4 +136,18 @@ export const imposeOnly = {
operations: [] operations: []
} }
] ]
}
// Remove blank pages from a document
export const removeBlankPagesOnly = {
outputOptions: {
zip: false
},
operations: [
{
type: "removeBlankPages",
values: { "whiteThreashold": 10 },
operations: []
}
]
} }

View File

@ -1,5 +1,6 @@
// PDFLib gets imported via index.html script-tag // PDFLib gets imported via index.html script-tag
// TODO: OpenCV wasm js from browser const OpenCV = { cv: cv } // OPENCV gets imported as cv via index.html script-tag
// PDFJS gets imported as pdfjsLib via index.html script-tag
import * as pdfcpuWraopper from "./wasm/pdfcpu-wrapper-browser.js"; import * as pdfcpuWraopper from "./wasm/pdfcpu-wrapper-browser.js";
import { extractPages as dependantExtractPages } from "./functions/extractPages.js"; import { extractPages as dependantExtractPages } from "./functions/extractPages.js";
@ -50,5 +51,5 @@ export async function organizePages(snapshot, operation, customOrderString) {
} }
export async function removeBlankPages(snapshot, whiteThreashold) { export async function removeBlankPages(snapshot, whiteThreashold) {
return dependantRemoveBlankPages(snapshot, whiteThreashold, PDFLib, OpenCV); return dependantRemoveBlankPages(snapshot, whiteThreashold, pdfjsLib, OpenCV, PDFLib);
} }

View File

@ -1,10 +1,11 @@
export async function removeBlankPages(snapshot, whiteThreashold, PDFJS, OpenCV) { export async function removeBlankPages(snapshot, whiteThreashold, PDFJS, OpenCV, PDFLib) {
const pdfDoc = await PDFJS.getDocument(snapshot).promise; const pdfDoc = await PDFJS.getDocument(snapshot).promise;
const emptyPages = []; const emptyPages = [];
for (let i = 1; i <= pdfDoc.numPages; i++) { for (let i = 1; i <= pdfDoc.numPages; i++) {
const page = await pdfDoc.getPage(i); const page = await pdfDoc.getPage(i);
console.log("Checking images");
if(!await hasText(page)) { if(!await hasText(page)) {
console.log("Found text on Page, page is not empty"); console.log("Found text on Page, page is not empty");

View File

@ -6,10 +6,12 @@
<title>Document</title> <title>Document</title>
<!--TODO: Remove External Dependencies--> <!--TODO: Remove External Dependencies-->
<script src="https://unpkg.com/pdf-lib@1.17.1/dist/pdf-lib.min.js"></script> <script src="/dep/pdf-lib.min.js"></script>
<script src="https://unpkg.com/downloadjs@1.4.7"></script> <script src="/dep/downloadjs_1.4.7.js"></script>
<script src="/dep/pdf.min.js"></script>
<script src="/wasm/browserfs.min.js"></script> <script src="/wasm/browserfs.min.js"></script>
<script src="/wasm/opencv/opencv_3_4_custom_O3.js"></script>
<script src="index.js" type="module"></script> <script src="index.js" type="module"></script>
</head> </head>

View File

@ -40,4 +40,4 @@ import * as Functions from "./functions.js";
download(result.buffer, result.fileName, "application/pdf"); download(result.buffer, result.fileName, "application/pdf");
}); });
}); });
})(exampleWorkflows.imposeOnly); })(exampleWorkflows.removeBlankPagesOnly);

File diff suppressed because one or more lines are too long