Remove blank pages done, Updated README.md

This commit is contained in:
Felix Kaspar 2023-10-24 19:31:14 +02:00
parent 50a1bd8082
commit f78a64d545
2 changed files with 33 additions and 21 deletions

View File

@ -105,7 +105,7 @@ Current functions of spdf and their progress in this repo.
| Status | Feature | Description |
| ------ | ------------------ | ----------- |
| 🚧 | Remove Blank Pages | |
| ✔️ | Remove Blank Pages | |
| 🚧 | Auto Split Pages | |
| Status | Feature | Description |

View File

@ -1,29 +1,41 @@
export async function removeBlankPages(snapshot, whiteThreashold, PDFJS, OpenCV, PDFLib) {
const emptyPages = await findEmptyPages(snapshot);
console.log("Empty Pages: ", emptyPages);
const pdfDoc = await PDFLib.PDFDocument.load(snapshot);
// Reverse the array before looping so the remaining indices keep pointing at the right pages. E.g. if you delete page 5 first, page 7 becomes page 6; if you delete page 7 first, page 5 remains page 5.
emptyPages.reverse().forEach(pageIndex => {
pdfDoc.removePage(pageIndex);
})
return pdfDoc.save();
/**
 * Walks through every page of the document and collects the pages that
 * pass both the `hasText` check and the `areImagesBlank` check.
 *
 * @param {*} snapshot - PDF data, handed unchanged to `PDFJS.getDocument`.
 * @returns {Promise<number[]>} Zero-based indices of the pages considered
 *   empty, in ascending order.
 */
async function findEmptyPages(snapshot) {
    const pdfDoc = await PDFJS.getDocument(snapshot).promise;
    const emptyPages = [];
    // getPage is called with page numbers running from 1 to numPages.
    for (let i = 1; i <= pdfDoc.numPages; i++) {
        const page = await pdfDoc.getPage(i);
        console.log(`Checking page ${i}`);
        // NOTE(review): this branch runs when hasText() resolves to a falsy
        // value, yet the message says text was found. hasText is defined
        // outside this block - confirm which polarity it returns.
        if (!await hasText(page)) {
            console.log(`Found text on Page ${i}, page is not empty`);
            continue;
        }
        if (!await areImagesBlank(page, whiteThreashold)) {
            console.log(`Found non white image on Page ${i}, page is not empty`);
            continue;
        }
        console.log(`Page ${i} is empty.`);
        // Record the zero-based index (i is the 1-based page number).
        emptyPages.push(i - 1);
    }
    return emptyPages;
}
console.log(emptyPages);
// TODO: Remove emptyPages using pdflib
// return pdf;
async function areImagesBlank(page, whiteThreashold) {
const ops = await page.getOperatorList();