mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-01-23 00:06:08 +01:00
Remove blank pages done, Updated README.md
This commit is contained in:
parent
50a1bd8082
commit
f78a64d545
@ -105,7 +105,7 @@ Current functions of spdf and their progress in this repo.
|
||||
|
||||
| Status | Feature | Description |
|
||||
| ------ | ------------------ | ----------- |
|
||||
| 🚧 | Remove Blank Pages | |
|
||||
| ✔️ | Remove Blank Pages | |
|
||||
| 🚧 | Auto Split Pages | |
|
||||
|
||||
| Status | Feature | Description |
|
||||
|
@ -1,30 +1,42 @@
|
||||
export async function removeBlankPages(snapshot, whiteThreashold, PDFJS, OpenCV, PDFLib) {
|
||||
|
||||
const pdfDoc = await PDFJS.getDocument(snapshot).promise;
|
||||
const emptyPages = await findEmptyPages(snapshot);
|
||||
|
||||
const emptyPages = [];
|
||||
for (let i = 1; i <= pdfDoc.numPages; i++) {
|
||||
const page = await pdfDoc.getPage(i);
|
||||
console.log("Checking images");
|
||||
console.log("Empty Pages: ", emptyPages);
|
||||
|
||||
if(!await hasText(page)) {
|
||||
console.log("Found text on Page, page is not empty");
|
||||
continue;
|
||||
const pdfDoc = await PDFLib.PDFDocument.load(snapshot);
|
||||
|
||||
// Reverse the array before looping so that the remaining indices still point at the right pages. E.g. if you delete page 5 first, page 7 becomes page 6; if you delete page 7 first, page 5 remains page 5.
|
||||
emptyPages.reverse().forEach(pageIndex => {
|
||||
pdfDoc.removePage(pageIndex);
|
||||
})
|
||||
|
||||
return pdfDoc.save();
|
||||
|
||||
async function findEmptyPages(snapshot) {
|
||||
const pdfDoc = await PDFJS.getDocument(snapshot).promise;
|
||||
|
||||
const emptyPages = [];
|
||||
for (let i = 1; i <= pdfDoc.numPages; i++) {
|
||||
const page = await pdfDoc.getPage(i);
|
||||
console.log("Checking page " + i);
|
||||
|
||||
if(!await hasText(page)) {
|
||||
console.log(`Found text on Page ${i}, page is not empty`);
|
||||
continue;
|
||||
}
|
||||
|
||||
if(!await areImagesBlank(page, whiteThreashold)) {
|
||||
console.log(`Found non white image on Page ${i}, page is not empty`);
|
||||
continue;
|
||||
}
|
||||
|
||||
console.log(`Page ${i} is empty.`);
|
||||
emptyPages.push(i - 1);
|
||||
}
|
||||
|
||||
if(!await areImagesBlank(page, whiteThreashold)) {
|
||||
console.log("Found image on Page, page is not empty");
|
||||
continue;
|
||||
}
|
||||
|
||||
emptyPages.push[i];
|
||||
return emptyPages;
|
||||
}
|
||||
|
||||
console.log(emptyPages);
|
||||
|
||||
// TODO: Remove emptyPages using pdflib
|
||||
// return pdf;
|
||||
|
||||
async function areImagesBlank(page, whiteThreashold) {
|
||||
const ops = await page.getOperatorList();
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user