mirror of
				https://github.com/Frooodle/Stirling-PDF.git
				synced 2025-11-01 01:21:18 +01:00 
			
		
		
		
	Remove blank pages done, Updated README.md
This commit is contained in:
		
							parent
							
								
									50a1bd8082
								
							
						
					
					
						commit
						f78a64d545
					
				@ -105,7 +105,7 @@ Current functions of spdf and their progress in this repo.
 | 
			
		||||
 | 
			
		||||
| Status | Feature            | Description |
 | 
			
		||||
| ------ | ------------------ | ----------- |
 | 
			
		||||
| 🚧    | Remove Blank Pages |             |
 | 
			
		||||
| ✔️    | Remove Blank Pages |             |
 | 
			
		||||
| 🚧    | Auto Split Pages   |             |
 | 
			
		||||
 | 
			
		||||
| Status | Feature      | Description |
 | 
			
		||||
 | 
			
		||||
@ -1,30 +1,42 @@
 | 
			
		||||
export async function removeBlankPages(snapshot, whiteThreashold, PDFJS, OpenCV, PDFLib) {
 | 
			
		||||
    
 | 
			
		||||
    const pdfDoc = await PDFJS.getDocument(snapshot).promise;
 | 
			
		||||
    const emptyPages = await findEmptyPages(snapshot);
 | 
			
		||||
 | 
			
		||||
    const emptyPages = [];
 | 
			
		||||
    for (let i = 1; i <= pdfDoc.numPages; i++) {
 | 
			
		||||
        const page = await pdfDoc.getPage(i);
 | 
			
		||||
        console.log("Checking images");
 | 
			
		||||
    console.log("Empty Pages: ", emptyPages);
 | 
			
		||||
 | 
			
		||||
        if(!await hasText(page)) {
 | 
			
		||||
            console.log("Found text on Page, page is not empty");
 | 
			
		||||
            continue;
 | 
			
		||||
    const pdfDoc = await PDFLib.PDFDocument.load(snapshot);
 | 
			
		||||
 | 
			
		||||
    // Reverse the array before looping so that the remaining indices still point at the right pages. E.g. if you delete page 5, page 7 becomes page 6; if you delete page 7, page 5 remains page 5.
 | 
			
		||||
    emptyPages.reverse().forEach(pageIndex => {
 | 
			
		||||
        pdfDoc.removePage(pageIndex);
 | 
			
		||||
    })
 | 
			
		||||
 | 
			
		||||
    return pdfDoc.save();
 | 
			
		||||
 | 
			
		||||
    async function findEmptyPages(snapshot) {
 | 
			
		||||
        const pdfDoc = await PDFJS.getDocument(snapshot).promise;
 | 
			
		||||
 | 
			
		||||
        const emptyPages = [];
 | 
			
		||||
        for (let i = 1; i <= pdfDoc.numPages; i++) {
 | 
			
		||||
            const page = await pdfDoc.getPage(i);
 | 
			
		||||
            console.log("Checking page " + i);
 | 
			
		||||
    
 | 
			
		||||
            if(!await hasText(page)) {
 | 
			
		||||
                console.log(`Found text on Page ${i}, page is not empty`);
 | 
			
		||||
                continue;
 | 
			
		||||
            }
 | 
			
		||||
    
 | 
			
		||||
            if(!await areImagesBlank(page, whiteThreashold)) {
 | 
			
		||||
                console.log(`Found non white image on Page ${i}, page is not empty`);
 | 
			
		||||
                continue;
 | 
			
		||||
            }
 | 
			
		||||
    
 | 
			
		||||
            console.log(`Page ${i} is empty.`);
 | 
			
		||||
            emptyPages.push(i - 1);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if(!await areImagesBlank(page, whiteThreashold)) {
 | 
			
		||||
            console.log("Found image on Page, page is not empty");
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        emptyPages.push[i];
 | 
			
		||||
        return emptyPages;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    console.log(emptyPages);
 | 
			
		||||
 | 
			
		||||
    // TODO: Remove emptyPages using pdflib
 | 
			
		||||
    // return pdf;
 | 
			
		||||
 | 
			
		||||
    async function areImagesBlank(page, whiteThreashold) {
 | 
			
		||||
        const ops = await page.getOperatorList();
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user