mirror of
				https://github.com/Frooodle/Stirling-PDF.git
				synced 2025-11-01 01:21:18 +01:00 
			
		
		
		
	Merge branch 'cleanups' of https://github.com/Frooodle/Stirling-PDF into cleanups
This commit is contained in:
		
						commit
						1d55ee7f93
					
				@ -40,6 +40,8 @@ Feel free to request any features or bug fixes either in github issues or our [D
 | 
			
		||||
- Parallel file processing and downloads
 | 
			
		||||
- API for integration with external scripts 
 | 
			
		||||
 | 
			
		||||
A hosted demo instance of the app is available [here](https://pdf.adminforge.de/), provided by the team at adminforge.de.
 | 
			
		||||
 | 
			
		||||
## Technologies used
 | 
			
		||||
- Spring Boot + Thymeleaf
 | 
			
		||||
- PDFBox
 | 
			
		||||
 | 
			
		||||
@ -0,0 +1,98 @@
 | 
			
		||||
package stirling.software.SPDF.controller.api.other;
 | 
			
		||||
import org.apache.pdfbox.pdmodel.PDDocument;
 | 
			
		||||
import org.apache.pdfbox.pdmodel.PDPage;
 | 
			
		||||
import org.apache.pdfbox.pdmodel.PDPageTree;
 | 
			
		||||
import org.apache.pdfbox.text.PDFTextStripper;
 | 
			
		||||
import org.springframework.http.HttpStatus;
 | 
			
		||||
import org.springframework.http.ResponseEntity;
 | 
			
		||||
import org.springframework.web.bind.annotation.PostMapping;
 | 
			
		||||
import org.springframework.web.bind.annotation.RequestPart;
 | 
			
		||||
import org.springframework.web.bind.annotation.RestController;
 | 
			
		||||
import org.springframework.web.multipart.MultipartFile;
 | 
			
		||||
import stirling.software.SPDF.utils.ImageFinder;
 | 
			
		||||
import java.io.ByteArrayOutputStream;
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.Arrays;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
@RestController
 | 
			
		||||
public class BlankPageController {
 | 
			
		||||
 | 
			
		||||
    @PostMapping(consumes = "multipart/form-data", value = "/remove-blanks")
 | 
			
		||||
    public ResponseEntity<byte[]> removeBlankPages(@RequestPart(required = true, value = "fileInput") MultipartFile inputFile, @RequestPart(required = true, value = "processType") int processType) throws IOException, InterruptedException {
 | 
			
		||||
        boolean removeNoText = false;
 | 
			
		||||
        boolean removeNoTextOrImages = false;
 | 
			
		||||
        
 | 
			
		||||
        if(processType == 0) {
 | 
			
		||||
        	removeNoText = true;
 | 
			
		||||
        } else if (processType == 1) {
 | 
			
		||||
        	removeNoTextOrImages = true;
 | 
			
		||||
        } else if (processType == 2) {
 | 
			
		||||
        	//run OCR 
 | 
			
		||||
        	OCRController ocr = new OCRController();
 | 
			
		||||
            ocr.processPdfWithOCR(inputFile, Arrays.asList("eng"), false, false, true, false, "type", "hocr", false);
 | 
			
		||||
            
 | 
			
		||||
        	removeNoText = true;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        try {
 | 
			
		||||
            PDDocument document = PDDocument.load(inputFile.getInputStream());
 | 
			
		||||
            PDPageTree pages = document.getDocumentCatalog().getPages();
 | 
			
		||||
            PDFTextStripper textStripper = new PDFTextStripper();
 | 
			
		||||
 | 
			
		||||
            List<PDPage> pagesToKeep = new ArrayList<>();
 | 
			
		||||
            int pageIndex = 0;
 | 
			
		||||
 | 
			
		||||
            for (PDPage page : pages) {
 | 
			
		||||
                pageIndex++;
 | 
			
		||||
                textStripper.setStartPage(pageIndex);
 | 
			
		||||
                textStripper.setEndPage(pageIndex);
 | 
			
		||||
                String pageText = textStripper.getText(document);
 | 
			
		||||
                boolean hasText = !pageText.trim().isEmpty();
 | 
			
		||||
                boolean hasImages = hasImagesOnPage(page);
 | 
			
		||||
 | 
			
		||||
                if (removeNoText && removeNoTextOrImages) {
 | 
			
		||||
                    if (hasText || hasImages) {
 | 
			
		||||
                        pagesToKeep.add(page);
 | 
			
		||||
                    }
 | 
			
		||||
                } else if (removeNoText) {
 | 
			
		||||
                    if (hasText) {
 | 
			
		||||
                        pagesToKeep.add(page);
 | 
			
		||||
                    }
 | 
			
		||||
                } else if (removeNoTextOrImages) {
 | 
			
		||||
                    if (hasText && hasImages) {
 | 
			
		||||
                        pagesToKeep.add(page);
 | 
			
		||||
                    }
 | 
			
		||||
                } else {
 | 
			
		||||
                    pagesToKeep.add(page);
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            PDDocument outputDocument = new PDDocument();
 | 
			
		||||
            for (PDPage page : pagesToKeep) {
 | 
			
		||||
                outputDocument.addPage(page);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
 | 
			
		||||
            outputDocument.save(outputStream);
 | 
			
		||||
            outputDocument.close();
 | 
			
		||||
            document.close();
 | 
			
		||||
 | 
			
		||||
            return new ResponseEntity<>(outputStream.toByteArray(), HttpStatus.OK);
 | 
			
		||||
        } catch (IOException e) {
 | 
			
		||||
            e.printStackTrace();
 | 
			
		||||
            return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    private static boolean hasImagesOnPage(PDPage page) throws IOException {
 | 
			
		||||
        ImageFinder imageFinder = new ImageFinder(page);
 | 
			
		||||
        imageFinder.processPage(page);
 | 
			
		||||
        return imageFinder.hasImages();
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
    // ... rest of the code (ImageFinder class and hasImagesOnPage method)
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										130
									
								
								src/main/java/stirling/software/SPDF/utils/ImageFinder.java
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										130
									
								
								src/main/java/stirling/software/SPDF/utils/ImageFinder.java
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,130 @@
 | 
			
		||||
package stirling.software.SPDF.utils;
 | 
			
		||||
 | 
			
		||||
import org.apache.pdfbox.contentstream.operator.Operator;
 | 
			
		||||
import org.apache.pdfbox.contentstream.operator.OperatorName;
 | 
			
		||||
import org.apache.pdfbox.cos.COSBase;
 | 
			
		||||
import org.apache.pdfbox.cos.COSName;
 | 
			
		||||
import org.apache.pdfbox.pdmodel.PDPage;
 | 
			
		||||
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
 | 
			
		||||
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
 | 
			
		||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImage;
 | 
			
		||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
 | 
			
		||||
 | 
			
		||||
import java.awt.geom.Point2D;
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
public class ImageFinder extends org.apache.pdfbox.contentstream.PDFGraphicsStreamEngine {
 | 
			
		||||
    private boolean hasImages = false;
 | 
			
		||||
 | 
			
		||||
    public ImageFinder(PDPage page) {
 | 
			
		||||
        super(page);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public boolean hasImages() {
 | 
			
		||||
        return hasImages;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    @Override
 | 
			
		||||
    protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
 | 
			
		||||
        String operation = operator.getName();
 | 
			
		||||
        if (operation.equals(OperatorName.DRAW_OBJECT)) {
 | 
			
		||||
            COSBase base = operands.get(0);
 | 
			
		||||
            if (base instanceof COSName) {
 | 
			
		||||
                COSName objectName = (COSName) base;
 | 
			
		||||
                PDXObject xobject = getResources().getXObject(objectName);
 | 
			
		||||
                if (xobject instanceof PDImageXObject) {
 | 
			
		||||
                    hasImages = true;
 | 
			
		||||
                } else if (xobject instanceof PDFormXObject) {
 | 
			
		||||
                    PDFormXObject form = (PDFormXObject) xobject;
 | 
			
		||||
                    ImageFinder innerFinder = new ImageFinder(getPage());
 | 
			
		||||
                    innerFinder.processPage(getPage());
 | 
			
		||||
                    if (innerFinder.hasImages()) {
 | 
			
		||||
                        hasImages = true;
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        super.processOperator(operator, operands);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException {
 | 
			
		||||
		// TODO Auto-generated method stub
 | 
			
		||||
		
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public void drawImage(PDImage pdImage) throws IOException {
 | 
			
		||||
		// TODO Auto-generated method stub
 | 
			
		||||
		
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public void clip(int windingRule) throws IOException {
 | 
			
		||||
		// TODO Auto-generated method stub
 | 
			
		||||
		
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public void moveTo(float x, float y) throws IOException {
 | 
			
		||||
		// TODO Auto-generated method stub
 | 
			
		||||
		
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public void lineTo(float x, float y) throws IOException {
 | 
			
		||||
		// TODO Auto-generated method stub
 | 
			
		||||
		
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) throws IOException {
 | 
			
		||||
		// TODO Auto-generated method stub
 | 
			
		||||
		
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public Point2D getCurrentPoint() throws IOException {
 | 
			
		||||
		// TODO Auto-generated method stub
 | 
			
		||||
		return null;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public void closePath() throws IOException {
 | 
			
		||||
		// TODO Auto-generated method stub
 | 
			
		||||
		
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public void endPath() throws IOException {
 | 
			
		||||
		// TODO Auto-generated method stub
 | 
			
		||||
		
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public void strokePath() throws IOException {
 | 
			
		||||
		// TODO Auto-generated method stub
 | 
			
		||||
		
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public void fillPath(int windingRule) throws IOException {
 | 
			
		||||
		// TODO Auto-generated method stub
 | 
			
		||||
		
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public void fillAndStrokePath(int windingRule) throws IOException {
 | 
			
		||||
		// TODO Auto-generated method stub
 | 
			
		||||
		
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public void shadingFill(COSName shadingName) throws IOException {
 | 
			
		||||
		// TODO Auto-generated method stub
 | 
			
		||||
		
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
    // ... rest of the overridden methods
 | 
			
		||||
}
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user