mirror of
				https://github.com/Frooodle/Stirling-PDF.git
				synced 2025-10-20 11:16:24 +02:00 
			
		
		
		
	
		
			
				
	
	
		
			50 lines
		
	
	
		
			1.4 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			50 lines
		
	
	
		
			1.4 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| # Main stage
 | |
| # Base image is pinned to a fixed release instead of the floating "latest"
 | |
| # tag so rebuilds are reproducible. The RUN steps below install Python
 | |
| # packages system-wide with pip.
 | |
| # NOTE(review): newer Ubuntu releases mark the system Python as externally
 | |
| # managed (PEP 668), which makes those pip steps fail - confirm before bumping.
 | |
| FROM ubuntu:22.04 AS base
 | |
| 
 | |
| 
 | |
| 
 | |
| # JDK for app
 | |
| # Installs the Java 17 runtime. The apt package index is deleted in the
 | |
| # same RUN so it is never stored in this layer; removing it in a later
 | |
| # layer would not make the image any smaller.
 | |
| RUN apt-get update && \
 | |
|     apt-get install -y --no-install-recommends \
 | |
|         openjdk-17-jre && \
 | |
|     rm -rf /var/lib/apt/lists/*
 | |
|     	
 | |
| # Doc conversion
 | |
| # Installs the LibreOffice components, the Python UNO bridge, unoconv and
 | |
| # curl. The apt package index is deleted in the same RUN so it is never
 | |
| # stored in this layer; removing it in a later layer would not make the
 | |
| # image any smaller.
 | |
| RUN apt-get update && \
 | |
|     apt-get install -y --no-install-recommends \
 | |
|         libreoffice-core \
 | |
|         libreoffice-common \
 | |
|         libreoffice-writer \
 | |
|         libreoffice-calc \
 | |
|         libreoffice-impress \
 | |
|         python3-uno \
 | |
|         curl \
 | |
|         unoconv && \
 | |
|     rm -rf /var/lib/apt/lists/*
 | |
| 		
 | |
| 		
 | |
| # OCR MY PDF (unpaper for deskew and other advanced features)
 | |
| # Step 1: install the tooling needed to register a PPA, then add the
 | |
| #         tesseract-ocr5 PPA (-y keeps the step non-interactive).
 | |
| # Step 2: refresh the index so the PPA packages are visible, then install
 | |
| #         tesseract, ghostscript, pip, ocrmypdf and unpaper with apt-get
 | |
| #         (apt-get is the front-end intended for scripted use).
 | |
| # Step 3: drop the apt index in this same layer so it is not persisted.
 | |
| # Step 4: upgrade pip and ocrmypdf, then pin the listed Python packages to
 | |
| #         exact versions; --no-cache-dir keeps pip's download cache out of
 | |
| #         the image.
 | |
| RUN apt-get update && \
 | |
|     apt-get install -y --no-install-recommends software-properties-common gnupg2 && \
 | |
|     add-apt-repository -y ppa:alex-p/tesseract-ocr5 && \
 | |
|     apt-get update && \
 | |
|     apt-get install -y --no-install-recommends \
 | |
|         tesseract-ocr \
 | |
|         ghostscript \
 | |
|         python3-pip \
 | |
|         ocrmypdf \
 | |
|         unpaper && \
 | |
|     rm -rf /var/lib/apt/lists/* && \
 | |
|     pip install --no-cache-dir --upgrade pip && \
 | |
|     pip install --no-cache-dir --upgrade ocrmypdf && \
 | |
|     pip install --no-cache-dir --upgrade pillow==10.0.1 reportlab==3.6.13 wheel==0.38.1 setuptools==65.5.1 pyjwt==2.4.0 cryptography==39.0.1
 | |
|     
 | |
|     
 | |
| # CV and HTML
 | |
| # Python packages installed with pip, without keeping pip's download cache.
 | |
| RUN pip install --no-cache-dir \
 | |
|         opencv-python-headless \
 | |
|         WeasyPrint
 | |
| 
 | |
| 
 | |
| # cleanup and etc
 | |
| # Removes the apt package index, then copies the contents of
 | |
| # /usr/share/tesseract-ocr into /usr/share/tesseract-ocr-original and
 | |
| # deletes the original directory. "set -e" aborts the step on the first
 | |
| # failing command.
 | |
| RUN set -e; \
 | |
|     rm -rf /var/lib/apt/lists/*; \
 | |
|     mkdir /usr/share/tesseract-ocr-original; \
 | |
|     cp -r /usr/share/tesseract-ocr/* /usr/share/tesseract-ocr-original; \
 | |
|     rm -rf /usr/share/tesseract-ocr
 | |
| 
 | |
| 
 | |
| 
 | |
|      |