# Main stage: base image carrying Tesseract, LibreOffice, Java and the Python
# OCR tooling.
# NOTE(review): `ubuntu:latest` is a floating tag, so rebuilds may pick up a
# different release; consider pinning once the intended release is confirmed.
FROM ubuntu:latest AS base

# software-properties-common provides add-apt-repository, used in the next step.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        software-properties-common \
        gnupg2

# Install Tesseract from the alex-p PPA. `-y` avoids the interactive
# confirmation prompt, the explicit update makes sure the PPA index is present,
# and apt-get is used because `apt` has no stable CLI for scripted use.
RUN add-apt-repository -y ppa:alex-p/tesseract-ocr5 && \
    apt-get update && \
    apt-get install -y --no-install-recommends tesseract-ocr

# Runtime packages plus Python dependencies, installed in a single layer so the
# apt lists can be removed in the same layer.
# Every command is chained with `&&`: the pinned-versions `pip install` used to
# run straight into the following `pip install`, turning the words "pip" and
# "install" into package names.
# The stock Tesseract data is copied to /usr/share/tesseract-ocr-original and
# the original directory is removed (presumably so that path can be provided
# at runtime, e.g. by a volume - confirm against the entrypoint).
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        openjdk-17-jre \
        libreoffice-core-nogui \
        libreoffice-common \
        libreoffice-writer-nogui \
        libreoffice-calc-nogui \
        libreoffice-impress-nogui \
        python3-uno \
        ghostscript \
        python3-pip \
        ocrmypdf \
        unoconv && \
    pip install --upgrade pip && \
    pip install --no-cache-dir --user --upgrade ocrmypdf && \
    pip install --no-cache-dir --upgrade \
        pillow==10.0.1 \
        reportlab==3.6.13 \
        wheel==0.38.1 \
        setuptools==65.5.1 \
        pyjwt==2.4.0 \
        cryptography==39.0.1 && \
    pip install --no-cache-dir --upgrade ocrmypdf && \
    pip install --no-cache-dir \
        opencv-python-headless && \
    rm -rf /var/lib/apt/lists/* && \
    mkdir /usr/share/tesseract-ocr-original && \
    cp -r /usr/share/tesseract-ocr/* /usr/share/tesseract-ocr-original && \
    rm -rf /usr/share/tesseract-ocr

# Python packages stage: adds the compiler toolchain and development headers
# on top of the base image.
# NOTE(review): no pip install is visible in this stage, so its /usr/local
# appears to be identical to the one already in `base` - confirm whether a
# build step is missing here.
FROM base AS python-packages

# Install build tools and Python libraries
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        libffi-dev \
        libssl-dev \
        zlib1g-dev \
        libjpeg-dev

# Final stage: start again from the slim base image and copy only /usr/local
# from the build stage, leaving the build tools behind.
FROM base
COPY --from=python-packages /usr/local /usr/local