Files
Stirling-PDF/docker/embedded/Dockerfile.fat
Balázs Szücs 9ac260ee92 feat(aot): add aot-diagnostics.sh for AOT cache diagnostics and validation (#5848)
# Description of Changes



This pull request makes significant improvements to the Docker build
process for the embedded Stirling-PDF image, focusing on build
efficiency, runtime optimization, and maintainability. Key changes
include upgrading major tool versions, introducing optional stripping of
Calibre's WebEngine to reduce image size, consolidating ImageMagick
layers, and refining the Python environment build process. The runtime
image is now leaner, with clearer separation between build and runtime
dependencies, and improved caching for faster builds and pulls.

**Build and Dependency Management Improvements**
* Upgraded Calibre to version `9.4.0` and added support for the
`TARGETPLATFORM` build argument for multi-platform builds.
* Added an optional `CALIBRE_STRIP_WEBENGINE` build argument to strip
Chromium/WebEngine from Calibre, saving ~80 MB when PDF output via
Calibre is not needed.
* Consolidated ImageMagick outputs into a single staging directory
(`/magick-export`) to reduce Docker layers and improve caching
efficiency.
* Refactored Python virtual environment build: now built in a dedicated
stage with pre-built wheels and copied into the runtime image,
eliminating the need for build tools and pip installs at runtime.

**Runtime Image Optimization**
* Reduced installed system packages to only what is needed at runtime;
Python build tools and dev packages are no longer included.
* Cleaned up unnecessary runtime files, including removal of build-only
Python artifacts and system headers, for a smaller and more secure
image.

**Layer and Copy Optimization**
* Switched to `COPY --link` for all major external tool layers and
application files, enabling independent layer caching and parallel pulls
for faster builds.

**Runtime Configuration and Health**
* Improved runtime directory structure and permissions, added persistent
cache directories for Project Leyden AOT, and wrote the version tag to
`/etc/stirling_version` for easier script access.
* Updated the healthcheck to wait longer for startup and increased
timeout/retries for more robust readiness detection.

<!--
Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)
-->

---

## Checklist

### General

- [ ] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### Translations (if applicable)

- [ ] I ran
[`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.

---------

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
2026-03-03 19:06:46 +00:00

607 lines
27 KiB
Docker

# Stirling-PDF - Fat version (embedded frontend)
# Extra fonts for air-gapped environments
FROM ubuntu:noble AS calibre-build
ARG CALIBRE_VERSION=9.4.0
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
set -eux; \
apt-get update; \
apt-get install -y --no-install-recommends \
ca-certificates curl xz-utils libnss3 libfontconfig1 \
libgl1 libegl1 libdbus-1-3 libasound2t64 libxcomposite1 \
libxrandr2 libxkbcommon0 libxi6 libxtst6 libopengl0; \
rm -rf /var/lib/apt/lists/*; \
\
case "$(uname -m)" in \
x86_64) CALIBRE_ARCH="x86_64" ;; \
aarch64) CALIBRE_ARCH="arm64" ;; \
*) echo "Unsupported arch: $(uname -m)"; exit 1 ;; \
esac; \
\
curl -fsSL \
"https://download.calibre-ebook.com/${CALIBRE_VERSION}/calibre-${CALIBRE_VERSION}-${CALIBRE_ARCH}.txz" \
-o /tmp/calibre.txz; \
mkdir -p /opt/calibre; \
tar xJf /tmp/calibre.txz -C /opt/calibre; \
rm /tmp/calibre.txz; \
\
# Remove GUI-only shared libraries.
# Libs required by WebEngine PDF output are preserved.
rm -f /opt/calibre/lib/libQt6Designer* \
/opt/calibre/lib/libQt6Multimedia* \
/opt/calibre/lib/libQt6SpatialAudio.so.* \
/opt/calibre/lib/libQt6NetworkAuth.so.* \
/opt/calibre/lib/libQt6Concurrent.so.* \
/opt/calibre/lib/libQt6OpenGLWidgets.so.* \
/opt/calibre/lib/libQt6QuickWidgets.so.* \
# AV / multimedia
/opt/calibre/lib/libavcodec.so.* \
/opt/calibre/lib/libavfilter.so.* \
/opt/calibre/lib/libavformat.so.* \
/opt/calibre/lib/libavutil.so.* \
/opt/calibre/lib/libavdevice.so.* \
/opt/calibre/lib/libpostproc.so.* \
/opt/calibre/lib/libswresample.so.* \
/opt/calibre/lib/libswscale.so.* \
# Audio / speech / TTS
/opt/calibre/lib/libspeex.so.* \
/opt/calibre/lib/libFLAC.so.* \
/opt/calibre/lib/libopus.so.* \
/opt/calibre/lib/libvorbis*.so.* \
/opt/calibre/lib/libasyncns.so.* \
/opt/calibre/lib/libspeechd.so.* \
/opt/calibre/lib/libespeak-ng.so.* \
# Other unused libs
/opt/calibre/lib/libonnxruntime.so.* \
/opt/calibre/lib/libgio-2.0.so.* \
/opt/calibre/lib/libzstd.so.* \
/opt/calibre/lib/libhunspell-1.7.so.* \
/opt/calibre/lib/libbrotlienc.so.* \
/opt/calibre/lib/libbrotlicommon.so.* \
/opt/calibre/lib/libbrotlidec.so.* \
/opt/calibre/lib/libstemmer.so.* \
/opt/calibre/lib/libmtp.so.* \
/opt/calibre/lib/libncursesw.so.* \
/opt/calibre/lib/libchm.so.* \
/opt/calibre/lib/libgcrypt.so.* \
/opt/calibre/lib/libgpg-error.so.* \
/opt/calibre/lib/libicuio.so.* \
/opt/calibre/lib/libreadline.so.* \
/opt/calibre/lib/libusb-1.0.so.*; \
rm -rf /opt/calibre/lib/qt6/plugins/platformthemes \
/opt/calibre/lib/qt6/plugins/multimedia \
/opt/calibre/lib/qt6/plugins/designer \
/opt/calibre/lib/qt6/plugins/qmltooling; \
\
# Remove GUI executables but keep ebook-convert, ebook-meta, and calibre-parallel.
rm -f /opt/calibre/calibre \
/opt/calibre/calibre-server \
/opt/calibre/calibre-smtp \
/opt/calibre/calibre-debug \
/opt/calibre/calibre-customize \
/opt/calibre/calibredb \
/opt/calibre/ebook-viewer \
/opt/calibre/ebook-edit \
/opt/calibre/ebook-polish \
/opt/calibre/ebook-device \
/opt/calibre/fetch-ebook-metadata \
/opt/calibre/lrf2lrs \
/opt/calibre/lrs2lrf \
/opt/calibre/markdown-calibre \
/opt/calibre/web2disk; \
\
# Remove Python modules not needed for conversion.
rm -rf /opt/calibre/lib/calibre/gui2 \
/opt/calibre/lib/calibre/devices \
/opt/calibre/lib/calibre/library \
/opt/calibre/lib/calibre/db \
/opt/calibre/lib/calibre/srv \
/opt/calibre/lib/calibre/spell \
/opt/calibre/lib/calibre/live; \
\
# Remove resources not needed for CLI conversion.
rm -rf /opt/calibre/resources/images \
/opt/calibre/resources/icons \
/opt/calibre/resources/icons.rcc \
/opt/calibre/resources/content-server \
/opt/calibre/resources/editor* \
/opt/calibre/resources/viewer \
/opt/calibre/resources/viewer.js \
/opt/calibre/resources/viewer.html \
/opt/calibre/resources/recipes \
/opt/calibre/resources/dictionaries \
/opt/calibre/resources/hyphenation \
/opt/calibre/resources/catalog \
/opt/calibre/resources/calibre-mimetypes.xml \
/opt/calibre/resources/changelog.json \
/opt/calibre/resources/user-agent-data.json \
/opt/calibre/resources/builtin_recipes.zip \
/opt/calibre/resources/builtin_recipes.xml \
/opt/calibre/resources/builtin_recipes.xml \
/opt/calibre/resources/stylelint-bundle.min.js \
/opt/calibre/resources/stylelint.js \
/opt/calibre/resources/rapydscript \
/opt/calibre/resources/quick_start \
/opt/calibre/resources/piper-voices.json \
/opt/calibre/resources/images.qrc \
/opt/calibre/resources/mozilla-ca-certs.pem \
/opt/calibre/resources/ebook-convert-complete.calibre_msgpack \
/opt/calibre/resources/mathjax \
/opt/calibre/resources/common-english-words.txt \
/opt/calibre/resources/calibre-portable.sh \
/opt/calibre/resources/calibre-portable.bat \
/opt/calibre/resources/metadata_sqlite.sql \
/opt/calibre/resources/notes_sqlite.sql \
/opt/calibre/resources/fts_sqlite.sql \
/opt/calibre/resources/fts_triggers.sql \
/opt/calibre/resources/jacket \
/opt/calibre/resources/editor-functions.json \
/opt/calibre/resources/calibre-ebook-root-CA.crt \
/opt/calibre/resources/csscolorparser.js \
/opt/calibre/resources/lookup.js \
/opt/calibre/resources/pdf-mathjax-loader.js \
/opt/calibre/resources/scraper.js \
/opt/calibre/resources/toc.js \
/opt/calibre/resources/user-manual-translation-stats.json \
/opt/calibre/resources/pin-template.svg \
/opt/calibre/resources/scripts.calibre_msgpack \
/opt/calibre/lib/calibre/ebooks/docx/images \
/opt/calibre/share \
/opt/calibre/man; \
\
# Remove translations and localization while keeping required libraries.
rm -rf /opt/calibre/lib/qt6/translations; \
find /opt/calibre/translations -mindepth 1 -maxdepth 1 ! -name 'qtwebengine_locales' -exec rm -rf {} +; \
rm -rf /opt/calibre/resources/localization/locales.zip \
/opt/calibre/resources/localization/stats.calibre_msgpack \
/opt/calibre/resources/localization/website-languages.txt; \
find /opt/calibre/resources/localization -mindepth 1 -maxdepth 1 ! -name 'iso639.calibre_msgpack' -exec rm -rf {} +; \
\
# Strip debug symbols from calibre extension modules.
# Exclude Qt6 libs and all qt6/ subdirectory files to prevent Chromium renderer crashes.
find /opt/calibre/lib -name '*.so*' \
! -name 'libQt6*' \
! -path '*/qt6/*' \
-exec strip --strip-unneeded {} + 2>/dev/null || true; \
\
find /opt/calibre -type d -name __pycache__ \
-exec rm -rf {} + 2>/dev/null || true; \
find /opt/calibre -name '*.pyc' -delete 2>/dev/null || true; \
\
# Verify conversion functionality.
# NOTE: txt→epub used intentionally NOT txt→pdf.
# Calibre 7+ uses WebEngine (Chromium) for PDF output, which requires kernel
# capabilities unavailable in Docker RUN steps and segfaults under QEMU.
# epub output exercises the same Python/plugin stack without touching WebEngine.
/opt/calibre/ebook-convert --version; \
echo "Hello" > /tmp/test.txt; \
/opt/calibre/ebook-convert /tmp/test.txt /tmp/test.epub; \
rm -f /tmp/test.txt /tmp/test.epub; \
echo "=== Calibre stripped successfully ==="
# Build the Java application and frontend.
FROM gradle:9.3.1-jdk25 AS app-build
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update \
&& apt-get install -y --no-install-recommends curl ca-certificates \
&& update-ca-certificates \
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
&& apt-get install -y --no-install-recommends nodejs \
&& rm -rf /var/lib/apt/lists/*
# JDK 25+: --add-exports is no longer accepted via JAVA_TOOL_OPTIONS; use JDK_JAVA_OPTIONS instead
ENV JDK_JAVA_OPTIONS="--add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
--add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \
--add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \
--add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
--add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED"
WORKDIR /app
COPY build.gradle settings.gradle gradlew ./
COPY gradle/ gradle/
COPY app/core/build.gradle app/core/
COPY app/common/build.gradle app/common/
COPY app/proprietary/build.gradle app/proprietary/
# Use system gradle instead of gradlew to avoid SSL issues downloading gradle distribution on emulated arm64
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
--mount=type=cache,target=/home/gradle/.gradle/wrapper \
gradle dependencies --no-daemon || true
COPY . .
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
--mount=type=cache,target=/home/gradle/.gradle/wrapper \
DISABLE_ADDITIONAL_FEATURES=false \
gradle clean build \
-PbuildWithFrontend=true \
-x spotlessApply -x spotlessCheck -x test -x sonarqube \
--no-daemon
# Python Builder stage.
FROM ubuntu:noble AS python-build
ARG UNOSERVER_VERSION=3.6
ENV DEBIAN_FRONTEND=noninteractive
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update && apt-get install -y --no-install-recommends \
python3 python3-venv python3-dev \
python3-packaging \
build-essential \
# Build dependencies for ocrmypdf/weasyprint/opencv
zlib1g-dev libjpeg-dev libffi-dev libpango1.0-dev \
&& rm -rf /var/lib/apt/lists/*
RUN python3 -m venv /opt/venv --system-site-packages
ENV PATH="/opt/venv/bin:$PATH"
# Build all heavy python packages here
RUN --mount=type=cache,target=/root/.cache/pip \
pip install \
weasyprint pdf2image opencv-python-headless ocrmypdf \
"unoserver==${UNOSERVER_VERSION}"
# Build Ghostscript 10.06.0 from source in an isolated stage (avoids library conflicts).
FROM ubuntu:noble AS gs-build
ARG GS_VERSION=10.06.0
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/tmp/gs-build,id=gs-build-${TARGETPLATFORM:-local} \
apt-get update && apt-get install -y --no-install-recommends \
build-essential curl ca-certificates libfontconfig1-dev && rm -rf /var/lib/apt/lists/* && \
GS_TAG="gs$(printf '%s' "${GS_VERSION}" | tr -d '.')" && \
cd /tmp/gs-build && \
rm -rf ghostscript-* && \
(test -d "ghostscript-${GS_VERSION}" || curl -fsSL "https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/${GS_TAG}/ghostscript-${GS_VERSION}.tar.gz" | tar xz) && \
cd "ghostscript-${GS_VERSION}" && \
./configure \
--prefix=/usr/local \
--without-x \
--disable-cups \
--disable-gtk && \
make -j"$(nproc)" && \
make install && \
cd ..
# Build PDF Tools (QPDF and ImageMagick 7).
FROM ubuntu:noble AS pdf-tools-build
ARG QPDF_VERSION=12.3.2
ARG IM_VERSION=7.1.2-13
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/tmp/pdf-tools-build,id=pdf-tools-${TARGETPLATFORM:-local} \
apt-get update && apt-get install -y --no-install-recommends \
build-essential cmake libssl-dev libjpeg-dev zlib1g-dev curl ca-certificates pkg-config \
libpng-dev libtiff-dev libwebp-dev libxml2-dev libfreetype6-dev liblcms2-dev libzip-dev liblqr-1-0-dev \
libltdl-dev libtool && rm -rf /var/lib/apt/lists/* && \
cd /tmp/pdf-tools-build && \
rm -rf qpdf-* ImageMagick-* && \
# Build QPDF
(test -d "qpdf-${QPDF_VERSION}" || curl -fsSL "https://github.com/qpdf/qpdf/releases/download/v${QPDF_VERSION}/qpdf-${QPDF_VERSION}.tar.gz" | tar xz) && \
cd "qpdf-${QPDF_VERSION}" && \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DALLOW_CRYPTO_OPENSSL=ON -DDEFAULT_CRYPTO=openssl && \
cmake --build build --parallel "$(nproc)" && \
cmake --install build && \
cd .. && \
# Build ImageMagick 7
(test -d "ImageMagick-${IM_VERSION}" || curl -fsSL "https://github.com/ImageMagick/ImageMagick/archive/refs/tags/${IM_VERSION}.tar.gz" | tar xz) && \
cd "ImageMagick-${IM_VERSION}" && \
./configure --prefix=/usr/local --with-modules --with-perl=no --with-magick-plus-plus=no --with-quantum-depth=16 --disable-static --enable-shared && \
make -j"$(nproc)" && \
make install && \
# Enable PDF/PS/EPS in policy
sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' /usr/local/etc/ImageMagick-7/policy.xml && \
sed -i 's/rights="none" pattern="PS"/rights="read|write" pattern="PS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
sed -i 's/rights="none" pattern="EPS"/rights="read|write" pattern="EPS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
cd .. && \
ldconfig /usr/local/lib
# Final runtime image.
FROM eclipse-temurin:25-jre AS runtime
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
ENV DEBIAN_FRONTEND=noninteractive \
LANG=C.UTF-8 \
LC_ALL=C.UTF-8 \
TESS_BASE_PATH=/usr/share/tesseract-ocr/5/tessdata
ARG UNOSERVER_VERSION=3.6
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/root/.cache/pip \
set -eux; \
apt-get update; \
# Add LibreOffice Fresh PPA for latest version (26.2.x)
apt-get install -y --no-install-recommends software-properties-common; \
add-apt-repository -y ppa:libreoffice/ppa; \
apt-get update; \
apt-get install -y --no-install-recommends \
# Core tools
ca-certificates tzdata tini bash fontconfig curl \
ffmpeg poppler-utils fontforge \
gosu unpaper pngquant \
fonts-liberation2 \
fonts-crosextra-caladea fonts-crosextra-carlito \
fonts-noto-core fonts-noto-mono fonts-noto-extra \
fonts-noto-cjk poppler-data \
fonts-freefont-ttf fonts-terminus \
# Python runtime & UNO bridge (python3-full -> python3 optimization)
python3 python3-uno python3-packaging \
# OCR
tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu tesseract-ocr-fra \
tesseract-ocr-por tesseract-ocr-chi-sim \
# Graphics / AWT headless
libcairo2 libpango-1.0-0 libpangoft2-1.0-0 \
libfreetype6 libfontconfig1 libx11-6 libxt6 libxext6 libxrender1 \
libxtst6 libxi6 libxinerama1 libxkbcommon0 libsm6 libice6 \
# Qt/EGL for Calibre CLI
libegl1 libgl1 libopengl0 libdbus-1-3 libglib2.0-0 libnss3 \
libasound2t64 libxcomposite1 libxrandr2 \
# Virtual framebuffer (required for headless LibreOffice Impress/Draw)
xvfb x11-utils coreutils \
libreoffice-writer-nogui libreoffice-calc-nogui \
libreoffice-impress-nogui libreoffice-draw-nogui \
libreoffice-base-nogui libreoffice-java-common \
; \
\
# Fix LibreOffice UNO bridge and filter availability
libreoffice --version; \
soffice --version 2>/dev/null || true; \
# Rebuild UNO bridge type database
/usr/lib/libreoffice/program/soffice.bin --headless --convert-to pdf /dev/null 2>/dev/null || true; \
# Force font cache rebuild and verify filters are available
fc-cache -f -v 2>&1 | awk 'NR <= 20'; \
\
# Cleanup stage.
\
rm -rf /var/lib/apt/lists/*; \
\
# Docs / man / info / icons / themes / GUI assets (headless server)
rm -rf /usr/share/doc/* /usr/share/man/* /usr/share/info/* \
/usr/share/lintian/* /usr/share/linda/* \
/usr/share/icons/* /usr/share/themes/* \
/usr/share/javascript/* \
/usr/share/gtk-3.0/* \
/usr/share/fontforge/pixmaps \
/usr/share/liblangtag/* \
/usr/share/tcltk/* \
/usr/share/python-wheels/*; \
\
# Clean up system locale data (LANG=C.UTF-8 doesn't use them)
find /usr/share/locale -mindepth 1 -maxdepth 1 -type d \
! -name 'en*' -exec rm -rf {} + 2>/dev/null || true; \
rm -rf /usr/share/i18n/locales /usr/share/i18n/charmaps; \
\
rm -rf /usr/lib/libreoffice/share/gallery \
/usr/lib/libreoffice/share/template \
/usr/lib/libreoffice/share/wizards \
/usr/lib/libreoffice/share/autotext \
/usr/lib/libreoffice/help \
/usr/lib/libreoffice/share/config/images_*.zip \
/usr/lib/libreoffice/share/basic \
/usr/lib/libreoffice/share/Scripts \
/usr/lib/libreoffice/share/autocorr \
/usr/lib/libreoffice/share/classification \
/usr/lib/libreoffice/share/wordbook \
/usr/lib/libreoffice/share/fingerprint \
/usr/lib/libreoffice/share/xdg \
/usr/lib/libreoffice/share/numbertext \
/usr/lib/libreoffice/share/shell \
/usr/lib/libreoffice/share/palette \
/usr/lib/libreoffice/share/theme_definitions \
/usr/lib/libreoffice/share/xslt \
/usr/lib/libreoffice/share/labels \
/usr/lib/libreoffice/share/dtd \
/usr/lib/libreoffice/share/tipoftheday \
/usr/lib/libreoffice/share/toolbarmode \
/usr/lib/libreoffice/share/psprint; \
\
# Preserving soffice.cfg because LibreOffice needs it to load documents.
\
\
\
find /usr/lib -name '*.so*' -type f \
-not -path '*/jvm/*' \
-not -path '*/libreoffice/*' \
-exec strip --strip-unneeded {} + 2>/dev/null || true; \
\
# Preserving ffmpeg codec libs as they are directly linked.
\
# Remove Mesa/LLVM GPU backends (~179 MB, not needed for headless/offscreen)
MULTIARCH_LIBDIR=$(dpkg-architecture -qDEB_HOST_MULTIARCH 2>/dev/null \
|| find /usr/lib -maxdepth 1 -type d -name '*-linux-gnu' | head -1); \
rm -f \
"${MULTIARCH_LIBDIR}"/libLLVM*.so* \
"${MULTIARCH_LIBDIR}"/libgallium*.so* \
2>/dev/null || true; \
\
# Python stdlib: remove unused modules (~71 MB)
rm -rf /usr/lib/python3.12/test \
/usr/lib/python3.12/idlelib \
/usr/lib/python3.12/tkinter \
/usr/lib/python3.12/lib2to3 \
/usr/lib/python3.12/pydoc_data; \
\
# System Python packages not needed at runtime (~153 MB)
rm -rf /usr/lib/python3/dist-packages/scipy \
/usr/lib/python3/dist-packages/sympy \
/usr/lib/python3/dist-packages/mpmath; \
\
# Duplicate system packages (superseded by venv versions, ~55 MB)
rm -rf /usr/lib/python3/dist-packages/numpy \
/usr/lib/python3/dist-packages/fontTools \
/usr/lib/python3/dist-packages/PIL; \
\
# System-wide Python cache cleanup
find /usr/lib/python3* -type d -name __pycache__ \
-exec rm -rf {} + 2>/dev/null || true; \
find /usr/lib/python3* \( -name '*.pyc' -o -name '*.pyi' \) \
-delete 2>/dev/null || true; \
\
# Additional metadata cleanup
# FIX: Only remove ImageMagick doc/www, NOT the whole dir (preserves policy.xml/delegates.xml)
rm -rf /usr/share/bug /usr/share/lintian /usr/share/linda \
/var/lib/dpkg/info/*.md5sums \
/var/log/dpkg.log /var/log/apt/* \
/usr/local/share/ghostscript/*/doc \
/usr/local/share/ghostscript/*/examples \
/usr/share/ImageMagick-*/doc \
/usr/share/ImageMagick-*/www; \
\
\
# NEW: Tesseract training configs (not needed for OCR, but keep configs/ for hocr/txt output)
rm -rf /usr/share/tesseract-ocr/*/tessdata/tessconfigs; \
\
# Trim CJK fonts to Regular weight only (FIX: Broadened path)
find /usr/share/fonts -name '*CJK*' \
! -name '*Regular*' -type f -delete 2>/dev/null || true; \
\
# Misc caches
rm -rf /var/cache/fontconfig/* /tmp/*
# Python virtual environment.
COPY --from=python-build /opt/venv /opt/venv
RUN set -eux; \
ln -sf /opt/venv/bin/unoconvert /usr/local/bin/unoconvert; \
ln -sf /opt/venv/bin/unoserver /usr/local/bin/unoserver; \
# Verify python libs are accessible
/opt/venv/bin/python -c "import cv2; import ocrmypdf; import weasyprint; print('Python libs verified')"; \
# Cleanup venv from builder leftovers
find /opt/venv -type d -name __pycache__ \
-exec rm -rf {} + 2>/dev/null || true; \
find /opt/venv \( -name '*.pyc' -o -name '*.pyi' \) -delete 2>/dev/null || true; \
rm -rf /opt/venv/lib/python*/site-packages/pip \
/opt/venv/lib/python*/site-packages/pip-*.dist-info \
/opt/venv/lib/python*/site-packages/setuptools \
/opt/venv/lib/python*/site-packages/setuptools-*.dist-info;
# Calibre and PDF Tools.
COPY --link --from=calibre-build /opt/calibre /opt/calibre
COPY --link --from=pdf-tools-build /usr/local/bin/qpdf /usr/bin/qpdf
COPY --link --from=pdf-tools-build /usr/local/bin/magick /usr/bin/magick
COPY --link --from=pdf-tools-build /usr/local/lib/libMagick* /usr/local/lib/
COPY --link --from=pdf-tools-build /usr/local/etc/ImageMagick-7 /usr/local/etc/ImageMagick-7
COPY --link --from=gs-build /usr/local/bin/gs /usr/local/bin/gs
COPY --link --from=gs-build /usr/local/share/ghostscript /usr/local/share/ghostscript
RUN set -eux; \
ldconfig /usr/local/lib; \
# Clean pycache that may have been generated during stage-1 verify
find /opt/calibre -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true;
# Non-root user.
ARG PUID=1000
ARG PGID=1000
RUN set -eux; \
if ! getent group stirlingpdfgroup >/dev/null 2>&1; then \
groupadd -g "${PGID}" stirlingpdfgroup 2>/dev/null \
|| groupadd stirlingpdfgroup; \
fi; \
if ! id -u stirlingpdfuser >/dev/null 2>&1; then \
useradd -m -u "${PUID}" -g stirlingpdfgroup \
-d /home/stirlingpdfuser -s /bin/bash stirlingpdfuser 2>/dev/null \
|| useradd -m -g stirlingpdfgroup \
-d /home/stirlingpdfuser -s /bin/bash stirlingpdfuser; \
fi; \
ln -sf /usr/sbin/gosu /usr/local/bin/su-exec
# Application files.
COPY --link --from=app-build --chown=1000:1000 \
/app/app/core/build/libs/*.jar /app.jar
COPY --link --from=app-build --chown=1000:1000 \
/app/build/libs/restart-helper.jar /restart-helper.jar
COPY --link --chown=1000:1000 scripts/ /scripts/
# Fonts go to system dir — root ownership is correct (world-readable)
COPY --link app/core/src/main/resources/static/fonts/*.ttf /usr/share/fonts/truetype/
# Permissions and configuration.
RUN set -eux; \
ln -sf /opt/calibre/ebook-convert /usr/bin/ebook-convert; \
ln -sf /opt/venv/bin/unoconvert /usr/local/bin/unoconvert; \
ln -sf /opt/venv/bin/unoserver /usr/local/bin/unoserver; \
ln -sf /opt/venv/bin/ocrmypdf /usr/local/bin/ocrmypdf; \
ln -sf /opt/venv/bin/weasyprint /usr/local/bin/weasyprint; \
ln -sf /opt/venv/bin/unoping /usr/local/bin/unoping; \
chmod +x /scripts/*; \
mkdir -p /configs /logs /customFiles \
/pipeline/watchedFolders /pipeline/finishedFolders \
/tmp/stirling-pdf/heap_dumps; \
# Create symlinks to allow app to find these in /app/
mkdir -p /app; \
ln -s /logs /app/logs; \
ln -s /configs /app/configs; \
ln -s /customFiles /app/customFiles; \
ln -s /pipeline /app/pipeline; \
chown -R stirlingpdfuser:stirlingpdfgroup \
/home/stirlingpdfuser /configs /logs /customFiles /pipeline \
/tmp/stirling-pdf; \
chown -h stirlingpdfuser:stirlingpdfgroup /app/logs /app/configs /app/customFiles /app/pipeline; \
chown stirlingpdfuser:stirlingpdfgroup /app; \
chmod 1777 /tmp/stirling-pdf; \
fc-cache -f; \
# NOTE: Project Leyden AOT cache is generated in the background on first boot
# by init-without-ocr.sh. The cache is picked up on subsequent boots for
# 15-25% faster startup. See: JEP 483 + 514 + 515 (JDK 25).
\
# Clean Calibre pycache that may have been generated during stage-1 verify
find /opt/calibre -type d -name __pycache__ \
-exec rm -rf {} + 2>/dev/null || true
# Environment variables.
ARG VERSION_TAG
ENV VERSION_TAG=$VERSION_TAG \
STIRLING_AOT_ENABLE="false" \
STIRLING_JVM_PROFILE="balanced" \
_JVM_OPTS_BALANCED="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:G1HeapRegionSize=4m -XX:G1PeriodicGCInterval=60000 -XX:+UseStringDeduplication -XX:+UseCompactObjectHeaders -XX:+ExplicitGCInvokesConcurrent -Dspring.threads.virtual.enabled=true -Djava.awt.headless=true" \
_JVM_OPTS_PERFORMANCE="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UseShenandoahGC -XX:ShenandoahGCMode=generational -XX:+UseCompactObjectHeaders -XX:+UseStringDeduplication -XX:+AlwaysPreTouch -XX:+ExplicitGCInvokesConcurrent -Dspring.threads.virtual.enabled=true -Djava.awt.headless=true" \
JAVA_CUSTOM_OPTS="" \
HOME=/home/stirlingpdfuser \
PUID=${PUID} \
PGID=${PGID} \
UMASK=022 \
FAT_DOCKER=true \
INSTALL_BOOK_AND_ADVANCED_HTML_OPS=false \
PATH="/opt/venv/bin:${PATH}" \
UNO_PATH=/usr/lib/libreoffice/program \
LIBREOFFICE_BIN_PATH=/usr/lib/libreoffice/program/soffice.bin \
STIRLING_TEMPFILES_DIRECTORY=/tmp/stirling-pdf \
TMPDIR=/tmp/stirling-pdf \
TEMP=/tmp/stirling-pdf \
TMP=/tmp/stirling-pdf \
QTWEBENGINE_CHROMIUM_FLAGS="--no-sandbox --disable-gpu --disable-software-rasterizer" \
DBUS_SESSION_BUS_ADDRESS=/dev/null
# Metadata labels.
LABEL org.opencontainers.image.title="Stirling-PDF Fat" \
org.opencontainers.image.description="Fat version with extra fonts for air-gapped environments, includes Calibre, LibreOffice, Tesseract, OCRmyPDF" \
org.opencontainers.image.source="https://github.com/Stirling-Tools/Stirling-PDF" \
org.opencontainers.image.licenses="MIT" \
org.opencontainers.image.vendor="Stirling-Tools" \
org.opencontainers.image.url="https://www.stirlingpdf.com" \
org.opencontainers.image.documentation="https://docs.stirlingpdf.com" \
maintainer="Stirling-Tools" \
org.opencontainers.image.authors="Stirling-Tools" \
org.opencontainers.image.version="${VERSION_TAG}" \
org.opencontainers.image.keywords="PDF, manipulation, fat, air-gapped, API, Spring Boot, React"
EXPOSE 8080/tcp
STOPSIGNAL SIGTERM
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
CMD curl -f http://localhost:8080/api/v1/info/status || exit 1
ENTRYPOINT ["tini", "--", "/scripts/init.sh"]
CMD []