Base docker image (#5958)

Co-authored-by: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
Anthony Stirling
2026-03-25 15:41:58 +00:00
committed by GitHub
parent bb43e9dcdf
commit 9500acd69f
22 changed files with 1102 additions and 2552 deletions

View File

@@ -1,157 +0,0 @@
# Unified Dockerfile - Frontend + Backend in single container
# Supports MODE parameter: BOTH (default), FRONTEND, BACKEND
# Stage 1: Build Frontend
FROM node:20-alpine@sha256:658d0f63e501824d6c23e06d4bb95c71e7d704537c9d9272f488ac03a370d448 AS frontend-build
WORKDIR /app
COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci
COPY frontend .
# Override VITE_API_BASE_URL to use relative paths for production
# This ensures frontend works with nginx proxy setup
RUN DISABLE_ADDITIONAL_FEATURES=false VITE_API_BASE_URL=/ npm run build
# Stage 2: Build Backend (server-only JAR - no UI)
FROM gradle:8.14-jdk21@sha256:051d9a116793bdc5175a3f97a545718b750489eee85a7da20913c8a53f722a72 AS backend-build
COPY build.gradle .
COPY settings.gradle .
COPY gradlew .
COPY gradle gradle/
COPY app/core/build.gradle core/.
COPY app/common/build.gradle common/.
COPY app/proprietary/build.gradle proprietary/.
ENV JAVA_TOOL_OPTIONS="--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
--add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \
--add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \
--add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED"
RUN ./gradlew build -x spotlessApply -x spotlessCheck -x test -x sonarqube || return 0
WORKDIR /app
COPY . .
# Build server-only JAR (no frontend, includes security features controlled by DOCKER_ENABLE_SECURITY at runtime)
RUN DISABLE_ADDITIONAL_FEATURES=false \
./gradlew clean build -x spotlessApply -x spotlessCheck -x test -x sonarqube
# Stage 3: Final unified image
FROM alpine:3.22.1@sha256:4bcff63911fcb4448bd4fdacec207030997caf25e9bea4045fa6c8c44de311d1
ARG VERSION_TAG
# Labels
LABEL org.opencontainers.image.title="Stirling-PDF Unified"
LABEL org.opencontainers.image.description="Unified container for Stirling-PDF - Frontend + Backend with MODE parameter"
LABEL org.opencontainers.image.source="https://github.com/Stirling-Tools/Stirling-PDF"
LABEL org.opencontainers.image.licenses="MIT"
LABEL org.opencontainers.image.vendor="Stirling-Tools"
LABEL org.opencontainers.image.url="https://www.stirlingpdf.com"
LABEL org.opencontainers.image.documentation="https://docs.stirlingpdf.com"
LABEL maintainer="Stirling-Tools"
LABEL org.opencontainers.image.authors="Stirling-Tools"
LABEL org.opencontainers.image.version="${VERSION_TAG}"
LABEL org.opencontainers.image.keywords="PDF, manipulation, unified, API, Spring Boot, React"
# Copy backend files
COPY scripts /scripts
COPY app/core/src/main/resources/static/fonts/*.ttf /usr/share/fonts/opentype/noto/
# Copy built JAR
# Use numeric UID:GID (1000:1000) since the named user doesn't exist yet at COPY time
COPY --from=backend-build --chown=1000:1000 \
/app/app/core/build/libs/*.jar app.jar
# Copy frontend files
COPY --from=frontend-build /app/dist /usr/share/nginx/html
# Copy nginx configuration
COPY docker/unified/nginx.conf /etc/nginx/nginx.conf
COPY docker/unified/entrypoint.sh /entrypoint.sh
# Environment Variables
ENV VERSION_TAG=$VERSION_TAG \
JAVA_BASE_OPTS="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UnlockExperimentalVMOptions -XX:MaxRAMPercentage=75 -XX:InitiatingHeapOccupancyPercent=20 -XX:+G1PeriodicGCInvokesConcurrent -XX:G1PeriodicGCInterval=10000 -XX:+UseStringDeduplication -XX:G1PeriodicGCSystemLoadThreshold=70" \
JAVA_CUSTOM_OPTS="" \
HOME=/home/stirlingpdfuser \
PUID=1000 \
PGID=1000 \
UMASK=022 \
PYTHONPATH=/usr/lib/libreoffice/program:/opt/venv/lib/python3.12/site-packages \
UNO_PATH=/usr/lib/libreoffice/program \
URE_BOOTSTRAP=file:///usr/lib/libreoffice/program/fundamentalrc \
PATH=$PATH:/opt/venv/bin \
STIRLING_TEMPFILES_DIRECTORY=/tmp/stirling-pdf \
TMPDIR=/tmp/stirling-pdf \
TEMP=/tmp/stirling-pdf \
TMP=/tmp/stirling-pdf \
MODE=BOTH \
BACKEND_INTERNAL_PORT=8081 \
VITE_API_BASE_URL=http://localhost:8080
# Install all dependencies
# Removed wasteful pip upgrade; chown moved to COPY above
RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
echo "@community https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \
apk upgrade --no-cache -a && \
apk add --no-cache \
ca-certificates \
tzdata \
tini \
bash \
curl \
shadow \
su-exec \
openssl \
openssl-dev \
openjdk21-jre \
nginx \
# Doc conversion
gcompat \
libc6-compat \
libreoffice \
imagemagick \
# pdftohtml
poppler-utils \
# OCR MY PDF
unpaper \
tesseract-ocr-data-eng \
tesseract-ocr-data-chi_sim \
tesseract-ocr-data-deu \
tesseract-ocr-data-fra \
tesseract-ocr-data-por \
ocrmypdf \
# CV
py3-opencv \
python3 \
py3-pip \
py3-pillow@testing \
py3-pdf2image@testing && \
python3 -m venv /opt/venv && \
/opt/venv/bin/pip install --no-cache-dir unoserver weasyprint && \
ln -s /usr/lib/libreoffice/program/uno.py /opt/venv/lib/python3.12/site-packages/ && \
ln -s /usr/lib/libreoffice/program/unohelper.py /opt/venv/lib/python3.12/site-packages/ && \
ln -s /usr/lib/libreoffice/program /opt/venv/lib/python3.12/site-packages/LibreOffice && \
# Clean up pip + setuptools from venv
find /opt/venv -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true; \
rm -rf /opt/venv/lib/python*/site-packages/pip \
/opt/venv/lib/python*/site-packages/setuptools && \
mv /usr/share/tessdata /usr/share/tessdata-original && \
mkdir -p $HOME /configs /configs/heap_dumps /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders /tmp/stirling-pdf /pipeline/watchedFolders /pipeline/finishedFolders && \
mkdir -p /var/lib/nginx/tmp /var/log/nginx && \
fc-cache -f -v && \
chmod +x /scripts/* && \
chmod +x /entrypoint.sh && \
# User permissions
addgroup -S stirlingpdfgroup && adduser -S stirlingpdfuser -G stirlingpdfgroup && \
chown -R stirlingpdfuser:stirlingpdfgroup $HOME /scripts /pipeline /usr/share/fonts/opentype/noto /configs /customFiles /pipeline /tmp/stirling-pdf /var/lib/nginx /var/log/nginx /usr/share/nginx
EXPOSE 8080/tcp
ENTRYPOINT ["tini", "--", "/entrypoint.sh"]

View File

@@ -1,119 +0,0 @@
# Unified Ultra-Lite Dockerfile - Frontend + Backend in single container with minimal dependencies
# Supports MODE parameter: BOTH (default), FRONTEND, BACKEND
# Stage 1: Build Frontend
FROM node:20-alpine@sha256:658d0f63e501824d6c23e06d4bb95c71e7d704537c9d9272f488ac03a370d448 AS frontend-build
WORKDIR /app
COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci
COPY frontend .
# Override VITE_API_BASE_URL to use relative paths for production
# This ensures frontend works with nginx proxy setup
RUN DISABLE_ADDITIONAL_FEATURES=true VITE_API_BASE_URL=/ npm run build
# Stage 2: Build Backend
FROM gradle:8.14-jdk21@sha256:051d9a116793bdc5175a3f97a545718b750489eee85a7da20913c8a53f722a72 AS backend-build
COPY build.gradle .
COPY settings.gradle .
COPY gradlew .
COPY gradle gradle/
COPY app/core/build.gradle core/.
COPY app/common/build.gradle common/.
COPY app/proprietary/build.gradle proprietary/.
ENV JAVA_TOOL_OPTIONS="--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
--add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \
--add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \
--add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED"
RUN ./gradlew build -x spotlessApply -x spotlessCheck -x test -x sonarqube || return 0
WORKDIR /app
COPY . .
RUN DISABLE_ADDITIONAL_FEATURES=true \
./gradlew clean build -x spotlessApply -x spotlessCheck -x test -x sonarqube
# Stage 3: Final unified ultra-lite image
FROM alpine:3.22.1@sha256:4bcff63911fcb4448bd4fdacec207030997caf25e9bea4045fa6c8c44de311d1
ARG VERSION_TAG
# Labels
LABEL org.opencontainers.image.title="Stirling-PDF Unified Ultra-Lite"
LABEL org.opencontainers.image.description="Unified ultra-lite container for Stirling-PDF - Frontend + Backend with minimal dependencies"
LABEL org.opencontainers.image.source="https://github.com/Stirling-Tools/Stirling-PDF"
LABEL org.opencontainers.image.licenses="MIT"
LABEL org.opencontainers.image.vendor="Stirling-Tools"
LABEL org.opencontainers.image.url="https://www.stirlingpdf.com"
LABEL org.opencontainers.image.documentation="https://docs.stirlingpdf.com"
LABEL maintainer="Stirling-Tools"
LABEL org.opencontainers.image.authors="Stirling-Tools"
LABEL org.opencontainers.image.version="${VERSION_TAG}"
LABEL org.opencontainers.image.keywords="PDF, manipulation, unified, ultra-lite, API, Spring Boot, React"
# Copy backend files
COPY scripts/init-without-ocr.sh /scripts/init-without-ocr.sh
COPY scripts/installFonts.sh /scripts/installFonts.sh
# Copy built JAR
# Use numeric UID:GID (1000:1000) since the named user doesn't exist yet at COPY time
COPY --from=backend-build --chown=1000:1000 \
/app/app/core/build/libs/*.jar app.jar
# Copy frontend files
COPY --from=frontend-build /app/dist /usr/share/nginx/html
# Copy nginx configuration
COPY docker/unified/nginx.conf /etc/nginx/nginx.conf
COPY docker/unified/entrypoint.sh /entrypoint.sh
# Environment Variables
ENV DISABLE_ADDITIONAL_FEATURES=false \
VERSION_TAG=$VERSION_TAG \
JAVA_BASE_OPTS="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UnlockExperimentalVMOptions -XX:MaxRAMPercentage=75 -XX:InitiatingHeapOccupancyPercent=20 -XX:+G1PeriodicGCInvokesConcurrent -XX:G1PeriodicGCInterval=10000 -XX:+UseStringDeduplication -XX:G1PeriodicGCSystemLoadThreshold=70" \
JAVA_CUSTOM_OPTS="" \
HOME=/home/stirlingpdfuser \
PUID=1000 \
PGID=1000 \
UMASK=022 \
STIRLING_TEMPFILES_DIRECTORY=/tmp/stirling-pdf \
TMPDIR=/tmp/stirling-pdf \
TEMP=/tmp/stirling-pdf \
TMP=/tmp/stirling-pdf \
MODE=BOTH \
BACKEND_INTERNAL_PORT=8081 \
ENDPOINTS_GROUPS_TO_REMOVE=CLI
# Install minimal dependencies
# /app.jar chown moved to COPY above
RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
echo "@community https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \
apk upgrade --no-cache -a && \
apk add --no-cache \
ca-certificates \
tzdata \
tini \
bash \
curl \
shadow \
su-exec \
openjdk21-jre \
nginx && \
mkdir -p $HOME /configs /configs/heap_dumps /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders /tmp/stirling-pdf /pipeline/watchedFolders /pipeline/finishedFolders && \
mkdir -p /usr/share/fonts/opentype/noto /var/lib/nginx/tmp /var/log/nginx && \
chmod +x /scripts/*.sh && \
chmod +x /entrypoint.sh && \
# User permissions
addgroup -S stirlingpdfgroup && adduser -S stirlingpdfuser -G stirlingpdfgroup && \
chown -R stirlingpdfuser:stirlingpdfgroup $HOME /scripts /pipeline /configs /customFiles /pipeline /tmp/stirling-pdf /var/lib/nginx /var/log/nginx /usr/share/nginx
EXPOSE 8080/tcp
ENTRYPOINT ["tini", "--", "/entrypoint.sh"]

658
docker/base/Dockerfile Normal file
View File

@@ -0,0 +1,658 @@
# Stirling-PDF Base Image
# Pre-built tools: Calibre, Ghostscript, QPDF, ImageMagick, Python venv with AI/OCR packages
# This image is reused by Dockerfile and Dockerfile.fat to avoid rebuilding tools on every app release
ARG TARGETPLATFORM
# Stage 1: Build and strip Calibre
FROM ubuntu:noble AS calibre-build
ARG TARGETPLATFORM
ARG CALIBRE_VERSION=9.4.0
ARG CALIBRE_STRIP_WEBENGINE=false
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
set -eux; \
apt-get update; \
apt-get install -y --no-install-recommends \
ca-certificates curl xz-utils libnss3 libfontconfig1 \
libgl1 libegl1 libdbus-1-3 libasound2t64 libxcomposite1 \
libxrandr2 libxkbcommon0 libxi6 libxtst6 libopengl0 \
poppler-utils; \
rm -rf /var/lib/apt/lists/*; \
\
case "$(uname -m)" in \
x86_64) CALIBRE_ARCH="x86_64" ;; \
aarch64) CALIBRE_ARCH="arm64" ;; \
*) echo "Unsupported arch: $(uname -m)"; exit 1 ;; \
esac; \
\
for attempt in 1 2 3; do \
echo "Downloading Calibre (attempt $attempt/3)..."; \
if curl -fsSL --max-time 300 --retry 3 --retry-delay 5 --retry-max-time 900 \
"https://download.calibre-ebook.com/${CALIBRE_VERSION}/calibre-${CALIBRE_VERSION}-${CALIBRE_ARCH}.txz" \
-o /tmp/calibre.txz; then \
break; \
fi; \
if [ $attempt -lt 3 ]; then sleep 10; fi; \
done; \
mkdir -p /opt/calibre; \
tar xJf /tmp/calibre.txz -C /opt/calibre; \
rm /tmp/calibre.txz; \
\
# We only need Qt6 WebEngine (Chromium) for ebook->PDF output.
# PDF INPUT now uses the pdftohtml engine (poppler), not Qt.
rm -f /opt/calibre/lib/libQt6Designer* \
/opt/calibre/lib/libQt6Multimedia* \
/opt/calibre/lib/libQt6SpatialAudio.so.* \
/opt/calibre/lib/libQt6NetworkAuth.so.* \
/opt/calibre/lib/libQt6Concurrent.so.* \
/opt/calibre/lib/libQt6OpenGLWidgets.so.* \
/opt/calibre/lib/libQt6QuickWidgets.so.* \
/opt/calibre/lib/libQt6Svg.so.* \
/opt/calibre/lib/libQt6SvgWidgets.so.* \
/opt/calibre/lib/libQt6Pdf*.so.* \
/opt/calibre/lib/libQt6ShaderTools.so.* \
/opt/calibre/lib/libQt6SerialPort.so.* \
/opt/calibre/lib/libQt6Sensors.so.* \
/opt/calibre/lib/libQt6Test.so.* \
/opt/calibre/lib/libQt6Sql.so.* \
/opt/calibre/lib/libQt6RemoteObjects.so.* \
/opt/calibre/lib/libQt6Help.so.* \
/opt/calibre/lib/libQt6VirtualKeyboard.so.* \
/opt/calibre/lib/libQt6WaylandClient.so.* \
/opt/calibre/lib/libQt6WaylandCompositor.so.* \
/opt/calibre/lib/libQt6Bluetooth.so.* \
/opt/calibre/lib/libQt6Nfc.so.* \
/opt/calibre/lib/libQt6Charts.so.* \
/opt/calibre/lib/libQt6DataVisualization.so.* \
/opt/calibre/lib/libQt6Scxml.so.* \
/opt/calibre/lib/libQt6StateMachine.so.* \
/opt/calibre/lib/libQt6TextToSpeech.so.* \
/opt/calibre/lib/libQt63D*.so.* \
/opt/calibre/lib/libavcodec.so.* \
/opt/calibre/lib/libavfilter.so.* \
/opt/calibre/lib/libavformat.so.* \
/opt/calibre/lib/libavutil.so.* \
/opt/calibre/lib/libavdevice.so.* \
/opt/calibre/lib/libpostproc.so.* \
/opt/calibre/lib/libswresample.so.* \
/opt/calibre/lib/libswscale.so.* \
/opt/calibre/lib/libspeex.so.* \
/opt/calibre/lib/libFLAC.so.* \
/opt/calibre/lib/libopus.so.* \
/opt/calibre/lib/libvorbis*.so.* \
/opt/calibre/lib/libasyncns.so.* \
/opt/calibre/lib/libspeechd.so.* \
/opt/calibre/lib/libespeak-ng.so.* \
/opt/calibre/lib/libonnxruntime.so.* \
/opt/calibre/lib/libgio-2.0.so.* \
/opt/calibre/lib/libzstd.so.* \
/opt/calibre/lib/libhunspell-1.7.so.* \
/opt/calibre/lib/libbrotlienc.so.* \
/opt/calibre/lib/libbrotlicommon.so.* \
/opt/calibre/lib/libbrotlidec.so.* \
/opt/calibre/lib/libstemmer.so.* \
/opt/calibre/lib/libmtp.so.* \
/opt/calibre/lib/libncursesw.so.* \
/opt/calibre/lib/libchm.so.* \
/opt/calibre/lib/libgcrypt.so.* \
/opt/calibre/lib/libgpg-error.so.* \
/opt/calibre/lib/libicuio.so.* \
/opt/calibre/lib/libreadline.so.* \
/opt/calibre/lib/libusb-1.0.so.* \
/opt/calibre/lib/libpulse*.so.* \
/opt/calibre/lib/libsndfile.so.* \
/opt/calibre/lib/libmpv.so.* \
/opt/calibre/lib/libass.so.* \
/opt/calibre/lib/librubberband.so.* \
/opt/calibre/lib/libsamplerate.so.*; \
rm -rf /opt/calibre/lib/qt6/plugins/platformthemes \
/opt/calibre/lib/qt6/plugins/multimedia \
/opt/calibre/lib/qt6/plugins/designer \
/opt/calibre/lib/qt6/plugins/virtualkeyboard \
/opt/calibre/lib/qt6/plugins/wayland* \
/opt/calibre/lib/qt6/plugins/texttospeech \
/opt/calibre/lib/qt6/plugins/position \
/opt/calibre/lib/qt6/plugins/sensors \
/opt/calibre/lib/qt6/plugins/sqldrivers \
/opt/calibre/lib/qt6/plugins/canbus \
/opt/calibre/lib/qt6/plugins/sceneparsers \
/opt/calibre/lib/qt6/plugins/renderers \
/opt/calibre/lib/qt6/plugins/geometryloaders \
/opt/calibre/lib/qt6/plugins/generic \
/opt/calibre/lib/qt6/plugins/qmltooling \
/opt/calibre/lib/qt6/libexec/QtWebEngineProcess.bak; \
rm -rf /opt/calibre/plugins/sqldrivers \
/opt/calibre/plugins/multimedia \
/opt/calibre/plugins/wayland-shell-integration \
/opt/calibre/plugins/wayland-graphics-integration-client \
/opt/calibre/plugins/wayland-decoration-client \
/opt/calibre/plugins/texttospeech \
/opt/calibre/plugins/platformthemes \
/opt/calibre/plugins/platforminputcontexts \
/opt/calibre/plugins/egldeviceintegrations \
/opt/calibre/plugins/iconengines; \
\
# Remove GUI executables but keep ebook-convert, ebook-meta, and calibre-parallel.
rm -f /opt/calibre/calibre \
/opt/calibre/calibre-server \
/opt/calibre/calibre-smtp \
/opt/calibre/calibre-debug \
/opt/calibre/calibre-customize \
/opt/calibre/calibredb \
/opt/calibre/ebook-viewer \
/opt/calibre/ebook-edit \
/opt/calibre/ebook-polish \
/opt/calibre/ebook-device \
/opt/calibre/fetch-ebook-metadata \
/opt/calibre/lrf2lrs \
/opt/calibre/lrs2lrf \
/opt/calibre/markdown-calibre \
/opt/calibre/web2disk; \
\
# Remove Python modules not needed for conversion.
rm -rf /opt/calibre/lib/calibre/gui2 \
/opt/calibre/lib/calibre/devices \
/opt/calibre/lib/calibre/library \
/opt/calibre/lib/calibre/db \
/opt/calibre/lib/calibre/srv \
/opt/calibre/lib/calibre/spell \
/opt/calibre/lib/calibre/live \
/opt/calibre/lib/calibre/utils/piper \
/opt/calibre/lib/calibre/utils/certgen.so \
/opt/calibre/lib/calibre/utils/https \
/opt/calibre/lib/calibre/utils/mdns; \
\
# Remove resources not needed for CLI conversion.
rm -rf /opt/calibre/resources/images \
/opt/calibre/resources/icons \
/opt/calibre/resources/icons.rcc \
/opt/calibre/resources/content-server \
/opt/calibre/resources/editor* \
/opt/calibre/resources/viewer \
/opt/calibre/resources/viewer.js \
/opt/calibre/resources/viewer.html \
/opt/calibre/resources/recipes \
/opt/calibre/resources/dictionaries \
/opt/calibre/resources/hyphenation \
/opt/calibre/resources/catalog \
/opt/calibre/resources/calibre-mimetypes.xml \
/opt/calibre/resources/changelog.json \
/opt/calibre/resources/user-agent-data.json \
/opt/calibre/resources/builtin_recipes.zip \
/opt/calibre/resources/builtin_recipes.xml \
/opt/calibre/resources/stylelint-bundle.min.js \
/opt/calibre/resources/stylelint.js \
/opt/calibre/resources/rapydscript \
/opt/calibre/resources/quick_start \
/opt/calibre/resources/piper-voices.json \
/opt/calibre/resources/images.qrc \
/opt/calibre/resources/mozilla-ca-certs.pem \
/opt/calibre/resources/ebook-convert-complete.calibre_msgpack \
/opt/calibre/resources/mathjax \
/opt/calibre/resources/common-english-words.txt \
/opt/calibre/resources/calibre-portable.sh \
/opt/calibre/resources/calibre-portable.bat \
/opt/calibre/resources/metadata_sqlite.sql \
/opt/calibre/resources/notes_sqlite.sql \
/opt/calibre/resources/fts_sqlite.sql \
/opt/calibre/resources/fts_triggers.sql \
/opt/calibre/resources/jacket \
/opt/calibre/resources/editor-functions.json \
/opt/calibre/resources/calibre-ebook-root-CA.crt \
/opt/calibre/resources/csscolorparser.js \
/opt/calibre/resources/lookup.js \
/opt/calibre/resources/pdf-mathjax-loader.js \
/opt/calibre/resources/scraper.js \
/opt/calibre/resources/toc.js \
/opt/calibre/resources/user-manual-translation-stats.json \
/opt/calibre/resources/pin-template.svg \
/opt/calibre/resources/scripts.calibre_msgpack \
/opt/calibre/resources/fonts \
/opt/calibre/resources/qtwebengine_devtools_resources.pak \
/opt/calibre/lib/calibre/ebooks/docx/images \
/opt/calibre/share \
/opt/calibre/man; \
\
# Remove translations and localization while keeping required libraries.
# Keep iso639.calibre_msgpack (required)
# Keep only en-US.pak from qtwebengine_locales (required for WebEngine)
rm -rf /opt/calibre/lib/qt6/translations; \
find /opt/calibre/translations -mindepth 1 -maxdepth 1 ! -name 'qtwebengine_locales' -exec rm -rf {} +; \
find /opt/calibre/translations/qtwebengine_locales -type f ! -name 'en-US.pak' -delete 2>/dev/null || true; \
if [ -d /opt/calibre/resources/localization ]; then \
rm -rf /opt/calibre/resources/localization/locales.zip \
/opt/calibre/resources/localization/stats.calibre_msgpack \
/opt/calibre/resources/localization/website-languages.txt; \
find /opt/calibre/resources/localization -mindepth 1 -maxdepth 1 ! -name 'iso639.calibre_msgpack' -exec rm -rf {} +; \
fi; \
\
# Strip debug symbols from calibre extension modules.
# Exclude Qt6 libs: libQt6WebEngineCore and friends embed Chromium V8 JIT code
# and internal resource blobs that strip corrupts, causing segfaults at render time.
find /opt/calibre/lib -name '*.so*' \
! -name 'libQt6*' \
-exec strip --strip-unneeded {} + 2>/dev/null || true; \
\
# Remove Python bytecode caches.
find /opt/calibre -type d -name __pycache__ \
-exec rm -rf {} + 2>/dev/null || true; \
find /opt/calibre -name '*.pyc' -delete 2>/dev/null || true; \
\
# Verify conversion still works
# NOTE: txt->epub used intentionally NOT txt->pdf.
# Calibre 7+ uses WebEngine (Chromium) for PDF output, which requires kernel
# capabilities unavailable in Docker RUN steps and segfaults under QEMU.
# epub output exercises the same Python/plugin stack without touching WebEngine.
/opt/calibre/ebook-convert --version; \
echo "Hello" > /tmp/test.txt; \
/opt/calibre/ebook-convert /tmp/test.txt /tmp/test.epub; \
rm -f /tmp/test.txt /tmp/test.epub; \
\
# Verify pdftohtml (poppler) is available for the pdftohtml PDF engine.
pdftohtml -v >/dev/null 2>&1 && echo "pdftohtml OK" || { echo "ERROR: pdftohtml not found"; exit 1; }; \
echo "=== Calibre stripped successfully ==="
# Optional: strip Chromium/WebEngine (~80 MB savings) when PDF output via Calibre is not needed.
# Build with --build-arg CALIBRE_STRIP_WEBENGINE=true to enable.
RUN if [ "${CALIBRE_STRIP_WEBENGINE}" = "true" ]; then \
echo "Stripping Calibre WebEngine (Chromium), PDF output via Calibre will be disabled"; \
rm -rf /opt/calibre/lib/qt6/libexec/QtWebEngineProcess \
/opt/calibre/lib/qt6/resources \
/opt/calibre/lib/libQt6WebEngine*.so.* \
/opt/calibre/lib/libQt6Quick*.so.* \
/opt/calibre/lib/libQt6Qml*.so.* \
/opt/calibre/translations/qtwebengine_locales 2>/dev/null || true; \
echo "WebEngine stripped, Calibre PDF output disabled"; \
else \
echo "CALIBRE_STRIP_WEBENGINE=false, keeping WebEngine for PDF output"; \
fi
# Stage 2: Build Ghostscript from source
FROM ubuntu:noble AS gs-build
ARG TARGETPLATFORM
ARG GS_VERSION=10.06.0
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/tmp/gs-build,id=gs-build-${TARGETPLATFORM:-local} \
apt-get update && apt-get install -y --no-install-recommends \
build-essential curl ca-certificates libfontconfig1-dev && rm -rf /var/lib/apt/lists/* && \
GS_TAG="gs$(printf '%s' "${GS_VERSION}" | tr -d '.')" && \
cd /tmp/gs-build && \
rm -rf ghostscript-* && \
(test -d "ghostscript-${GS_VERSION}" || curl -fsSL --max-time 300 --retry 3 --retry-delay 5 "https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/${GS_TAG}/ghostscript-${GS_VERSION}.tar.gz" | tar xz) && \
cd "ghostscript-${GS_VERSION}" && \
./configure \
--prefix=/usr/local \
--without-x \
--disable-cups \
--disable-gtk && \
make -j"$(nproc)" && \
make install && \
cd ..
# Stage 3: Build PDF Tools (QPDF and ImageMagick 7)
FROM ubuntu:noble AS pdf-tools-build
ARG TARGETPLATFORM
ARG QPDF_VERSION=12.3.2
ARG IM_VERSION=7.1.2-13
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/tmp/pdf-tools-build,id=pdf-tools-${TARGETPLATFORM:-local} \
apt-get update && apt-get install -y --no-install-recommends \
build-essential cmake libssl-dev libjpeg-dev zlib1g-dev curl ca-certificates pkg-config \
libpng-dev libtiff-dev libwebp-dev libxml2-dev libfreetype6-dev liblcms2-dev libzip-dev liblqr-1-0-dev \
libltdl-dev libtool && rm -rf /var/lib/apt/lists/* && \
cd /tmp/pdf-tools-build && \
rm -rf qpdf-* ImageMagick-* && \
# Build QPDF
(test -d "qpdf-${QPDF_VERSION}" || curl -fsSL --max-time 300 --retry 3 --retry-delay 5 "https://github.com/qpdf/qpdf/releases/download/v${QPDF_VERSION}/qpdf-${QPDF_VERSION}.tar.gz" | tar xz) && \
cd "qpdf-${QPDF_VERSION}" && \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DALLOW_CRYPTO_OPENSSL=ON -DDEFAULT_CRYPTO=openssl && \
cmake --build build --parallel "$(nproc)" && \
cmake --install build --strip && \
cd .. && \
# Build ImageMagick 7
(test -d "ImageMagick-${IM_VERSION}" || curl -fsSL --max-time 300 --retry 3 --retry-delay 5 "https://github.com/ImageMagick/ImageMagick/archive/refs/tags/${IM_VERSION}.tar.gz" | tar xz) && \
cd "ImageMagick-${IM_VERSION}" && \
./configure --prefix=/usr/local --with-modules --with-perl=no --with-magick-plus-plus=no --with-quantum-depth=16 --disable-static --enable-shared && \
make -j"$(nproc)" && \
make install-strip && \
# Enable PDF/PS/EPS in policy
sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' /usr/local/etc/ImageMagick-7/policy.xml && \
sed -i 's/rights="none" pattern="PS"/rights="read|write" pattern="PS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
sed -i 's/rights="none" pattern="EPS"/rights="read|write" pattern="EPS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
cd .. && \
ldconfig /usr/local/lib
# Stage ImageMagick outputs into a single directory so runtime can import them with one COPY
# (reduces 4 separate COPY layers to 1 independent --link layer).
RUN mkdir -p /magick-export/usr/bin \
/magick-export/usr/local/lib \
/magick-export/usr/local/etc && \
cp /usr/local/bin/magick /magick-export/usr/bin/ && \
cp -a /usr/local/lib/libMagick*.so* /magick-export/usr/local/lib/ && \
cp -a /usr/local/lib/ImageMagick-7* /magick-export/usr/local/lib/ && \
cp -a /usr/local/etc/ImageMagick-7 /magick-export/usr/local/etc/
# Stage 4: Build Python venv
FROM ubuntu:noble AS python-venv-build
ARG TARGETPLATFORM
ARG UNOSERVER_VERSION=3.6
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update && apt-get install -y --no-install-recommends \
python3 python3-venv ca-certificates binutils && \
rm -rf /var/lib/apt/lists/*
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
python3 -m venv /opt/venv --system-site-packages && \
/opt/venv/bin/pip install --no-cache-dir --prefer-binary \
weasyprint pdf2image opencv-python-headless ocrmypdf \
cryptography \
"unoserver==${UNOSERVER_VERSION}" && \
find /opt/venv -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true && \
find /opt/venv \( -name '*.pyc' -o -name '*.pyi' \) -delete 2>/dev/null || true && \
rm -rf /opt/venv/lib/python*/site-packages/pip \
/opt/venv/lib/python*/site-packages/pip-*.dist-info \
/opt/venv/lib/python*/site-packages/setuptools \
/opt/venv/lib/python*/site-packages/setuptools-*.dist-info && \
find /opt/venv -name '*.so' -exec strip --strip-unneeded {} + 2>/dev/null || true
# Final runtime image - the actual base image
FROM eclipse-temurin:25-jre-noble AS runtime
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
ENV DEBIAN_FRONTEND=noninteractive \
LANG=C.UTF-8 \
LC_ALL=C.UTF-8 \
TESS_BASE_PATH=/usr/share/tesseract-ocr/5/tessdata
ARG UNOSERVER_VERSION=3.6
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
set -eux; \
apt-get update; \
# Add LibreOffice Fresh PPA for latest version (26.2.x)
apt-get install -y --no-install-recommends software-properties-common; \
add-apt-repository -y ppa:libreoffice/ppa; \
apt-get update; \
apt-get install -y --no-install-recommends \
# Core tools
ca-certificates tzdata tini bash fontconfig curl \
ffmpeg poppler-utils fontforge \
gosu unpaper pngquant \
# Fonts: full coverage for standard + fat variants
fonts-dejavu \
fonts-liberation2 \
fonts-crosextra-caladea fonts-crosextra-carlito \
fonts-noto-core fonts-noto-mono fonts-noto-extra \
fonts-noto-cjk poppler-data \
fonts-freefont-ttf fonts-terminus \
# Python runtime & UNO bridge
python3 python3-uno python3-packaging \
# OCR
tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu tesseract-ocr-fra \
tesseract-ocr-por tesseract-ocr-chi-sim \
# Tesseract OSD for orientation detection
tesseract-ocr-osd \
# Graphics / AWT headless
libcairo2 libpango-1.0-0 libpangoft2-1.0-0 libgdk-pixbuf-2.0-0 \
libfreetype6 libfontconfig1 libx11-6 libxt6 libxext6 libxrender1 \
libxtst6 libxi6 libxinerama1 libxkbcommon0 libsm6 libice6 \
# Qt/EGL for Calibre CLI
libegl1 libgl1 libopengl0 libdbus-1-3 libglib2.0-0 libnss3 \
libasound2t64 libxcomposite1 libxrandr2 \
# Virtual framebuffer (required for headless LibreOffice Impress/Draw)
xvfb x11-utils coreutils \
libreoffice-writer-nogui libreoffice-calc-nogui \
libreoffice-impress-nogui libreoffice-draw-nogui \
libreoffice-base-nogui libreoffice-java-common \
; \
\
# Verify and fix LibreOffice
libreoffice --version; \
soffice --version 2>/dev/null || true; \
# Rebuild UNO bridge type database
/usr/lib/libreoffice/program/soffice.bin --headless --convert-to pdf /dev/null 2>/dev/null || true; \
# Force font cache rebuild
fc-cache -f -v 2>&1 | awk 'NR <= 20'; \
\
# Cleanup stage.
\
# Remove PPA helper, no longer needed after apt-get update
apt-get remove --purge -y software-properties-common || true; \
apt-get autoremove --purge -y || true; \
rm -rf /var/lib/apt/lists/*; \
\
# Docs / man / info / icons / themes / GUI assets (headless server)
rm -rf /usr/share/doc/* /usr/share/man/* /usr/share/info/* \
/usr/share/lintian/* /usr/share/linda/* \
/usr/share/icons/* /usr/share/themes/* \
/usr/share/javascript/* \
/usr/share/gtk-3.0/* \
/usr/share/fontforge/pixmaps \
/usr/share/fontforge/osx \
/usr/share/fontforge/cidmap \
/usr/share/fontforge/prefs \
/usr/share/liblangtag/* \
/usr/share/tcltk/* \
/usr/share/python-wheels/* \
/usr/share/glib-2.0/schemas/* \
/usr/share/mime/* \
/usr/share/xml/iso-codes \
/usr/share/GConf \
/usr/share/bash-completion \
/usr/share/zsh \
/usr/share/libmysofa \
/usr/share/alsa \
/usr/share/iso-codes \
/usr/share/perl5 \
/usr/share/libthai \
/usr/share/libexttextcat \
/usr/share/openal \
/usr/share/gcc; \
\
find /usr/share/locale -mindepth 1 -maxdepth 1 -type d \
! -name 'en*' -exec rm -rf {} + 2>/dev/null || true; \
rm -rf /usr/share/i18n/locales /usr/share/i18n/charmaps; \
\
rm -rf /usr/lib/libreoffice/share/gallery \
/usr/lib/libreoffice/share/template \
/usr/lib/libreoffice/share/wizards \
/usr/lib/libreoffice/share/autotext \
/usr/lib/libreoffice/help \
/usr/lib/libreoffice/share/config/images_*.zip \
/usr/lib/libreoffice/share/basic \
/usr/lib/libreoffice/share/Scripts \
/usr/lib/libreoffice/share/autocorr \
/usr/lib/libreoffice/share/classification \
/usr/lib/libreoffice/share/wordbook \
/usr/lib/libreoffice/share/fingerprint \
/usr/lib/libreoffice/share/xdg \
/usr/lib/libreoffice/share/numbertext \
/usr/lib/libreoffice/share/shell \
/usr/lib/libreoffice/share/palette \
/usr/lib/libreoffice/share/theme_definitions \
/usr/lib/libreoffice/share/xslt \
/usr/lib/libreoffice/share/labels \
/usr/lib/libreoffice/share/dtd \
/usr/lib/libreoffice/share/tipoftheday \
/usr/lib/libreoffice/share/toolbarmode \
/usr/lib/libreoffice/share/psprint \
/usr/lib/libreoffice/CREDITS.fodt \
/usr/lib/libreoffice/LICENSE.html; \
\
# Remove unused LO extensions (GUI-only; not needed for document conversion)
rm -rf /usr/lib/libreoffice/share/extensions/wiki-publisher \
/usr/lib/libreoffice/share/extensions/nlpsolver \
/usr/lib/libreoffice/share/extensions/dict-* 2>/dev/null || true; \
# Remove LO database components (LO Base; not needed for Writer/Calc/Impress conversion)
rm -rf /usr/lib/libreoffice/program/libdba* \
/usr/lib/libreoffice/program/libdbahsql* \
/usr/lib/libreoffice/program/libdbu* \
/usr/lib/libreoffice/program/libreport* 2>/dev/null || true; \
\
rm -rf /usr/lib/python3.12/test \
/usr/lib/python3.12/idlelib \
/usr/lib/python3.12/tkinter \
/usr/lib/python3.12/lib2to3 \
/usr/lib/python3.12/pydoc_data; \
\
rm -rf /usr/lib/python3/dist-packages/scipy \
/usr/lib/python3/dist-packages/sympy \
/usr/lib/python3/dist-packages/mpmath; \
\
rm -rf \
/usr/lib/python3/dist-packages/cffi \
/usr/lib/python3/dist-packages/cffi-*.dist-info \
/usr/lib/python3/dist-packages/_cffi_backend*.so \
/usr/lib/python3/dist-packages/_cffi_backend*.cpython*.so \
2>/dev/null || true; \
\
# Strip debug symbols from ALL shared libraries
find /usr/lib -name '*.so*' -type f \
-not -path '*/jvm/*' \
-not -path '*/libreoffice/*' \
-exec strip --strip-unneeded {} + 2>/dev/null || true; \
\
# Remove GPU backends not needed for headless operation.
MULTIARCH_LIBDIR=$(dpkg-architecture -qDEB_HOST_MULTIARCH 2>/dev/null \
|| find /usr/lib -maxdepth 1 -type d -name '*-linux-gnu' | head -1); \
rm -f \
"${MULTIARCH_LIBDIR}"/libLLVM*.so* \
"${MULTIARCH_LIBDIR}"/libgallium*.so* \
2>/dev/null || true; \
\
find /usr/lib/python3* -type d -name __pycache__ \
-exec rm -rf {} + 2>/dev/null || true; \
find /usr/lib/python3* \( -name '*.pyc' -o -name '*.pyi' \) \
-delete 2>/dev/null || true; \
\
rm -rf /usr/share/bug /usr/share/lintian /usr/share/linda \
/var/lib/dpkg/info/*.list \
/var/lib/dpkg/info/*.md5sums \
/var/lib/dpkg/info/*.conffiles \
/var/lib/dpkg/info/*.postinst \
/var/lib/dpkg/info/*.preinst \
/var/lib/dpkg/info/*.prerm \
/var/lib/dpkg/info/*.postrm \
/var/lib/dpkg/info/*.triggers \
/var/lib/dpkg/info/*.shlibs \
/var/lib/dpkg/info/*.symbols \
/var/lib/dpkg/info/*.templates \
/var/log/dpkg.log /var/log/apt/* \
/usr/local/share/ghostscript/*/doc \
/usr/local/share/ghostscript/*/examples \
/usr/share/ImageMagick-*/doc \
/usr/share/ImageMagick-*/www; \
\
# Tesseract training configs (not needed for OCR, but keep configs/ for hocr/txt output)
rm -rf /usr/share/tesseract-ocr/*/tessdata/tessconfigs; \
\
# Noto fonts: keep Regular weight only (~370MB savings)
find /usr/share/fonts/truetype/noto -type f \
! -name '*Regular*' -delete 2>/dev/null || true; \
find /usr/share/fonts/opentype -type f \
! -name '*Regular*' -delete 2>/dev/null || true; \
# DejaVu: keep Regular and Bold only
find /usr/share/fonts/truetype -name '*DejaVu*' \
! -name '*-Regular*' ! -name '*-Bold*' ! -name '*Bold.ttf' \
! -name 'DejaVuSans.ttf' ! -name 'DejaVuSerif.ttf' ! -name 'DejaVuSansMono.ttf' \
-type f -delete 2>/dev/null || true; \
# Remove empty font directories after cleanup
find /usr/share/fonts -type d -empty -delete 2>/dev/null || true; \
\
# gconv: keep only essential charset conversion modules
GCONV_DIR=$(find /usr/lib -type d -name gconv 2>/dev/null | head -1); \
if [ -n "$GCONV_DIR" ] && [ -d "$GCONV_DIR" ]; then \
mkdir -p /tmp/gconv-keep; \
for mod in UTF-8.so UTF-16.so UTF-32.so UTF-7.so \
ISO8859-1.so ISO8859-15.so ISO8859-2.so ISO8859-9.so \
UNICODE.so CP1252.so CP1251.so CP1250.so \
EUC-JP.so EUC-KR.so EUC-CN.so \
SHIFT_JIS.so GB18030.so BIG5.so \
gconv-modules gconv-modules.d gconv-modules.cache; do \
[ -e "$GCONV_DIR/$mod" ] && \
cp -a "$GCONV_DIR/$mod" /tmp/gconv-keep/ 2>/dev/null || true; \
done; \
rm -rf "$GCONV_DIR"/*; \
cp -a /tmp/gconv-keep/* "$GCONV_DIR/" 2>/dev/null || true; \
rm -rf /tmp/gconv-keep; \
fi; \
\
# Misc caches
rm -rf /var/cache/fontconfig/* /tmp/*
# External tool layers, all use --link for independent layer caching and parallel pulls.
COPY --link --from=calibre-build /opt/calibre /opt/calibre
COPY --link --from=pdf-tools-build /usr/local/bin/qpdf /usr/bin/qpdf
# ImageMagick: 4 layers collapsed to 1 via the magick-export staging dir in pdf-tools-build
COPY --link --from=pdf-tools-build /magick-export/ /
COPY --link --from=gs-build /usr/local/bin/gs /usr/local/bin/gs
COPY --link --from=gs-build /usr/local/share/ghostscript /usr/local/share/ghostscript
# Python venv pre-built (no pip install at runtime, no build tools needed)
COPY --link --from=python-venv-build /opt/venv /opt/venv
RUN ldconfig /usr/local/lib && \
PYTHONDONTWRITEBYTECODE=1 \
/opt/venv/bin/python -c "import cffi; print('cffi OK:', cffi.__version__)" && \
PYTHONDONTWRITEBYTECODE=1 \
/opt/venv/bin/python -c "import cv2; print('OpenCV', cv2.__version__)" && \
PYTHONDONTWRITEBYTECODE=1 \
/opt/venv/bin/python -c "import ocrmypdf; print('ocrmypdf OK')" && \
find /opt/venv -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
# Non-root user
ARG PUID=1000
ARG PGID=1000
RUN set -eux; \
if ! getent group stirlingpdfgroup >/dev/null 2>&1; then \
groupadd -g "${PGID}" stirlingpdfgroup 2>/dev/null \
|| groupadd stirlingpdfgroup; \
fi; \
if ! id -u stirlingpdfuser >/dev/null 2>&1; then \
useradd -m -u "${PUID}" -g stirlingpdfgroup \
-d /home/stirlingpdfuser -s /bin/bash stirlingpdfuser 2>/dev/null \
|| useradd -m -g stirlingpdfgroup \
-d /home/stirlingpdfuser -s /bin/bash stirlingpdfuser; \
fi; \
ln -sf /usr/sbin/gosu /usr/local/bin/su-exec
# Application directories
RUN set -eux; \
mkdir -p /configs /configs/cache /configs/heap_dumps /logs /customFiles \
/pipeline/watchedFolders /pipeline/finishedFolders \
/tmp/stirling-pdf/heap_dumps; \
chown -R stirlingpdfuser:stirlingpdfgroup \
/home/stirlingpdfuser /configs /logs /customFiles /pipeline \
/tmp/stirling-pdf; \
chmod 750 /tmp/stirling-pdf; \
chmod 750 /tmp/stirling-pdf/heap_dumps
# Tool symlinks
RUN set -eux; \
ln -sf /opt/calibre/ebook-convert /usr/bin/ebook-convert; \
ln -sf /opt/venv/bin/unoconvert /usr/local/bin/unoconvert; \
ln -sf /opt/venv/bin/unoserver /usr/local/bin/unoserver; \
ln -sf /opt/venv/bin/ocrmypdf /usr/local/bin/ocrmypdf; \
ln -sf /opt/venv/bin/weasyprint /usr/local/bin/weasyprint; \
ln -sf /opt/venv/bin/unoping /usr/local/bin/unoping; \
fc-cache -f
# Metadata labels - base image
LABEL org.opencontainers.image.title="Stirling-PDF Base" \
org.opencontainers.image.description="Pre-built base image with Calibre, Ghostscript, QPDF, ImageMagick, LibreOffice, OCRmyPDF and dependencies" \
org.opencontainers.image.source="https://github.com/Stirling-Tools/Stirling-PDF" \
org.opencontainers.image.licenses="MIT" \
org.opencontainers.image.vendor="Stirling-Tools" \
org.opencontainers.image.url="https://www.stirlingpdf.com" \
org.opencontainers.image.documentation="https://docs.stirlingpdf.com" \
maintainer="Stirling-Tools" \
org.opencontainers.image.authors="Stirling-Tools"

View File

@@ -1,269 +1,13 @@
# Stirling-PDF - Full version (embedded frontend)
# Uses pre-built base image for fast builds
FROM ubuntu:noble AS calibre-build
ARG TARGETPLATFORM
ARG CALIBRE_VERSION=9.4.0
ARG CALIBRE_STRIP_WEBENGINE=false
ARG BASE_VERSION=1.0.0
ARG BASE_IMAGE=ghcr.io/stirling-tools/stirling-pdf-base:${BASE_VERSION}
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
set -eux; \
apt-get update; \
apt-get install -y --no-install-recommends \
ca-certificates curl xz-utils libnss3 libfontconfig1 \
libgl1 libegl1 libdbus-1-3 libasound2t64 libxcomposite1 \
libxrandr2 libxkbcommon0 libxi6 libxtst6 libopengl0 \
poppler-utils; \
rm -rf /var/lib/apt/lists/*; \
\
case "$(uname -m)" in \
x86_64) CALIBRE_ARCH="x86_64" ;; \
aarch64) CALIBRE_ARCH="arm64" ;; \
*) echo "Unsupported arch: $(uname -m)"; exit 1 ;; \
esac; \
\
curl -fsSL \
"https://download.calibre-ebook.com/${CALIBRE_VERSION}/calibre-${CALIBRE_VERSION}-${CALIBRE_ARCH}.txz" \
-o /tmp/calibre.txz; \
mkdir -p /opt/calibre; \
tar xJf /tmp/calibre.txz -C /opt/calibre; \
rm /tmp/calibre.txz; \
\
# We only need Qt6 WebEngine (Chromium) for ebook->PDF output.
# PDF INPUT now uses the pdftohtml engine (poppler), not Qt.
rm -f /opt/calibre/lib/libQt6Designer* \
/opt/calibre/lib/libQt6Multimedia* \
/opt/calibre/lib/libQt6SpatialAudio.so.* \
/opt/calibre/lib/libQt6NetworkAuth.so.* \
/opt/calibre/lib/libQt6Concurrent.so.* \
/opt/calibre/lib/libQt6OpenGLWidgets.so.* \
/opt/calibre/lib/libQt6QuickWidgets.so.* \
/opt/calibre/lib/libQt6Svg.so.* \
/opt/calibre/lib/libQt6SvgWidgets.so.* \
/opt/calibre/lib/libQt6Pdf*.so.* \
/opt/calibre/lib/libQt6ShaderTools.so.* \
/opt/calibre/lib/libQt6SerialPort.so.* \
/opt/calibre/lib/libQt6Sensors.so.* \
/opt/calibre/lib/libQt6Test.so.* \
/opt/calibre/lib/libQt6Sql.so.* \
/opt/calibre/lib/libQt6RemoteObjects.so.* \
/opt/calibre/lib/libQt6Help.so.* \
/opt/calibre/lib/libQt6VirtualKeyboard.so.* \
/opt/calibre/lib/libQt6WaylandClient.so.* \
/opt/calibre/lib/libQt6WaylandCompositor.so.* \
/opt/calibre/lib/libQt6Bluetooth.so.* \
/opt/calibre/lib/libQt6Nfc.so.* \
/opt/calibre/lib/libQt6Charts.so.* \
/opt/calibre/lib/libQt6DataVisualization.so.* \
/opt/calibre/lib/libQt6Scxml.so.* \
/opt/calibre/lib/libQt6StateMachine.so.* \
/opt/calibre/lib/libQt6TextToSpeech.so.* \
/opt/calibre/lib/libQt63D*.so.* \
/opt/calibre/lib/libavcodec.so.* \
/opt/calibre/lib/libavfilter.so.* \
/opt/calibre/lib/libavformat.so.* \
/opt/calibre/lib/libavutil.so.* \
/opt/calibre/lib/libavdevice.so.* \
/opt/calibre/lib/libpostproc.so.* \
/opt/calibre/lib/libswresample.so.* \
/opt/calibre/lib/libswscale.so.* \
/opt/calibre/lib/libspeex.so.* \
/opt/calibre/lib/libFLAC.so.* \
/opt/calibre/lib/libopus.so.* \
/opt/calibre/lib/libvorbis*.so.* \
/opt/calibre/lib/libasyncns.so.* \
/opt/calibre/lib/libspeechd.so.* \
/opt/calibre/lib/libespeak-ng.so.* \
/opt/calibre/lib/libonnxruntime.so.* \
/opt/calibre/lib/libgio-2.0.so.* \
/opt/calibre/lib/libzstd.so.* \
/opt/calibre/lib/libhunspell-1.7.so.* \
/opt/calibre/lib/libbrotlienc.so.* \
/opt/calibre/lib/libbrotlicommon.so.* \
/opt/calibre/lib/libbrotlidec.so.* \
/opt/calibre/lib/libstemmer.so.* \
/opt/calibre/lib/libmtp.so.* \
/opt/calibre/lib/libncursesw.so.* \
/opt/calibre/lib/libchm.so.* \
/opt/calibre/lib/libgcrypt.so.* \
/opt/calibre/lib/libgpg-error.so.* \
/opt/calibre/lib/libicuio.so.* \
/opt/calibre/lib/libreadline.so.* \
/opt/calibre/lib/libusb-1.0.so.* \
/opt/calibre/lib/libpulse*.so.* \
/opt/calibre/lib/libsndfile.so.* \
/opt/calibre/lib/libmpv.so.* \
/opt/calibre/lib/libass.so.* \
/opt/calibre/lib/librubberband.so.* \
/opt/calibre/lib/libsamplerate.so.*; \
rm -rf /opt/calibre/lib/qt6/plugins/platformthemes \
/opt/calibre/lib/qt6/plugins/multimedia \
/opt/calibre/lib/qt6/plugins/designer \
/opt/calibre/lib/qt6/plugins/virtualkeyboard \
/opt/calibre/lib/qt6/plugins/wayland* \
/opt/calibre/lib/qt6/plugins/texttospeech \
/opt/calibre/lib/qt6/plugins/position \
/opt/calibre/lib/qt6/plugins/sensors \
/opt/calibre/lib/qt6/plugins/sqldrivers \
/opt/calibre/lib/qt6/plugins/canbus \
/opt/calibre/lib/qt6/plugins/sceneparsers \
/opt/calibre/lib/qt6/plugins/renderers \
/opt/calibre/lib/qt6/plugins/geometryloaders \
/opt/calibre/lib/qt6/plugins/generic \
/opt/calibre/lib/qt6/plugins/qmltooling \
/opt/calibre/lib/qt6/libexec/QtWebEngineProcess.bak; \
rm -rf /opt/calibre/plugins/sqldrivers \
/opt/calibre/plugins/multimedia \
/opt/calibre/plugins/wayland-shell-integration \
/opt/calibre/plugins/wayland-graphics-integration-client \
/opt/calibre/plugins/wayland-decoration-client \
/opt/calibre/plugins/texttospeech \
/opt/calibre/plugins/platformthemes \
/opt/calibre/plugins/platforminputcontexts \
/opt/calibre/plugins/egldeviceintegrations \
/opt/calibre/plugins/iconengines; \
\
# Remove GUI executables but keep ebook-convert, ebook-meta, and calibre-parallel.
rm -f /opt/calibre/calibre \
/opt/calibre/calibre-server \
/opt/calibre/calibre-smtp \
/opt/calibre/calibre-debug \
/opt/calibre/calibre-customize \
/opt/calibre/calibredb \
/opt/calibre/ebook-viewer \
/opt/calibre/ebook-edit \
/opt/calibre/ebook-polish \
/opt/calibre/ebook-device \
/opt/calibre/fetch-ebook-metadata \
/opt/calibre/lrf2lrs \
/opt/calibre/lrs2lrf \
/opt/calibre/markdown-calibre \
/opt/calibre/web2disk; \
\
# Remove Python modules not needed for conversion.
rm -rf /opt/calibre/lib/calibre/gui2 \
/opt/calibre/lib/calibre/devices \
/opt/calibre/lib/calibre/library \
/opt/calibre/lib/calibre/db \
/opt/calibre/lib/calibre/srv \
/opt/calibre/lib/calibre/spell \
/opt/calibre/lib/calibre/live \
/opt/calibre/lib/calibre/utils/piper \
/opt/calibre/lib/calibre/utils/certgen.so \
/opt/calibre/lib/calibre/utils/https \
/opt/calibre/lib/calibre/utils/mdns; \
\
# Remove resources not needed for CLI conversion.
rm -rf /opt/calibre/resources/images \
/opt/calibre/resources/icons \
/opt/calibre/resources/icons.rcc \
/opt/calibre/resources/content-server \
/opt/calibre/resources/editor* \
/opt/calibre/resources/viewer \
/opt/calibre/resources/viewer.js \
/opt/calibre/resources/viewer.html \
/opt/calibre/resources/recipes \
/opt/calibre/resources/dictionaries \
/opt/calibre/resources/hyphenation \
/opt/calibre/resources/catalog \
/opt/calibre/resources/calibre-mimetypes.xml \
/opt/calibre/resources/changelog.json \
/opt/calibre/resources/user-agent-data.json \
/opt/calibre/resources/builtin_recipes.zip \
/opt/calibre/resources/builtin_recipes.xml \
/opt/calibre/resources/builtin_recipes.xml \
/opt/calibre/resources/stylelint-bundle.min.js \
/opt/calibre/resources/stylelint.js \
/opt/calibre/resources/rapydscript \
/opt/calibre/resources/quick_start \
/opt/calibre/resources/piper-voices.json \
/opt/calibre/resources/images.qrc \
/opt/calibre/resources/mozilla-ca-certs.pem \
/opt/calibre/resources/ebook-convert-complete.calibre_msgpack \
/opt/calibre/resources/mathjax \
/opt/calibre/resources/common-english-words.txt \
/opt/calibre/resources/calibre-portable.sh \
/opt/calibre/resources/calibre-portable.bat \
/opt/calibre/resources/metadata_sqlite.sql \
/opt/calibre/resources/notes_sqlite.sql \
/opt/calibre/resources/fts_sqlite.sql \
/opt/calibre/resources/fts_triggers.sql \
/opt/calibre/resources/jacket \
/opt/calibre/resources/editor-functions.json \
/opt/calibre/resources/calibre-ebook-root-CA.crt \
/opt/calibre/resources/csscolorparser.js \
/opt/calibre/resources/lookup.js \
/opt/calibre/resources/pdf-mathjax-loader.js \
/opt/calibre/resources/scraper.js \
/opt/calibre/resources/toc.js \
/opt/calibre/resources/user-manual-translation-stats.json \
/opt/calibre/resources/pin-template.svg \
/opt/calibre/resources/scripts.calibre_msgpack \
/opt/calibre/resources/fonts \
/opt/calibre/resources/qtwebengine_devtools_resources.pak \
/opt/calibre/lib/calibre/ebooks/docx/images \
/opt/calibre/share \
/opt/calibre/man; \
\
# Remove translations and localization while keeping required libraries.
# Keep iso639.calibre_msgpack (required)
# Keep only en-US.pak from qtwebengine_locales (required for WebEngine)
rm -rf /opt/calibre/lib/qt6/translations; \
find /opt/calibre/translations -mindepth 1 -maxdepth 1 ! -name 'qtwebengine_locales' -exec rm -rf {} +; \
find /opt/calibre/translations/qtwebengine_locales -type f ! -name 'en-US.pak' -delete 2>/dev/null || true; \
if [ -d /opt/calibre/resources/localization ]; then \
rm -rf /opt/calibre/resources/localization/locales.zip \
/opt/calibre/resources/localization/stats.calibre_msgpack \
/opt/calibre/resources/localization/website-languages.txt; \
find /opt/calibre/resources/localization -mindepth 1 -maxdepth 1 ! -name 'iso639.calibre_msgpack' -exec rm -rf {} +; \
fi; \
\
# Strip debug symbols from calibre extension modules.
# Exclude Qt6 libs: libQt6WebEngineCore and friends embed Chromium V8 JIT code
# and internal resource blobs that strip corrupts, causing segfaults at render time.
find /opt/calibre/lib -name '*.so*' \
! -name 'libQt6*' \
-exec strip --strip-unneeded {} + 2>/dev/null || true; \
\
# Remove Python bytecode caches.
find /opt/calibre -type d -name __pycache__ \
-exec rm -rf {} + 2>/dev/null || true; \
find /opt/calibre -name '*.pyc' -delete 2>/dev/null || true; \
\
# ── Verify conversion still works ──
# NOTE: txt->epub used intentionally NOT txt->pdf.
# Calibre 7+ uses WebEngine (Chromium) for PDF output, which requires kernel
# capabilities unavailable in Docker RUN steps and segfaults under QEMU.
# epub output exercises the same Python/plugin stack without touching WebEngine.
/opt/calibre/ebook-convert --version; \
echo "Hello" > /tmp/test.txt; \
/opt/calibre/ebook-convert /tmp/test.txt /tmp/test.epub; \
rm -f /tmp/test.txt /tmp/test.epub; \
\
# Verify pdftohtml (poppler) is available for the pdftohtml PDF engine.
pdftohtml -v >/dev/null 2>&1 && echo "pdftohtml OK" || { echo "ERROR: pdftohtml not found"; exit 1; }; \
echo "=== Calibre stripped successfully ==="
# Optional: strip Chromium/WebEngine (~80 MB savings) when PDF output via Calibre is not needed.
# Build with --build-arg CALIBRE_STRIP_WEBENGINE=true to enable.
RUN if [ "${CALIBRE_STRIP_WEBENGINE}" = "true" ]; then \
echo "Stripping Calibre WebEngine (Chromium), PDF output via Calibre will be disabled"; \
rm -rf /opt/calibre/lib/qt6/libexec/QtWebEngineProcess \
/opt/calibre/lib/qt6/resources \
/opt/calibre/lib/libQt6WebEngine*.so.* \
/opt/calibre/lib/libQt6Quick*.so.* \
/opt/calibre/lib/libQt6Qml*.so.* \
/opt/calibre/translations/qtwebengine_locales 2>/dev/null || true; \
echo "WebEngine stripped, Calibre PDF output disabled"; \
else \
echo "CALIBRE_STRIP_WEBENGINE=false, keeping WebEngine for PDF output"; \
fi
# Build the Java application and frontend.
# Stage 1: Build the Java application and frontend
FROM gradle:9.3.1-jdk25 AS app-build
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update \
RUN apt-get update \
&& apt-get install -y --no-install-recommends curl ca-certificates \
&& update-ca-certificates \
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
@@ -286,403 +30,31 @@ COPY app/common/build.gradle app/common/
COPY app/proprietary/build.gradle app/proprietary/
# Use system gradle instead of gradlew to avoid SSL issues downloading gradle distribution on emulated arm64
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
--mount=type=cache,target=/home/gradle/.gradle/wrapper \
gradle dependencies --no-daemon || true
RUN gradle dependencies --no-daemon || true
COPY . .
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
--mount=type=cache,target=/home/gradle/.gradle/wrapper \
--mount=type=cache,target=/root/.npm,sharing=locked \
DISABLE_ADDITIONAL_FEATURES=false \
RUN DISABLE_ADDITIONAL_FEATURES=false \
gradle clean build \
-PbuildWithFrontend=true \
-x spotlessApply -x spotlessCheck -x test -x sonarqube \
--no-daemon
# Extract Spring Boot Layers.
# Stage 2: Extract Spring Boot Layers
FROM eclipse-temurin:25-jre-noble AS jar-extract
WORKDIR /tmp
COPY --from=app-build /app/app/core/build/libs/*.jar app.jar
RUN java -Djarmode=tools -jar app.jar extract --layers --destination /layers
# Build Ghostscript 10.06.0 from source in an isolated stage (avoids library conflicts).
FROM ubuntu:noble AS gs-build
ARG TARGETPLATFORM
ARG GS_VERSION=10.06.0
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/tmp/gs-build,id=gs-build-${TARGETPLATFORM:-local} \
apt-get update && apt-get install -y --no-install-recommends \
build-essential curl ca-certificates libfontconfig1-dev && rm -rf /var/lib/apt/lists/* && \
GS_TAG="gs$(printf '%s' "${GS_VERSION}" | tr -d '.')" && \
cd /tmp/gs-build && \
rm -rf ghostscript-* && \
(test -d "ghostscript-${GS_VERSION}" || curl -fsSL "https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/${GS_TAG}/ghostscript-${GS_VERSION}.tar.gz" | tar xz) && \
cd "ghostscript-${GS_VERSION}" && \
./configure \
--prefix=/usr/local \
--without-x \
--disable-cups \
--disable-gtk && \
make -j"$(nproc)" && \
make install && \
cd ..
# Stage 3: Final runtime image on top of pre-built base
FROM ${BASE_IMAGE}
ARG VERSION_TAG
# Build PDF Tools (QPDF and ImageMagick 7).
FROM ubuntu:noble AS pdf-tools-build
ARG TARGETPLATFORM
ARG QPDF_VERSION=12.3.2
ARG IM_VERSION=7.1.2-13
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/tmp/pdf-tools-build,id=pdf-tools-${TARGETPLATFORM:-local} \
apt-get update && apt-get install -y --no-install-recommends \
build-essential cmake libssl-dev libjpeg-dev zlib1g-dev curl ca-certificates pkg-config \
libpng-dev libtiff-dev libwebp-dev libxml2-dev libfreetype6-dev liblcms2-dev libzip-dev liblqr-1-0-dev \
libltdl-dev libtool && rm -rf /var/lib/apt/lists/* && \
cd /tmp/pdf-tools-build && \
rm -rf qpdf-* ImageMagick-* && \
# Build QPDF
(test -d "qpdf-${QPDF_VERSION}" || curl -fsSL "https://github.com/qpdf/qpdf/releases/download/v${QPDF_VERSION}/qpdf-${QPDF_VERSION}.tar.gz" | tar xz) && \
cd "qpdf-${QPDF_VERSION}" && \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DALLOW_CRYPTO_OPENSSL=ON -DDEFAULT_CRYPTO=openssl && \
cmake --build build --parallel "$(nproc)" && \
cmake --install build --strip && \
cd .. && \
# Build ImageMagick 7
(test -d "ImageMagick-${IM_VERSION}" || curl -fsSL "https://github.com/ImageMagick/ImageMagick/archive/refs/tags/${IM_VERSION}.tar.gz" | tar xz) && \
cd "ImageMagick-${IM_VERSION}" && \
./configure --prefix=/usr/local --with-modules --with-perl=no --with-magick-plus-plus=no --with-quantum-depth=16 --disable-static --enable-shared && \
make -j"$(nproc)" && \
make install-strip && \
# Enable PDF/PS/EPS in policy
sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' /usr/local/etc/ImageMagick-7/policy.xml && \
sed -i 's/rights="none" pattern="PS"/rights="read|write" pattern="PS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
sed -i 's/rights="none" pattern="EPS"/rights="read|write" pattern="EPS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
cd .. && \
ldconfig /usr/local/lib
# Stage ImageMagick outputs into a single directory so runtime can import them with one COPY
# (reduces 4 separate COPY layers to 1 independent --link layer).
RUN mkdir -p /magick-export/usr/bin \
/magick-export/usr/local/lib \
/magick-export/usr/local/etc && \
cp /usr/local/bin/magick /magick-export/usr/bin/ && \
cp -a /usr/local/lib/libMagick*.so* /magick-export/usr/local/lib/ && \
cp -a /usr/local/lib/ImageMagick-7* /magick-export/usr/local/lib/ && \
cp -a /usr/local/etc/ImageMagick-7 /magick-export/usr/local/etc/
# Build Python venv in an isolated stage so runtime image never needs build tools.
# Packages with native extensions (opencv, cryptography) use pre-built wheels (--prefer-binary).
# python3-uno is intentionally NOT installed here, it is a system package in the runtime stage
# and accessed via --system-site-packages at runtime.
FROM ubuntu:noble AS python-venv-build
ARG TARGETPLATFORM
ARG UNOSERVER_VERSION=3.6
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update && apt-get install -y --no-install-recommends \
python3 python3-venv ca-certificates binutils && \
rm -rf /var/lib/apt/lists/*
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
python3 -m venv /opt/venv --system-site-packages && \
/opt/venv/bin/pip install --no-cache-dir --prefer-binary \
weasyprint pdf2image opencv-python-headless ocrmypdf \
cryptography \
"unoserver==${UNOSERVER_VERSION}" && \
find /opt/venv -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true && \
find /opt/venv \( -name '*.pyc' -o -name '*.pyi' \) -delete 2>/dev/null || true && \
rm -rf /opt/venv/lib/python*/site-packages/pip \
/opt/venv/lib/python*/site-packages/pip-*.dist-info \
/opt/venv/lib/python*/site-packages/setuptools \
/opt/venv/lib/python*/site-packages/setuptools-*.dist-info && \
find /opt/venv -name '*.so' -exec strip --strip-unneeded {} + 2>/dev/null || true
# Final runtime image.
FROM eclipse-temurin:25-jre-noble AS runtime
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
ENV DEBIAN_FRONTEND=noninteractive \
LANG=C.UTF-8 \
LC_ALL=C.UTF-8 \
TESS_BASE_PATH=/usr/share/tesseract-ocr/5/tessdata
ARG UNOSERVER_VERSION=3.6
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
set -eux; \
apt-get update; \
# Add LibreOffice Fresh PPA for latest version (26.2.x)
apt-get install -y --no-install-recommends software-properties-common; \
add-apt-repository -y ppa:libreoffice/ppa; \
apt-get update; \
apt-get install -y --no-install-recommends \
# Core tools
ca-certificates tzdata tini bash fontconfig curl \
ffmpeg poppler-utils fontforge \
gosu unpaper \
# Fonts: full CJK coverage retained
fonts-dejavu \
fonts-liberation2 \
fonts-crosextra-caladea fonts-crosextra-carlito \
fonts-noto-core fonts-noto-mono fonts-noto-extra \
fonts-noto-cjk poppler-data \
# python3-uno required for UNO bridge (accessed by venv via --system-site-packages)
# python3-venv is NOT needed: the copied /opt/venv works without it at runtime
# python3-dev is NOT needed, venv is pre-built in python-venv-build stage
python3 python3-uno \
# Python packages are in /opt/venv (copied from python-venv-build stage below)
# OCR
tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu tesseract-ocr-fra \
tesseract-ocr-por tesseract-ocr-chi-sim \
# Tesseract OSD for orientation detection
tesseract-ocr-osd \
# Graphics / AWT headless
libcairo2 libpango-1.0-0 libpangoft2-1.0-0 libgdk-pixbuf-2.0-0 \
libfreetype6 libfontconfig1 libx11-6 libxt6 libxext6 libxrender1 \
libxtst6 libxi6 libxinerama1 libxkbcommon0 libsm6 libice6 \
# Qt/EGL for Calibre CLI
libegl1 libgl1 libopengl0 libdbus-1-3 libglib2.0-0 libnss3 \
libasound2t64 libxcomposite1 libxrandr2 \
# Virtual framebuffer (required for headless LibreOffice Impress/Draw)
xvfb x11-utils coreutils \
libreoffice-writer-nogui libreoffice-calc-nogui \
libreoffice-impress-nogui libreoffice-draw-nogui \
libreoffice-java-common \
; \
\
\
# Verify and fix LibreOffice
libreoffice --version; \
soffice --version 2>/dev/null || true; \
# Rebuild UNO bridge type database
/usr/lib/libreoffice/program/soffice.bin --headless --convert-to pdf /dev/null 2>/dev/null || true; \
# Force font cache rebuild
fc-cache -f -v 2>&1 | awk 'NR <= 20'; \
\
# Cleanup stage.
\
# Remove PPA helper, no longer needed after apt-get update
apt-get remove --purge -y software-properties-common || true; \
apt-get autoremove --purge -y || true; \
rm -rf /var/lib/apt/lists/*; \
\
# Docs / man / info / icons / themes / GUI assets (headless server)
rm -rf /usr/share/doc/* /usr/share/man/* /usr/share/info/* \
/usr/share/lintian/* /usr/share/linda/* \
/usr/share/icons/* /usr/share/themes/* \
/usr/share/javascript/* \
/usr/share/gtk-3.0/* \
/usr/share/fontforge/pixmaps \
/usr/share/fontforge/osx \
/usr/share/fontforge/cidmap \
/usr/share/fontforge/prefs \
/usr/share/liblangtag/* \
/usr/share/tcltk/* \
/usr/share/python-wheels/* \
/usr/share/glib-2.0/schemas/* \
/usr/share/mime/* \
/usr/share/xml/iso-codes \
/usr/share/GConf \
/usr/share/bash-completion \
/usr/share/zsh \
/usr/share/libmysofa \
/usr/share/alsa \
/usr/share/iso-codes \
/usr/share/perl5 \
/usr/share/libthai \
/usr/share/libexttextcat \
/usr/share/openal \
/usr/share/gcc; \
\
find /usr/share/locale -mindepth 1 -maxdepth 1 -type d \
! -name 'en*' -exec rm -rf {} + 2>/dev/null || true; \
rm -rf /usr/share/i18n/locales /usr/share/i18n/charmaps; \
\
rm -rf /usr/lib/libreoffice/share/gallery \
/usr/lib/libreoffice/share/template \
/usr/lib/libreoffice/share/wizards \
/usr/lib/libreoffice/share/autotext \
/usr/lib/libreoffice/help \
/usr/lib/libreoffice/share/config/images_*.zip \
/usr/lib/libreoffice/share/basic \
/usr/lib/libreoffice/share/Scripts \
/usr/lib/libreoffice/share/autocorr \
/usr/lib/libreoffice/share/classification \
/usr/lib/libreoffice/share/wordbook \
/usr/lib/libreoffice/share/fingerprint \
/usr/lib/libreoffice/share/xdg \
/usr/lib/libreoffice/share/numbertext \
/usr/lib/libreoffice/share/shell \
/usr/lib/libreoffice/share/palette \
/usr/lib/libreoffice/share/theme_definitions \
/usr/lib/libreoffice/share/xslt \
/usr/lib/libreoffice/share/labels \
/usr/lib/libreoffice/share/dtd \
/usr/lib/libreoffice/share/tipoftheday \
/usr/lib/libreoffice/share/toolbarmode \
/usr/lib/libreoffice/share/psprint \
/usr/lib/libreoffice/CREDITS.fodt \
/usr/lib/libreoffice/LICENSE.html; \
\
# Remove unused LO extensions (GUI-only; not needed for document conversion)
rm -rf /usr/lib/libreoffice/share/extensions/wiki-publisher \
/usr/lib/libreoffice/share/extensions/nlpsolver \
/usr/lib/libreoffice/share/extensions/dict-* 2>/dev/null || true; \
# Remove LO database components (LO Base; not needed for Writer/Calc/Impress conversion)
rm -rf /usr/lib/libreoffice/program/libdba* \
/usr/lib/libreoffice/program/libdbahsql* \
/usr/lib/libreoffice/program/libdbu* \
/usr/lib/libreoffice/program/libreport* 2>/dev/null || true; \
\
rm -rf /usr/lib/python3.12/test \
/usr/lib/python3.12/idlelib \
/usr/lib/python3.12/tkinter \
/usr/lib/python3.12/lib2to3 \
/usr/lib/python3.12/pydoc_data; \
\
rm -rf /usr/lib/python3/dist-packages/scipy \
/usr/lib/python3/dist-packages/sympy \
/usr/lib/python3/dist-packages/mpmath; \
\
rm -rf \
/usr/lib/python3/dist-packages/cffi \
/usr/lib/python3/dist-packages/cffi-*.dist-info \
/usr/lib/python3/dist-packages/_cffi_backend*.so \
/usr/lib/python3/dist-packages/_cffi_backend*.cpython*.so \
2>/dev/null || true; \
\
# Strip debug symbols from ALL shared libraries
find /usr/lib -name '*.so*' -type f \
-not -path '*/jvm/*' \
-not -path '*/libreoffice/*' \
-exec strip --strip-unneeded {} + 2>/dev/null || true; \
\
# Preserving ffmpeg codec libs as they are directly linked.
\
# Remove GPU backends not needed for headless operation.
MULTIARCH_LIBDIR=$(dpkg-architecture -qDEB_HOST_MULTIARCH 2>/dev/null \
|| find /usr/lib -maxdepth 1 -type d -name '*-linux-gnu' | head -1); \
rm -f \
"${MULTIARCH_LIBDIR}"/libLLVM*.so* \
"${MULTIARCH_LIBDIR}"/libgallium*.so* \
2>/dev/null || true; \
\
find /usr/lib/python3* -type d -name __pycache__ \
-exec rm -rf {} + 2>/dev/null || true; \
find /usr/lib/python3* \( -name '*.pyc' -o -name '*.pyi' \) \
-delete 2>/dev/null || true; \
\
rm -rf /usr/share/bug /usr/share/lintian /usr/share/linda \
/var/log/dpkg.log /var/log/apt/* \
/usr/local/share/ghostscript/*/doc \
/usr/local/share/ghostscript/*/examples \
/usr/share/ImageMagick-*/doc \
/usr/share/ImageMagick-*/www; \
\
\
# NEW: Tesseract training configs (not needed for OCR, but keep configs/ for hocr/txt output)
rm -rf /usr/share/tesseract-ocr/*/tessdata/tessconfigs; \
\
# Noto fonts ship 1800+ files in many weights (Bold, Italic, SemiBold, etc.)
# For PDF processing, Regular weight covers all scripts. Saves ~370MB.
find /usr/share/fonts/truetype/noto -type f \
! -name '*Regular*' -delete 2>/dev/null || true; \
find /usr/share/fonts/opentype -type f \
! -name '*Regular*' -delete 2>/dev/null || true; \
# DejaVu: keep Regular and Bold only (commonly referenced in PDFs)
find /usr/share/fonts/truetype -name '*DejaVu*' \
! -name '*-Regular*' ! -name '*-Bold*' ! -name '*Bold.ttf' \
! -name 'DejaVuSans.ttf' ! -name 'DejaVuSerif.ttf' ! -name 'DejaVuSansMono.ttf' \
-type f -delete 2>/dev/null || true; \
# Remove empty font directories after cleanup
find /usr/share/fonts -type d -empty -delete 2>/dev/null || true; \
\
# ── gconv: keep only essential charset conversion modules (~6MB savings) ──
# PDF processing needs UTF-8, ISO-8859-*, and a few CJK encodings.
GCONV_DIR=$(find /usr/lib -type d -name gconv 2>/dev/null | head -1); \
if [ -n "$GCONV_DIR" ] && [ -d "$GCONV_DIR" ]; then \
mkdir -p /tmp/gconv-keep; \
for mod in UTF-8.so UTF-16.so UTF-32.so UTF-7.so \
ISO8859-1.so ISO8859-15.so ISO8859-2.so ISO8859-9.so \
UNICODE.so CP1252.so CP1251.so CP1250.so \
EUC-JP.so EUC-KR.so EUC-CN.so \
SHIFT_JIS.so GB18030.so BIG5.so \
gconv-modules gconv-modules.d gconv-modules.cache; do \
[ -e "$GCONV_DIR/$mod" ] && \
cp -a "$GCONV_DIR/$mod" /tmp/gconv-keep/ 2>/dev/null || true; \
done; \
rm -rf "$GCONV_DIR"/*; \
cp -a /tmp/gconv-keep/* "$GCONV_DIR/" 2>/dev/null || true; \
rm -rf /tmp/gconv-keep; \
fi; \
\
# NOTE: flite TTS voice libs (~26MB) are kept because ffmpeg directly links them.
# Removing them breaks ffmpeg startup. To save these 26MB, ffmpeg would need
# to be rebuilt without --enable-libflite (not worth the complexity).
\
# ── dpkg metadata cleanup (~14MB) ──
# Not needed at runtime, container won't run apt-get.
rm -rf /var/lib/dpkg/info/*.list \
/var/lib/dpkg/info/*.md5sums \
/var/lib/dpkg/info/*.conffiles \
/var/lib/dpkg/info/*.postinst \
/var/lib/dpkg/info/*.preinst \
/var/lib/dpkg/info/*.prerm \
/var/lib/dpkg/info/*.postrm \
/var/lib/dpkg/info/*.triggers \
/var/lib/dpkg/info/*.shlibs \
/var/lib/dpkg/info/*.symbols \
/var/lib/dpkg/info/*.templates; \
\
# Misc caches
rm -rf /var/cache/fontconfig/* /tmp/*
# External tool layers, all use --link for independent layer caching and parallel pulls.
COPY --link --from=calibre-build /opt/calibre /opt/calibre
COPY --link --from=pdf-tools-build /usr/local/bin/qpdf /usr/bin/qpdf
# ImageMagick: 4 layers collapsed to 1 via the magick-export staging dir in pdf-tools-build
COPY --link --from=pdf-tools-build /magick-export/ /
COPY --link --from=gs-build /usr/local/bin/gs /usr/local/bin/gs
COPY --link --from=gs-build /usr/local/share/ghostscript /usr/local/share/ghostscript
# Python venv pre-built in python-venv-build (no pip install at runtime, no build tools needed)
COPY --link --from=python-venv-build /opt/venv /opt/venv
RUN ldconfig /usr/local/lib && \
PYTHONDONTWRITEBYTECODE=1 \
/opt/venv/bin/python -c "import cffi; print('cffi OK:', cffi.__version__)" && \
PYTHONDONTWRITEBYTECODE=1 \
/opt/venv/bin/python -c "import cv2; print('OpenCV', cv2.__version__)" && \
PYTHONDONTWRITEBYTECODE=1 \
/opt/venv/bin/python -c "import ocrmypdf; print('ocrmypdf OK')" && \
find /opt/venv -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
# ---
# Non-root user
# ---
ARG PUID=1000
ARG PGID=1000
RUN set -eux; \
if ! getent group stirlingpdfgroup >/dev/null 2>&1; then \
groupadd -g "${PGID}" stirlingpdfgroup 2>/dev/null \
|| groupadd stirlingpdfgroup; \
fi; \
if ! id -u stirlingpdfuser >/dev/null 2>&1; then \
useradd -m -u "${PUID}" -g stirlingpdfgroup \
-d /home/stirlingpdfuser -s /bin/bash stirlingpdfuser 2>/dev/null \
|| useradd -m -g stirlingpdfgroup \
-d /home/stirlingpdfuser -s /bin/bash stirlingpdfuser; \
fi; \
ln -sf /usr/sbin/gosu /usr/local/bin/su-exec
# Application files.
WORKDIR /app
# Application layers
COPY --link --from=jar-extract --chown=1000:1000 /layers/dependencies/ /app/
COPY --link --from=jar-extract --chown=1000:1000 /layers/spring-boot-loader/ /app/
COPY --link --from=jar-extract --chown=1000:1000 /layers/snapshot-dependencies/ /app/
@@ -695,63 +67,33 @@ COPY --link --chown=1000:1000 scripts/ /scripts/
# Fonts go to system dir, root ownership is correct (world-readable)
COPY app/core/src/main/resources/static/fonts/*.ttf /usr/share/fonts/truetype/
# Permissions and configuration.
# Permissions and configuration
RUN set -eux; \
ln -sf /opt/calibre/ebook-convert /usr/bin/ebook-convert; \
ln -sf /opt/venv/bin/unoconvert /usr/local/bin/unoconvert; \
ln -sf /opt/venv/bin/unoserver /usr/local/bin/unoserver; \
ln -sf /opt/venv/bin/ocrmypdf /usr/local/bin/ocrmypdf; \
ln -sf /opt/venv/bin/weasyprint /usr/local/bin/weasyprint; \
ln -sf /opt/venv/bin/unoping /usr/local/bin/unoping; \
chmod +x /scripts/*; \
mkdir -p /configs /configs/cache /configs/heap_dumps /logs /customFiles \
/pipeline/watchedFolders /pipeline/finishedFolders \
/tmp/stirling-pdf/heap_dumps; \
# Create symlinks to allow app to find these in /app/
ln -s /logs /app/logs; \
ln -s /configs /app/configs; \
ln -s /customFiles /app/customFiles; \
ln -s /pipeline /app/pipeline; \
chown -R stirlingpdfuser:stirlingpdfgroup \
/home/stirlingpdfuser /configs /logs /customFiles /pipeline \
/tmp/stirling-pdf; \
chown -h stirlingpdfuser:stirlingpdfgroup /app/logs /app/configs /app/customFiles /app/pipeline; \
chown stirlingpdfuser:stirlingpdfgroup /app; \
chmod 750 /tmp/stirling-pdf; \
chmod 750 /tmp/stirling-pdf/heap_dumps; \
fc-cache -f
# NOTE: Project Leyden AOT cache is generated in the background on first boot
# by init-without-ocr.sh and stored in /configs/cache/stirling.aot (persistent volume).
# The cache is picked up on subsequent boots for 15-25% faster startup.
# See: JEP 483 + 514 + 515 (JDK 25).
# Environment variables.
ARG VERSION_TAG
# Write version to a file so it is readable by scripts without env-var inheritance.
# init-without-ocr.sh reads /etc/stirling_version for the AOT cache fingerprint.
RUN echo "${VERSION_TAG:-dev}" > /etc/stirling_version
# Environment variables
ENV VERSION_TAG=$VERSION_TAG \
STIRLING_AOT_ENABLE="false" \
STIRLING_JVM_PROFILE="balanced" \
_JVM_OPTS_BALANCED="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:G1HeapRegionSize=4m -XX:G1PeriodicGCInterval=60000 -XX:+UseStringDeduplication -XX:+UseCompactObjectHeaders -XX:+ExplicitGCInvokesConcurrent -Dspring.threads.virtual.enabled=true -Djava.awt.headless=true" \
_JVM_OPTS_PERFORMANCE="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UseShenandoahGC -XX:ShenandoahGCMode=generational -XX:+UseCompactObjectHeaders -XX:+UseStringDeduplication -XX:+AlwaysPreTouch -XX:+ExplicitGCInvokesConcurrent -Dspring.threads.virtual.enabled=true -Djava.awt.headless=true" \
JAVA_CUSTOM_OPTS="" \
HOME=/home/stirlingpdfuser \
PUID=${PUID} \
PGID=${PGID} \
UMASK=022 \
PATH="/opt/venv/bin:${PATH}" \
UNO_PATH=/usr/lib/libreoffice/program \
LIBREOFFICE_BIN_PATH=/usr/lib/libreoffice/program/soffice.bin \
STIRLING_TEMPFILES_DIRECTORY=/tmp/stirling-pdf \
TMPDIR=/tmp/stirling-pdf \
TEMP=/tmp/stirling-pdf \
TMP=/tmp/stirling-pdf \
QTWEBENGINE_CHROMIUM_FLAGS="--no-sandbox --disable-gpu --disable-software-rasterizer" \
DBUS_SESSION_BUS_ADDRESS=/dev/null
SAL_TMP=/tmp/stirling-pdf/libre
# Metadata labels.
# Metadata labels
LABEL org.opencontainers.image.title="Stirling-PDF" \
org.opencontainers.image.description="Full version with Calibre, LibreOffice, Tesseract, OCRmyPDF" \
org.opencontainers.image.source="https://github.com/Stirling-Tools/Stirling-PDF" \

View File

@@ -1,192 +1,14 @@
# Stirling-PDF - Fat version (embedded frontend)
# Extra fonts for air-gapped environments
# Uses pre-built base image for fast builds
FROM ubuntu:noble AS calibre-build
ARG BASE_VERSION=1.0.0
ARG BASE_IMAGE=ghcr.io/stirling-tools/stirling-pdf-base:${BASE_VERSION}
ARG CALIBRE_VERSION=9.4.0
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
set -eux; \
apt-get update; \
apt-get install -y --no-install-recommends \
ca-certificates curl xz-utils libnss3 libfontconfig1 \
libgl1 libegl1 libdbus-1-3 libasound2t64 libxcomposite1 \
libxrandr2 libxkbcommon0 libxi6 libxtst6 libopengl0; \
rm -rf /var/lib/apt/lists/*; \
\
case "$(uname -m)" in \
x86_64) CALIBRE_ARCH="x86_64" ;; \
aarch64) CALIBRE_ARCH="arm64" ;; \
*) echo "Unsupported arch: $(uname -m)"; exit 1 ;; \
esac; \
\
curl -fsSL \
"https://download.calibre-ebook.com/${CALIBRE_VERSION}/calibre-${CALIBRE_VERSION}-${CALIBRE_ARCH}.txz" \
-o /tmp/calibre.txz; \
mkdir -p /opt/calibre; \
tar xJf /tmp/calibre.txz -C /opt/calibre; \
rm /tmp/calibre.txz; \
\
# Remove GUI-only shared libraries.
# Libs required by WebEngine PDF output are preserved.
rm -f /opt/calibre/lib/libQt6Designer* \
/opt/calibre/lib/libQt6Multimedia* \
/opt/calibre/lib/libQt6SpatialAudio.so.* \
/opt/calibre/lib/libQt6NetworkAuth.so.* \
/opt/calibre/lib/libQt6Concurrent.so.* \
/opt/calibre/lib/libQt6OpenGLWidgets.so.* \
/opt/calibre/lib/libQt6QuickWidgets.so.* \
# AV / multimedia
/opt/calibre/lib/libavcodec.so.* \
/opt/calibre/lib/libavfilter.so.* \
/opt/calibre/lib/libavformat.so.* \
/opt/calibre/lib/libavutil.so.* \
/opt/calibre/lib/libavdevice.so.* \
/opt/calibre/lib/libpostproc.so.* \
/opt/calibre/lib/libswresample.so.* \
/opt/calibre/lib/libswscale.so.* \
# Audio / speech / TTS
/opt/calibre/lib/libspeex.so.* \
/opt/calibre/lib/libFLAC.so.* \
/opt/calibre/lib/libopus.so.* \
/opt/calibre/lib/libvorbis*.so.* \
/opt/calibre/lib/libasyncns.so.* \
/opt/calibre/lib/libspeechd.so.* \
/opt/calibre/lib/libespeak-ng.so.* \
# Other unused libs
/opt/calibre/lib/libonnxruntime.so.* \
/opt/calibre/lib/libgio-2.0.so.* \
/opt/calibre/lib/libzstd.so.* \
/opt/calibre/lib/libhunspell-1.7.so.* \
/opt/calibre/lib/libbrotlienc.so.* \
/opt/calibre/lib/libbrotlicommon.so.* \
/opt/calibre/lib/libbrotlidec.so.* \
/opt/calibre/lib/libstemmer.so.* \
/opt/calibre/lib/libmtp.so.* \
/opt/calibre/lib/libncursesw.so.* \
/opt/calibre/lib/libchm.so.* \
/opt/calibre/lib/libgcrypt.so.* \
/opt/calibre/lib/libgpg-error.so.* \
/opt/calibre/lib/libicuio.so.* \
/opt/calibre/lib/libreadline.so.* \
/opt/calibre/lib/libusb-1.0.so.*; \
rm -rf /opt/calibre/lib/qt6/plugins/platformthemes \
/opt/calibre/lib/qt6/plugins/multimedia \
/opt/calibre/lib/qt6/plugins/designer \
/opt/calibre/lib/qt6/plugins/qmltooling; \
\
# Remove GUI executables but keep ebook-convert, ebook-meta, and calibre-parallel.
rm -f /opt/calibre/calibre \
/opt/calibre/calibre-server \
/opt/calibre/calibre-smtp \
/opt/calibre/calibre-debug \
/opt/calibre/calibre-customize \
/opt/calibre/calibredb \
/opt/calibre/ebook-viewer \
/opt/calibre/ebook-edit \
/opt/calibre/ebook-polish \
/opt/calibre/ebook-device \
/opt/calibre/fetch-ebook-metadata \
/opt/calibre/lrf2lrs \
/opt/calibre/lrs2lrf \
/opt/calibre/markdown-calibre \
/opt/calibre/web2disk; \
\
# Remove Python modules not needed for conversion.
rm -rf /opt/calibre/lib/calibre/gui2 \
/opt/calibre/lib/calibre/devices \
/opt/calibre/lib/calibre/library \
/opt/calibre/lib/calibre/db \
/opt/calibre/lib/calibre/srv \
/opt/calibre/lib/calibre/spell \
/opt/calibre/lib/calibre/live; \
\
# Remove resources not needed for CLI conversion.
rm -rf /opt/calibre/resources/images \
/opt/calibre/resources/icons \
/opt/calibre/resources/icons.rcc \
/opt/calibre/resources/content-server \
/opt/calibre/resources/editor* \
/opt/calibre/resources/viewer \
/opt/calibre/resources/viewer.js \
/opt/calibre/resources/viewer.html \
/opt/calibre/resources/recipes \
/opt/calibre/resources/dictionaries \
/opt/calibre/resources/hyphenation \
/opt/calibre/resources/catalog \
/opt/calibre/resources/calibre-mimetypes.xml \
/opt/calibre/resources/changelog.json \
/opt/calibre/resources/user-agent-data.json \
/opt/calibre/resources/builtin_recipes.zip \
/opt/calibre/resources/builtin_recipes.xml \
/opt/calibre/resources/builtin_recipes.xml \
/opt/calibre/resources/stylelint-bundle.min.js \
/opt/calibre/resources/stylelint.js \
/opt/calibre/resources/rapydscript \
/opt/calibre/resources/quick_start \
/opt/calibre/resources/piper-voices.json \
/opt/calibre/resources/images.qrc \
/opt/calibre/resources/mozilla-ca-certs.pem \
/opt/calibre/resources/ebook-convert-complete.calibre_msgpack \
/opt/calibre/resources/mathjax \
/opt/calibre/resources/common-english-words.txt \
/opt/calibre/resources/calibre-portable.sh \
/opt/calibre/resources/calibre-portable.bat \
/opt/calibre/resources/metadata_sqlite.sql \
/opt/calibre/resources/notes_sqlite.sql \
/opt/calibre/resources/fts_sqlite.sql \
/opt/calibre/resources/fts_triggers.sql \
/opt/calibre/resources/jacket \
/opt/calibre/resources/editor-functions.json \
/opt/calibre/resources/calibre-ebook-root-CA.crt \
/opt/calibre/resources/csscolorparser.js \
/opt/calibre/resources/lookup.js \
/opt/calibre/resources/pdf-mathjax-loader.js \
/opt/calibre/resources/scraper.js \
/opt/calibre/resources/toc.js \
/opt/calibre/resources/user-manual-translation-stats.json \
/opt/calibre/resources/pin-template.svg \
/opt/calibre/resources/scripts.calibre_msgpack \
/opt/calibre/lib/calibre/ebooks/docx/images \
/opt/calibre/share \
/opt/calibre/man; \
\
# Remove translations and localization while keeping required libraries.
rm -rf /opt/calibre/lib/qt6/translations; \
find /opt/calibre/translations -mindepth 1 -maxdepth 1 ! -name 'qtwebengine_locales' -exec rm -rf {} +; \
rm -rf /opt/calibre/resources/localization/locales.zip \
/opt/calibre/resources/localization/stats.calibre_msgpack \
/opt/calibre/resources/localization/website-languages.txt; \
find /opt/calibre/resources/localization -mindepth 1 -maxdepth 1 ! -name 'iso639.calibre_msgpack' -exec rm -rf {} +; \
\
# Strip debug symbols from calibre extension modules.
# Exclude Qt6 libs and all qt6/ subdirectory files to prevent Chromium renderer crashes.
find /opt/calibre/lib -name '*.so*' \
! -name 'libQt6*' \
! -path '*/qt6/*' \
-exec strip --strip-unneeded {} + 2>/dev/null || true; \
\
find /opt/calibre -type d -name __pycache__ \
-exec rm -rf {} + 2>/dev/null || true; \
find /opt/calibre -name '*.pyc' -delete 2>/dev/null || true; \
\
# Verify conversion functionality.
# NOTE: txt→epub used intentionally NOT txt→pdf.
# Calibre 7+ uses WebEngine (Chromium) for PDF output, which requires kernel
# capabilities unavailable in Docker RUN steps and segfaults under QEMU.
# epub output exercises the same Python/plugin stack without touching WebEngine.
/opt/calibre/ebook-convert --version; \
echo "Hello" > /tmp/test.txt; \
/opt/calibre/ebook-convert /tmp/test.txt /tmp/test.epub; \
rm -f /tmp/test.txt /tmp/test.epub; \
echo "=== Calibre stripped successfully ==="
# Build the Java application and frontend.
# Stage 1: Build the Java application and frontend
FROM gradle:9.3.1-jdk25 AS app-build
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update \
RUN apt-get update \
&& apt-get install -y --no-install-recommends curl ca-certificates \
&& update-ca-certificates \
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
@@ -209,381 +31,71 @@ COPY app/common/build.gradle app/common/
COPY app/proprietary/build.gradle app/proprietary/
# Use system gradle instead of gradlew to avoid SSL issues downloading gradle distribution on emulated arm64
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
--mount=type=cache,target=/home/gradle/.gradle/wrapper \
gradle dependencies --no-daemon || true
RUN gradle dependencies --no-daemon || true
COPY . .
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
--mount=type=cache,target=/home/gradle/.gradle/wrapper \
DISABLE_ADDITIONAL_FEATURES=false \
RUN DISABLE_ADDITIONAL_FEATURES=false \
gradle clean build \
-PbuildWithFrontend=true \
-x spotlessApply -x spotlessCheck -x test -x sonarqube \
--no-daemon
# Python Builder stage.
FROM ubuntu:noble AS python-build
ARG UNOSERVER_VERSION=3.6
ENV DEBIAN_FRONTEND=noninteractive
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update && apt-get install -y --no-install-recommends \
python3 python3-venv python3-dev \
python3-packaging \
build-essential \
# Build dependencies for ocrmypdf/weasyprint/opencv
zlib1g-dev libjpeg-dev libffi-dev libpango1.0-dev \
&& rm -rf /var/lib/apt/lists/*
RUN python3 -m venv /opt/venv --system-site-packages
ENV PATH="/opt/venv/bin:$PATH"
# Build all heavy python packages here
RUN --mount=type=cache,target=/root/.cache/pip \
pip install \
weasyprint pdf2image opencv-python-headless ocrmypdf \
"unoserver==${UNOSERVER_VERSION}"
# Stage 2: Extract Spring Boot Layers
FROM eclipse-temurin:25-jre-noble AS jar-extract
WORKDIR /tmp
COPY --from=app-build /app/app/core/build/libs/*.jar app.jar
RUN java -Djarmode=tools -jar app.jar extract --layers --destination /layers
# Build Ghostscript 10.06.0 from source in an isolated stage (avoids library conflicts).
FROM ubuntu:noble AS gs-build
ARG GS_VERSION=10.06.0
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/tmp/gs-build,id=gs-build-${TARGETPLATFORM:-local} \
apt-get update && apt-get install -y --no-install-recommends \
build-essential curl ca-certificates libfontconfig1-dev && rm -rf /var/lib/apt/lists/* && \
GS_TAG="gs$(printf '%s' "${GS_VERSION}" | tr -d '.')" && \
cd /tmp/gs-build && \
rm -rf ghostscript-* && \
(test -d "ghostscript-${GS_VERSION}" || curl -fsSL "https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/${GS_TAG}/ghostscript-${GS_VERSION}.tar.gz" | tar xz) && \
cd "ghostscript-${GS_VERSION}" && \
./configure \
--prefix=/usr/local \
--without-x \
--disable-cups \
--disable-gtk && \
make -j"$(nproc)" && \
make install && \
cd ..
# Stage 3: Final runtime image on top of pre-built base
FROM ${BASE_IMAGE}
ARG VERSION_TAG
# Build PDF Tools (QPDF and ImageMagick 7).
FROM ubuntu:noble AS pdf-tools-build
ARG QPDF_VERSION=12.3.2
ARG IM_VERSION=7.1.2-13
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/tmp/pdf-tools-build,id=pdf-tools-${TARGETPLATFORM:-local} \
apt-get update && apt-get install -y --no-install-recommends \
build-essential cmake libssl-dev libjpeg-dev zlib1g-dev curl ca-certificates pkg-config \
libpng-dev libtiff-dev libwebp-dev libxml2-dev libfreetype6-dev liblcms2-dev libzip-dev liblqr-1-0-dev \
libltdl-dev libtool && rm -rf /var/lib/apt/lists/* && \
cd /tmp/pdf-tools-build && \
rm -rf qpdf-* ImageMagick-* && \
# Build QPDF
(test -d "qpdf-${QPDF_VERSION}" || curl -fsSL "https://github.com/qpdf/qpdf/releases/download/v${QPDF_VERSION}/qpdf-${QPDF_VERSION}.tar.gz" | tar xz) && \
cd "qpdf-${QPDF_VERSION}" && \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DALLOW_CRYPTO_OPENSSL=ON -DDEFAULT_CRYPTO=openssl && \
cmake --build build --parallel "$(nproc)" && \
cmake --install build && \
cd .. && \
# Build ImageMagick 7
(test -d "ImageMagick-${IM_VERSION}" || curl -fsSL "https://github.com/ImageMagick/ImageMagick/archive/refs/tags/${IM_VERSION}.tar.gz" | tar xz) && \
cd "ImageMagick-${IM_VERSION}" && \
./configure --prefix=/usr/local --with-modules --with-perl=no --with-magick-plus-plus=no --with-quantum-depth=16 --disable-static --enable-shared && \
make -j"$(nproc)" && \
make install && \
# Enable PDF/PS/EPS in policy
sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' /usr/local/etc/ImageMagick-7/policy.xml && \
sed -i 's/rights="none" pattern="PS"/rights="read|write" pattern="PS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
sed -i 's/rights="none" pattern="EPS"/rights="read|write" pattern="EPS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
cd .. && \
ldconfig /usr/local/lib
WORKDIR /app
# Application layers
COPY --link --from=jar-extract --chown=1000:1000 /layers/dependencies/ /app/
COPY --link --from=jar-extract --chown=1000:1000 /layers/spring-boot-loader/ /app/
COPY --link --from=jar-extract --chown=1000:1000 /layers/snapshot-dependencies/ /app/
COPY --link --from=jar-extract --chown=1000:1000 /layers/application/ /app/
# Final runtime image.
FROM eclipse-temurin:25-jre AS runtime
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
ENV DEBIAN_FRONTEND=noninteractive \
LANG=C.UTF-8 \
LC_ALL=C.UTF-8 \
TESS_BASE_PATH=/usr/share/tesseract-ocr/5/tessdata
ARG UNOSERVER_VERSION=3.6
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/root/.cache/pip \
set -eux; \
apt-get update; \
# Add LibreOffice Fresh PPA for latest version (26.2.x)
apt-get install -y --no-install-recommends software-properties-common; \
add-apt-repository -y ppa:libreoffice/ppa; \
apt-get update; \
apt-get install -y --no-install-recommends \
# Core tools
ca-certificates tzdata tini bash fontconfig curl \
ffmpeg poppler-utils fontforge \
gosu unpaper pngquant \
fonts-liberation2 \
fonts-crosextra-caladea fonts-crosextra-carlito \
fonts-noto-core fonts-noto-mono fonts-noto-extra \
fonts-noto-cjk poppler-data \
fonts-freefont-ttf fonts-terminus \
# Python runtime & UNO bridge (python3-full -> python3 optimization)
python3 python3-uno python3-packaging \
# OCR
tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu tesseract-ocr-fra \
tesseract-ocr-por tesseract-ocr-chi-sim \
# Graphics / AWT headless
libcairo2 libpango-1.0-0 libpangoft2-1.0-0 \
libfreetype6 libfontconfig1 libx11-6 libxt6 libxext6 libxrender1 \
libxtst6 libxi6 libxinerama1 libxkbcommon0 libsm6 libice6 \
# Qt/EGL for Calibre CLI
libegl1 libgl1 libopengl0 libdbus-1-3 libglib2.0-0 libnss3 \
libasound2t64 libxcomposite1 libxrandr2 \
# Virtual framebuffer (required for headless LibreOffice Impress/Draw)
xvfb x11-utils coreutils \
libreoffice-writer-nogui libreoffice-calc-nogui \
libreoffice-impress-nogui libreoffice-draw-nogui \
libreoffice-base-nogui libreoffice-java-common \
; \
\
# Fix LibreOffice UNO bridge and filter availability
libreoffice --version; \
soffice --version 2>/dev/null || true; \
# Rebuild UNO bridge type database
/usr/lib/libreoffice/program/soffice.bin --headless --convert-to pdf /dev/null 2>/dev/null || true; \
# Force font cache rebuild and verify filters are available
fc-cache -f -v 2>&1 | awk 'NR <= 20'; \
\
# Cleanup stage.
\
rm -rf /var/lib/apt/lists/*; \
\
# Docs / man / info / icons / themes / GUI assets (headless server)
rm -rf /usr/share/doc/* /usr/share/man/* /usr/share/info/* \
/usr/share/lintian/* /usr/share/linda/* \
/usr/share/icons/* /usr/share/themes/* \
/usr/share/javascript/* \
/usr/share/gtk-3.0/* \
/usr/share/fontforge/pixmaps \
/usr/share/liblangtag/* \
/usr/share/tcltk/* \
/usr/share/python-wheels/*; \
\
# Clean up system locale data (LANG=C.UTF-8 doesn't use them)
find /usr/share/locale -mindepth 1 -maxdepth 1 -type d \
! -name 'en*' -exec rm -rf {} + 2>/dev/null || true; \
rm -rf /usr/share/i18n/locales /usr/share/i18n/charmaps; \
\
rm -rf /usr/lib/libreoffice/share/gallery \
/usr/lib/libreoffice/share/template \
/usr/lib/libreoffice/share/wizards \
/usr/lib/libreoffice/share/autotext \
/usr/lib/libreoffice/help \
/usr/lib/libreoffice/share/config/images_*.zip \
/usr/lib/libreoffice/share/basic \
/usr/lib/libreoffice/share/Scripts \
/usr/lib/libreoffice/share/autocorr \
/usr/lib/libreoffice/share/classification \
/usr/lib/libreoffice/share/wordbook \
/usr/lib/libreoffice/share/fingerprint \
/usr/lib/libreoffice/share/xdg \
/usr/lib/libreoffice/share/numbertext \
/usr/lib/libreoffice/share/shell \
/usr/lib/libreoffice/share/palette \
/usr/lib/libreoffice/share/theme_definitions \
/usr/lib/libreoffice/share/xslt \
/usr/lib/libreoffice/share/labels \
/usr/lib/libreoffice/share/dtd \
/usr/lib/libreoffice/share/tipoftheday \
/usr/lib/libreoffice/share/toolbarmode \
/usr/lib/libreoffice/share/psprint; \
\
# Preserving soffice.cfg because LibreOffice needs it to load documents.
\
\
\
find /usr/lib -name '*.so*' -type f \
-not -path '*/jvm/*' \
-not -path '*/libreoffice/*' \
-exec strip --strip-unneeded {} + 2>/dev/null || true; \
\
# Preserving ffmpeg codec libs as they are directly linked.
\
# Remove Mesa/LLVM GPU backends (~179 MB, not needed for headless/offscreen)
MULTIARCH_LIBDIR=$(dpkg-architecture -qDEB_HOST_MULTIARCH 2>/dev/null \
|| find /usr/lib -maxdepth 1 -type d -name '*-linux-gnu' | head -1); \
rm -f \
"${MULTIARCH_LIBDIR}"/libLLVM*.so* \
"${MULTIARCH_LIBDIR}"/libgallium*.so* \
2>/dev/null || true; \
\
# Python stdlib: remove unused modules (~71 MB)
rm -rf /usr/lib/python3.12/test \
/usr/lib/python3.12/idlelib \
/usr/lib/python3.12/tkinter \
/usr/lib/python3.12/lib2to3 \
/usr/lib/python3.12/pydoc_data; \
\
# System Python packages not needed at runtime (~153 MB)
rm -rf /usr/lib/python3/dist-packages/scipy \
/usr/lib/python3/dist-packages/sympy \
/usr/lib/python3/dist-packages/mpmath; \
\
# Duplicate system packages (superseded by venv versions, ~55 MB)
rm -rf /usr/lib/python3/dist-packages/numpy \
/usr/lib/python3/dist-packages/fontTools \
/usr/lib/python3/dist-packages/PIL; \
\
# System-wide Python cache cleanup
find /usr/lib/python3* -type d -name __pycache__ \
-exec rm -rf {} + 2>/dev/null || true; \
find /usr/lib/python3* \( -name '*.pyc' -o -name '*.pyi' \) \
-delete 2>/dev/null || true; \
\
# Additional metadata cleanup
# FIX: Only remove ImageMagick doc/www, NOT the whole dir (preserves policy.xml/delegates.xml)
rm -rf /usr/share/bug /usr/share/lintian /usr/share/linda \
/var/lib/dpkg/info/*.md5sums \
/var/log/dpkg.log /var/log/apt/* \
/usr/local/share/ghostscript/*/doc \
/usr/local/share/ghostscript/*/examples \
/usr/share/ImageMagick-*/doc \
/usr/share/ImageMagick-*/www; \
\
\
# NEW: Tesseract training configs (not needed for OCR, but keep configs/ for hocr/txt output)
rm -rf /usr/share/tesseract-ocr/*/tessdata/tessconfigs; \
\
# Trim CJK fonts to Regular weight only (FIX: Broadened path)
find /usr/share/fonts -name '*CJK*' \
! -name '*Regular*' -type f -delete 2>/dev/null || true; \
\
# Misc caches
rm -rf /var/cache/fontconfig/* /tmp/*
# Python virtual environment.
COPY --from=python-build /opt/venv /opt/venv
RUN set -eux; \
ln -sf /opt/venv/bin/unoconvert /usr/local/bin/unoconvert; \
ln -sf /opt/venv/bin/unoserver /usr/local/bin/unoserver; \
# Verify python libs are accessible
/opt/venv/bin/python -c "import cv2; import ocrmypdf; import weasyprint; print('Python libs verified')"; \
# Cleanup venv from builder leftovers
find /opt/venv -type d -name __pycache__ \
-exec rm -rf {} + 2>/dev/null || true; \
find /opt/venv \( -name '*.pyc' -o -name '*.pyi' \) -delete 2>/dev/null || true; \
rm -rf /opt/venv/lib/python*/site-packages/pip \
/opt/venv/lib/python*/site-packages/pip-*.dist-info \
/opt/venv/lib/python*/site-packages/setuptools \
/opt/venv/lib/python*/site-packages/setuptools-*.dist-info;
# Calibre and PDF Tools.
COPY --link --from=calibre-build /opt/calibre /opt/calibre
COPY --link --from=pdf-tools-build /usr/local/bin/qpdf /usr/bin/qpdf
COPY --link --from=pdf-tools-build /usr/local/bin/magick /usr/bin/magick
COPY --link --from=pdf-tools-build /usr/local/lib/libMagick* /usr/local/lib/
COPY --link --from=pdf-tools-build /usr/local/etc/ImageMagick-7 /usr/local/etc/ImageMagick-7
COPY --link --from=gs-build /usr/local/bin/gs /usr/local/bin/gs
COPY --link --from=gs-build /usr/local/share/ghostscript /usr/local/share/ghostscript
RUN set -eux; \
ldconfig /usr/local/lib; \
# Clean pycache that may have been generated during stage-1 verify
find /opt/calibre -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true;
# Non-root user.
ARG PUID=1000
ARG PGID=1000
RUN set -eux; \
if ! getent group stirlingpdfgroup >/dev/null 2>&1; then \
groupadd -g "${PGID}" stirlingpdfgroup 2>/dev/null \
|| groupadd stirlingpdfgroup; \
fi; \
if ! id -u stirlingpdfuser >/dev/null 2>&1; then \
useradd -m -u "${PUID}" -g stirlingpdfgroup \
-d /home/stirlingpdfuser -s /bin/bash stirlingpdfuser 2>/dev/null \
|| useradd -m -g stirlingpdfgroup \
-d /home/stirlingpdfuser -s /bin/bash stirlingpdfuser; \
fi; \
ln -sf /usr/sbin/gosu /usr/local/bin/su-exec
# Application files.
COPY --link --from=app-build --chown=1000:1000 \
/app/app/core/build/libs/*.jar /app.jar
COPY --link --from=app-build --chown=1000:1000 \
/app/build/libs/restart-helper.jar /restart-helper.jar
COPY --link --chown=1000:1000 scripts/ /scripts/
# Fonts go to system dir root ownership is correct (world-readable)
COPY --link app/core/src/main/resources/static/fonts/*.ttf /usr/share/fonts/truetype/
# Fonts go to system dir, root ownership is correct (world-readable)
COPY app/core/src/main/resources/static/fonts/*.ttf /usr/share/fonts/truetype/
# Permissions and configuration.
# Permissions and configuration
RUN set -eux; \
ln -sf /opt/calibre/ebook-convert /usr/bin/ebook-convert; \
ln -sf /opt/venv/bin/unoconvert /usr/local/bin/unoconvert; \
ln -sf /opt/venv/bin/unoserver /usr/local/bin/unoserver; \
ln -sf /opt/venv/bin/ocrmypdf /usr/local/bin/ocrmypdf; \
ln -sf /opt/venv/bin/weasyprint /usr/local/bin/weasyprint; \
ln -sf /opt/venv/bin/unoping /usr/local/bin/unoping; \
chmod +x /scripts/*; \
mkdir -p /configs /logs /customFiles \
/pipeline/watchedFolders /pipeline/finishedFolders \
/tmp/stirling-pdf/heap_dumps; \
# Create symlinks to allow app to find these in /app/
mkdir -p /app; \
ln -s /logs /app/logs; \
ln -s /configs /app/configs; \
ln -s /customFiles /app/customFiles; \
ln -s /pipeline /app/pipeline; \
chown -R stirlingpdfuser:stirlingpdfgroup \
/home/stirlingpdfuser /configs /logs /customFiles /pipeline \
/tmp/stirling-pdf; \
chown -h stirlingpdfuser:stirlingpdfgroup /app/logs /app/configs /app/customFiles /app/pipeline; \
chown stirlingpdfuser:stirlingpdfgroup /app; \
chmod 1777 /tmp/stirling-pdf; \
fc-cache -f; \
# NOTE: Project Leyden AOT cache is generated in the background on first boot
# by init-without-ocr.sh. The cache is picked up on subsequent boots for
# 15-25% faster startup. See: JEP 483 + 514 + 515 (JDK 25).
\
# Clean Calibre pycache that may have been generated during stage-1 verify
find /opt/calibre -type d -name __pycache__ \
-exec rm -rf {} + 2>/dev/null || true
chmod 750 /tmp/stirling-pdf; \
chmod 750 /tmp/stirling-pdf/heap_dumps; \
fc-cache -f
# Environment variables.
ARG VERSION_TAG
# Write version to a file so it is readable by scripts without env-var inheritance.
RUN echo "${VERSION_TAG:-dev}" > /etc/stirling_version
# Environment variables
ENV VERSION_TAG=$VERSION_TAG \
STIRLING_AOT_ENABLE="false" \
STIRLING_JVM_PROFILE="balanced" \
_JVM_OPTS_BALANCED="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:G1HeapRegionSize=4m -XX:G1PeriodicGCInterval=60000 -XX:+UseStringDeduplication -XX:+UseCompactObjectHeaders -XX:+ExplicitGCInvokesConcurrent -Dspring.threads.virtual.enabled=true -Djava.awt.headless=true" \
_JVM_OPTS_PERFORMANCE="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UseShenandoahGC -XX:ShenandoahGCMode=generational -XX:+UseCompactObjectHeaders -XX:+UseStringDeduplication -XX:+AlwaysPreTouch -XX:+ExplicitGCInvokesConcurrent -Dspring.threads.virtual.enabled=true -Djava.awt.headless=true" \
JAVA_CUSTOM_OPTS="" \
HOME=/home/stirlingpdfuser \
PUID=${PUID} \
PGID=${PGID} \
UMASK=022 \
FAT_DOCKER=true \
INSTALL_BOOK_AND_ADVANCED_HTML_OPS=false \
PATH="/opt/venv/bin:${PATH}" \
UNO_PATH=/usr/lib/libreoffice/program \
LIBREOFFICE_BIN_PATH=/usr/lib/libreoffice/program/soffice.bin \
STIRLING_TEMPFILES_DIRECTORY=/tmp/stirling-pdf \
TMPDIR=/tmp/stirling-pdf \
TEMP=/tmp/stirling-pdf \
TMP=/tmp/stirling-pdf \
QTWEBENGINE_CHROMIUM_FLAGS="--no-sandbox --disable-gpu --disable-software-rasterizer" \
DBUS_SESSION_BUS_ADDRESS=/dev/null
SAL_TMP=/tmp/stirling-pdf/libre
# Metadata labels.
# Metadata labels
LABEL org.opencontainers.image.title="Stirling-PDF Fat" \
org.opencontainers.image.description="Fat version with extra fonts for air-gapped environments, includes Calibre, LibreOffice, Tesseract, OCRmyPDF" \
org.opencontainers.image.source="https://github.com/Stirling-Tools/Stirling-PDF" \
@@ -599,8 +111,8 @@ LABEL org.opencontainers.image.title="Stirling-PDF Fat" \
EXPOSE 8080/tcp
STOPSIGNAL SIGTERM
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
CMD curl -f http://localhost:8080/api/v1/info/status || exit 1
HEALTHCHECK --interval=30s --timeout=15s --start-period=120s --retries=5 \
CMD curl -fs --max-time 10 http://localhost:8080/api/v1/info/status || exit 1
ENTRYPOINT ["tini", "--", "/scripts/init.sh"]
CMD []

View File

@@ -5,8 +5,7 @@
FROM gradle:9.3.1-jdk25 AS build
# Install Node.js and npm for frontend build
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update && apt-get install -y --no-install-recommends \
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
&& apt-get install -y --no-install-recommends nodejs \
@@ -30,15 +29,13 @@ ENV JDK_JAVA_OPTIONS="--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNN
--add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED"
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
./gradlew dependencies --no-daemon || true
RUN ./gradlew dependencies --no-daemon || true
# Copy entire project
COPY . .
# Build ultra-lite JAR with embedded frontend (minimal features)
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
DISABLE_ADDITIONAL_FEATURES=true \
RUN DISABLE_ADDITIONAL_FEATURES=true \
./gradlew clean build \
-PbuildWithFrontend=true \
-x spotlessApply -x spotlessCheck -x test -x sonarqube \

View File

@@ -2,6 +2,9 @@
services:
stirling-pdf:
container_name: Stirling-PDF-Fat-Disable-Endpoints
build:
context: ../../..
dockerfile: docker/embedded/Dockerfile.fat
image: docker.stirlingpdf.com/stirlingtools/stirling-pdf:fat
build:
context: ../../../

View File

@@ -1,6 +1,9 @@
services:
stirling-pdf:
container_name: Stirling-PDF-Security-Fat
build:
context: ../../..
dockerfile: docker/embedded/Dockerfile.fat
image: docker.stirlingpdf.com/stirlingtools/stirling-pdf:fat
build:
context: ../../../

View File

@@ -1,6 +1,9 @@
services:
stirling-pdf:
container_name: Stirling-PDF-Ultra-Lite
build:
context: ../../..
dockerfile: docker/embedded/Dockerfile.ultra-lite
image: docker.stirlingpdf.com/stirlingtools/stirling-pdf:ultra-lite
build:
context: ../../../

View File

@@ -1,6 +1,9 @@
services:
stirling-pdf:
container_name: Stirling-PDF-Security-Fat-with-login
build:
context: ../../..
dockerfile: docker/embedded/Dockerfile.fat
image: docker.stirlingpdf.com/stirlingtools/stirling-pdf:fat
build:
context: ../../../

View File

@@ -1,463 +0,0 @@
# Stirling-PDF Unified Container
Single Docker container that can run as **frontend + backend**, **frontend only**, or **backend only** using the `MODE` environment variable.
## Quick Start
### MODE=BOTH (Default)
Single container with both frontend and backend on port 8080:
```bash
docker run -p 8080:8080 \
-e MODE=BOTH \
stirlingtools/stirling-pdf:unified
```
Access at: `http://localhost:8080`
### MODE=FRONTEND
Frontend only, connecting to separate backend:
```bash
docker run -p 8080:8080 \
-e MODE=FRONTEND \
-e VITE_API_BASE_URL=http://backend:8080 \
stirlingtools/stirling-pdf:unified
```
### MODE=BACKEND
Backend API only:
```bash
docker run -p 8080:8080 \
-e MODE=BACKEND \
stirlingtools/stirling-pdf:unified
```
Access API at: `http://localhost:8080/api`
Swagger UI at: `http://localhost:8080/swagger-ui/index.html`
---
## Architecture
### MODE=BOTH (Default)
```
┌─────────────────────────────────────┐
│ Port 8080 (External) │
│ ┌───────────────────────────────┐ │
│ │ Nginx │ │
│ │ • Serves frontend (/) │ │
│ │ • Proxies /api/* → backend │ │
│ └───────────┬───────────────────┘ │
│ │ │
│ ┌───────────▼───────────────────┐ │
│ │ Backend (Internal 8081) │ │
│ │ • Spring Boot │ │
│ │ • PDF Processing │ │
│ │ • UnoServer │ │
│ └───────────────────────────────┘ │
└─────────────────────────────────────┘
```
### MODE=FRONTEND
```
┌─────────────────────────────┐ ┌──────────────────┐
│ Frontend Container │ │ Backend │
│ Port 8080 │ │ (External) │
│ ┌───────────────────────┐ │ │ │
│ │ Nginx │ │──────▶ :8080/api │
│ │ • Serves frontend │ │ │ │
│ │ • Proxies to backend │ │ │ │
│ └───────────────────────┘ │ └──────────────────┘
└─────────────────────────────┘
```
### MODE=BACKEND
```
┌─────────────────────────────┐
│ Backend Container │
│ Port 8080 │
│ ┌───────────────────────┐ │
│ │ Spring Boot │ │
│ │ • API Endpoints │ │
│ │ • PDF Processing │ │
│ │ • UnoServer │ │
│ └───────────────────────┘ │
└─────────────────────────────┘
```
---
## Environment Variables
### MODE Configuration
| Variable | Values | Default | Description |
|----------|--------|---------|-------------|
| `MODE` | `BOTH`, `FRONTEND`, `BACKEND` | `BOTH` | Container operation mode |
### MODE=BOTH Specific
| Variable | Default | Description |
|----------|---------|-------------|
| `BACKEND_INTERNAL_PORT` | `8081` | Internal port for backend when MODE=BOTH |
### MODE=FRONTEND Specific
| Variable | Default | Description |
|----------|---------|-------------|
| `VITE_API_BASE_URL` | `http://backend:8080` | Backend URL for API proxying |
### Standard Configuration
All modes support standard Stirling-PDF environment variables:
- `DISABLE_ADDITIONAL_FEATURES` - Enable/disable OCR and LibreOffice features
- `DOCKER_ENABLE_SECURITY` - Enable authentication
- `PUID` / `PGID` - User/Group IDs
- `SYSTEM_MAXFILESIZE` - Max upload size (MB)
- `TESSERACT_LANGS` - Comma-separated OCR language codes
- `JAVA_CUSTOM_OPTS` - Additional JVM options
- `PROCESS_EXECUTOR_AUTO_UNO_SERVER` - Overrides `processExecutor.autoUnoServer` (true or false)
- `PROCESS_EXECUTOR_SESSION_LIMIT_LIBRE_OFFICE_SESSION_LIMIT` - Overrides `processExecutor.sessionLimit.libreOfficeSessionLimit`
- `UNO_SERVER_AUTO` - Legacy alias for `processExecutor.autoUnoServer`
- `UNO_SERVER_COUNT` - Legacy alias for `processExecutor.sessionLimit.libreOfficeSessionLimit`
- `UNO_SERVER_HEALTH_INTERVAL` - Seconds between unoserver PID checks (default: 30)
See full configuration docs at: https://docs.stirlingpdf.com
---
## Docker Compose Examples
### Example 1: All-in-One (MODE=BOTH)
**File:** `docker/compose/docker-compose-unified-both.yml`
```yaml
services:
stirling-pdf:
image: stirlingtools/stirling-pdf:unified
ports:
- "8080:8080"
volumes:
- ./data:/usr/share/tessdata:rw
- ./config:/configs:rw
environment:
MODE: BOTH
restart: unless-stopped
```
### Example 2: Separate Frontend & Backend
**File:** `docker/compose/docker-compose-unified-frontend.yml`
```yaml
services:
backend:
image: stirlingtools/stirling-pdf:unified
ports:
- "8081:8080"
environment:
MODE: BACKEND
volumes:
- ./data:/usr/share/tessdata:rw
- ./config:/configs:rw
frontend:
image: stirlingtools/stirling-pdf:unified
ports:
- "8080:8080"
environment:
MODE: FRONTEND
VITE_API_BASE_URL: http://backend:8080
depends_on:
- backend
```
### Example 3: Backend API Only
**File:** `docker/compose/docker-compose-unified-backend.yml`
```yaml
services:
stirling-pdf-api:
image: stirlingtools/stirling-pdf:unified
ports:
- "8080:8080"
environment:
MODE: BACKEND
volumes:
- ./data:/usr/share/tessdata:rw
- ./config:/configs:rw
restart: unless-stopped
```
---
## Building the Image
```bash
# From repository root
docker build -t stirlingtools/stirling-pdf:unified -f docker/Dockerfile.unified .
```
### Build Arguments
| Argument | Description |
|----------|-------------|
| `VERSION_TAG` | Version tag for the image |
Example:
```bash
docker build \
--build-arg VERSION_TAG=v1.0.0 \
-t stirlingtools/stirling-pdf:unified \
-f docker/Dockerfile.unified .
```
---
## Use Cases
### 1. Simple Deployment (MODE=BOTH)
- **Best for:** Personal use, small teams, simple deployments
- **Pros:** Single container, easy setup, minimal configuration
- **Cons:** Frontend and backend scale together
### 2. Scaled Frontend (MODE=FRONTEND + BACKEND)
- **Best for:** High traffic, need to scale frontend independently
- **Pros:** Scale frontend containers separately, CDN-friendly
- **Example:**
```yaml
services:
backend:
image: stirlingtools/stirling-pdf:unified
environment:
MODE: BACKEND
deploy:
replicas: 1
frontend:
image: stirlingtools/stirling-pdf:unified
environment:
MODE: FRONTEND
VITE_API_BASE_URL: http://backend:8080
deploy:
replicas: 5 # Scale frontend independently
```
### 3. API-Only (MODE=BACKEND)
- **Best for:** Headless deployments, custom frontends, API integrations
- **Pros:** Minimal resources, no nginx overhead
- **Example:** Use with external frontend or API consumers
### 4. Multi-Backend Setup
- **Best for:** Load balancing, high availability
- **Example:**
```yaml
services:
backend-1:
image: stirlingtools/stirling-pdf:unified
environment:
MODE: BACKEND
backend-2:
image: stirlingtools/stirling-pdf:unified
environment:
MODE: BACKEND
frontend:
image: stirlingtools/stirling-pdf:unified
environment:
MODE: FRONTEND
VITE_API_BASE_URL: http://load-balancer:8080
```
---
## Port Configuration
All modes use **port 8080** by default:
- **MODE=BOTH**: Nginx listens on 8080, proxies to backend on internal 8081
- **MODE=FRONTEND**: Nginx listens on 8080
- **MODE=BACKEND**: Spring Boot listens on 8080
**Expose port 8080** in all configurations:
```yaml
ports:
- "8080:8080"
```
---
## Health Checks
### MODE=BOTH and MODE=BACKEND
```yaml
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:8080/api/v1/info/status || exit 1"]
interval: 30s
timeout: 10s
retries: 3
```
### MODE=FRONTEND
```yaml
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:8080/ || exit 1"]
interval: 30s
timeout: 10s
retries: 3
```
---
## Troubleshooting
### Check logs
```bash
docker logs stirling-pdf-container
```
Look for the startup banner:
```
===================================
Stirling-PDF Unified Container
MODE: BOTH
===================================
```
### Invalid MODE error
```
ERROR: Invalid MODE 'XYZ'. Must be BOTH, FRONTEND, or BACKEND
```
**Fix:** Set `MODE` to one of the three valid values.
### Frontend can't connect to backend (MODE=FRONTEND)
**Check:**
1. `VITE_API_BASE_URL` points to correct backend URL
2. Backend container is running and accessible
3. Network connectivity between containers
### Backend not starting (MODE=BOTH or BACKEND)
**Check:**
1. Sufficient memory allocated (4GB recommended)
2. Java heap size (`JAVA_CUSTOM_OPTS`)
3. Volume permissions for `/tmp/stirling-pdf`
---
## Migration Guide
### From Separate Containers → MODE=BOTH
**Before:**
```yaml
services:
frontend:
image: stirlingtools/stirling-pdf:frontend
ports: ["80:80"]
backend:
image: stirlingtools/stirling-pdf:backend
ports: ["8080:8080"]
```
**After:**
```yaml
services:
stirling-pdf:
image: stirlingtools/stirling-pdf:unified
ports: ["8080:8080"]
environment:
MODE: BOTH
```
### From Legacy → MODE=BACKEND
```yaml
services:
stirling-pdf:
image: stirlingtools/stirling-pdf:latest
ports: ["8080:8080"]
```
**Becomes:**
```yaml
services:
stirling-pdf:
image: stirlingtools/stirling-pdf:unified
ports: ["8080:8080"]
environment:
MODE: BACKEND
```
---
## Performance Tuning
### MODE=BOTH
```yaml
environment:
JAVA_CUSTOM_OPTS: "-Xmx4g -XX:MaxRAMPercentage=75"
BACKEND_INTERNAL_PORT: 8081
deploy:
resources:
limits:
memory: 4G
reservations:
memory: 2G
```
### MODE=FRONTEND (Lightweight)
```yaml
deploy:
resources:
limits:
memory: 512M
reservations:
memory: 256M
```
### MODE=BACKEND (Heavy Processing)
```yaml
environment:
JAVA_CUSTOM_OPTS: "-Xmx8g"
deploy:
resources:
limits:
memory: 10G
reservations:
memory: 4G
```
---
## Security Considerations
1. **MODE=BOTH**: Backend not exposed externally (runs on internal port)
2. **MODE=BACKEND**: API exposed directly - consider API authentication
3. **MODE=FRONTEND**: Only serves static files - minimal attack surface
Enable security features:
```yaml
environment:
DOCKER_ENABLE_SECURITY: "true"
SECURITY_ENABLELOGIN: "true"
```
---
## Support
- Documentation: https://docs.stirlingpdf.com
- GitHub Issues: https://github.com/Stirling-Tools/Stirling-PDF/issues
- Docker Hub: https://hub.docker.com/r/stirlingtools/stirling-pdf
---
## License
MIT License - See repository for full details

View File

@@ -1,38 +0,0 @@
#!/bin/bash
# Build script for Stirling-PDF Unified Container
# Usage: ./build.sh [version-tag]
set -e
VERSION_TAG=${1:-latest}
IMAGE_NAME="stirlingtools/stirling-pdf:unified-${VERSION_TAG}"
echo "==================================="
echo "Building Stirling-PDF Unified Container"
echo "Version: $VERSION_TAG"
echo "Image: $IMAGE_NAME"
echo "==================================="
# Navigate to repository root (assuming script is in docker/unified/)
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
REPO_ROOT="$SCRIPT_DIR/../.."
cd "$REPO_ROOT"
# Build the image
docker build \
--build-arg VERSION_TAG="$VERSION_TAG" \
-t "$IMAGE_NAME" \
-f docker/Dockerfile.unified \
.
echo "==================================="
echo "✓ Build complete!"
echo "Image: $IMAGE_NAME"
echo ""
echo "Test the image:"
echo " MODE=BOTH: docker run -p 8080:8080 -e MODE=BOTH $IMAGE_NAME"
echo " MODE=FRONTEND: docker run -p 8080:8080 -e MODE=FRONTEND $IMAGE_NAME"
echo " MODE=BACKEND: docker run -p 8080:8080 -e MODE=BACKEND $IMAGE_NAME"
echo "==================================="

View File

@@ -1,379 +0,0 @@
#!/bin/bash
set -e
# Default MODE to BOTH if not set
MODE=${MODE:-BOTH}
echo "==================================="
echo "Stirling-PDF Unified Container"
echo "MODE: $MODE"
echo "==================================="
# Function to setup OCR (from init.sh)
setup_ocr() {
echo "Setting up OCR languages..."
# In Alpine, tesseract uses /usr/share/tessdata
TESSDATA_DIR="/usr/share/tessdata"
# Create tessdata directory
mkdir -p "$TESSDATA_DIR"
# Restore system languages from backup (Dockerfile moved them to tessdata-original)
if [ -d /usr/share/tessdata-original ]; then
echo "Restoring system tessdata from backup..."
cp -rn /usr/share/tessdata-original/* "$TESSDATA_DIR"/ 2>/dev/null || true
fi
# Note: If user mounted custom languages to /usr/share/tessdata, they'll be overlaid here.
# The cp -rn above won't overwrite user files, just adds missing system files.
# Install additional languages if specified
if [ -n "$TESSERACT_LANGS" ]; then
SPACE_SEPARATED_LANGS=$(echo $TESSERACT_LANGS | tr ',' ' ')
for LANG in $SPACE_SEPARATED_LANGS; do
case "$LANG" in
[a-zA-Z][a-zA-Z]|[a-zA-Z][a-zA-Z][a-zA-Z]|[a-zA-Z][a-zA-Z][a-zA-Z][a-zA-Z]|[a-zA-Z][a-zA-Z]_[a-zA-Z][a-zA-Z]|[a-zA-Z][a-zA-Z][a-zA-Z]_[a-zA-Z][a-zA-Z][a-zA-Z]|[a-zA-Z][a-zA-Z][a-zA-Z][a-zA-Z]_[a-zA-Z][a-zA-Z][a-zA-Z][a-zA-Z])
apk add --no-cache "tesseract-ocr-data-$LANG" 2>/dev/null || true
;;
esac
done
fi
# Point to the consolidated location
export TESSDATA_PREFIX="$TESSDATA_DIR"
echo "Using TESSDATA_PREFIX=$TESSDATA_PREFIX"
}
# Function to setup user permissions (from init-without-ocr.sh)
setup_permissions() {
echo "Setting up user permissions..."
export JAVA_TOOL_OPTIONS="${JAVA_BASE_OPTS} ${JAVA_CUSTOM_OPTS}"
# Update user and group IDs
if [ ! -z "$PUID" ] && [ "$PUID" != "$(id -u stirlingpdfuser)" ]; then
usermod -o -u "$PUID" stirlingpdfuser || true
fi
if [ ! -z "$PGID" ] && [ "$PGID" != "$(getent group stirlingpdfgroup | cut -d: -f3)" ]; then
groupmod -o -g "$PGID" stirlingpdfgroup || true
fi
umask "$UMASK" || true
# Install fonts if needed
if [[ -n "$LANGS" ]]; then
/scripts/installFonts.sh $LANGS
fi
# Ensure directories exist with correct permissions
mkdir -p /tmp/stirling-pdf || true
# Set ownership and permissions
chown -R stirlingpdfuser:stirlingpdfgroup \
$HOME /logs /scripts /usr/share/fonts/opentype/noto \
/configs /customFiles /pipeline /tmp/stirling-pdf \
/var/lib/nginx /var/log/nginx /usr/share/nginx \
/app.jar 2>/dev/null || echo "[WARN] Some chown operations failed, may run as host user"
chmod -R 755 /logs /scripts /usr/share/fonts/opentype/noto \
/configs /customFiles /pipeline /tmp/stirling-pdf 2>/dev/null || true
}
# Function to configure nginx
configure_nginx() {
local backend_url=$1
echo "Configuring nginx with backend URL: $backend_url"
sed -i "s|\${BACKEND_URL}|${backend_url}|g" /etc/nginx/nginx.conf
}
# Function to run as user or root depending on permissions
run_as_user() {
if [ "$(id -u)" = "0" ]; then
# Running as root, use su-exec
su-exec stirlingpdfuser "$@"
else
# Already running as non-root
exec "$@"
fi
}
run_with_timeout() {
local secs=$1; shift
if command -v timeout >/dev/null 2>&1; then
timeout "${secs}s" "$@"
else
"$@"
fi
}
run_as_user_with_timeout() {
local secs=$1; shift
if command -v timeout >/dev/null 2>&1; then
run_as_user timeout "${secs}s" "$@"
else
run_as_user "$@"
fi
}
tcp_port_check() {
local host=$1
local port=$2
local timeout_secs=${3:-5}
# Try nc first (most portable)
if command -v nc >/dev/null 2>&1; then
run_with_timeout "$timeout_secs" nc -z "$host" "$port" 2>/dev/null
return $?
fi
# Fallback to /dev/tcp (bash-specific)
if [ -n "${BASH_VERSION:-}" ] && command -v bash >/dev/null 2>&1; then
run_with_timeout "$timeout_secs" bash -c "exec 3<>/dev/tcp/${host}/${port}" 2>/dev/null
local result=$?
exec 3>&- 2>/dev/null || true
return $result
fi
# No TCP check method available
return 2
}
CONFIG_FILE=${CONFIG_FILE:-/configs/settings.yml}
UNOSERVER_PIDS=()
UNOSERVER_PORTS=()
UNOSERVER_UNO_PORTS=()
read_setting_value() {
local key=$1
if [ ! -f "$CONFIG_FILE" ]; then
return
fi
awk -F: -v key="$key" '
$1 ~ "^[[:space:]]*"key"[[:space:]]*$" {
val=$2
sub(/#.*/, "", val)
gsub(/^[[:space:]]+|[[:space:]]+$/, "", val)
gsub(/^["'"'"']|["'"'"']$/, "", val)
print val
exit
}
' "$CONFIG_FILE"
}
get_unoserver_auto() {
if [ -n "${PROCESS_EXECUTOR_AUTO_UNO_SERVER:-}" ]; then
echo "$PROCESS_EXECUTOR_AUTO_UNO_SERVER"
return
fi
if [ -n "${UNO_SERVER_AUTO:-}" ]; then
echo "$UNO_SERVER_AUTO"
return
fi
read_setting_value "autoUnoServer"
}
get_unoserver_count() {
if [ -n "${PROCESS_EXECUTOR_SESSION_LIMIT_LIBRE_OFFICE_SESSION_LIMIT:-}" ]; then
echo "$PROCESS_EXECUTOR_SESSION_LIMIT_LIBRE_OFFICE_SESSION_LIMIT"
return
fi
if [ -n "${UNO_SERVER_COUNT:-}" ]; then
echo "$UNO_SERVER_COUNT"
return
fi
read_setting_value "libreOfficeSessionLimit"
}
start_unoserver_instance() {
local port=$1
local uno_port=$2
run_as_user /opt/venv/bin/unoserver --port "$port" --interface 127.0.0.1 --uno-port "$uno_port" &
LAST_UNOSERVER_PID=$!
}
start_unoserver_watchdog() {
local interval=${UNO_SERVER_HEALTH_INTERVAL:-30}
case "$interval" in
''|*[!0-9]*) interval=30 ;;
esac
(
while true; do
local i=0
while [ "$i" -lt "${#UNOSERVER_PIDS[@]}" ]; do
local pid=${UNOSERVER_PIDS[$i]}
local port=${UNOSERVER_PORTS[$i]}
local uno_port=${UNOSERVER_UNO_PORTS[$i]}
local needs_restart=false
# Check 1: PID exists
if [ -z "$pid" ] || ! kill -0 "$pid" 2>/dev/null; then
echo "unoserver PID ${pid} not found for port ${port}"
needs_restart=true
else
# PID exists, now check if server is actually healthy
local health_ok=false
# Check 2A: Health check with unoping (best - checks actual server health)
if command -v unoping >/dev/null 2>&1; then
if run_as_user_with_timeout 5 unoping --host 127.0.0.1 --port "$port" >/dev/null 2>&1; then
health_ok=true
else
echo "unoserver health check failed (unoping) for port ${port}, trying TCP fallback"
fi
fi
# Check 2B: Fallback to TCP port check (verifies service is listening)
if [ "$health_ok" = false ]; then
tcp_port_check "127.0.0.1" "$port" 5
local tcp_rc=$?
if [ $tcp_rc -eq 0 ]; then
health_ok=true
elif [ $tcp_rc -eq 2 ]; then
echo "No TCP check available; falling back to PID-only for port ${port}"
health_ok=true
else
echo "unoserver TCP check failed for port ${port}"
needs_restart=true
fi
fi
fi
if [ "$needs_restart" = true ]; then
echo "Restarting unoserver on 127.0.0.1:${port} (uno-port ${uno_port})"
# Kill the old process if it exists
if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
kill -TERM "$pid" 2>/dev/null || true
sleep 1
kill -KILL "$pid" 2>/dev/null || true
fi
start_unoserver_instance "$port" "$uno_port"
UNOSERVER_PIDS[$i]=$LAST_UNOSERVER_PID
fi
i=$((i + 1))
done
sleep "$interval"
done
) &
}
start_unoserver_pool() {
local auto
auto="$(get_unoserver_auto)"
auto="${auto,,}"
if [ -z "$auto" ]; then
auto="true"
fi
if [ "$auto" != "true" ]; then
echo "Skipping local unoserver pool (autoUnoServer=$auto)"
return
fi
local count
count="$(get_unoserver_count)"
case "$count" in
''|*[!0-9]*) count=1 ;;
esac
if [ "$count" -le 0 ]; then
count=1
fi
local i=0
while [ "$i" -lt "$count" ]; do
local port=$((2003 + (i * 2)))
local uno_port=$((2004 + (i * 2)))
echo "Starting unoserver on 127.0.0.1:${port} (uno-port ${uno_port})"
UNOSERVER_PORTS+=("$port")
UNOSERVER_UNO_PORTS+=("$uno_port")
start_unoserver_instance "$port" "$uno_port"
UNOSERVER_PIDS+=("$LAST_UNOSERVER_PID")
i=$((i + 1))
done
start_unoserver_watchdog
}
# Setup OCR and permissions
setup_ocr
setup_permissions
# Handle different modes
case "$MODE" in
BOTH)
echo "Starting in BOTH mode: Frontend + Backend on port 8080"
# Configure nginx to proxy to internal backend
configure_nginx "http://localhost:${BACKEND_INTERNAL_PORT:-8081}"
# Start backend on internal port
echo "Starting backend on port ${BACKEND_INTERNAL_PORT:-8081}..."
run_as_user sh -c "java -Dfile.encoding=UTF-8 \
-Djava.io.tmpdir=/tmp/stirling-pdf \
-Dserver.port=${BACKEND_INTERNAL_PORT:-8081} \
-jar /app.jar" &
BACKEND_PID=$!
# Start unoserver pool for document conversion
start_unoserver_pool
# Wait for backend to start
sleep 3
# Start nginx on port 8080
echo "Starting nginx on port 8080..."
run_as_user nginx -g "daemon off;" &
NGINX_PID=$!
echo "==================================="
echo "✓ Frontend available at: http://localhost:8080"
echo "✓ Backend API at: http://localhost:8080/api"
echo "✓ Backend running internally on port ${BACKEND_INTERNAL_PORT:-8081}"
echo "==================================="
;;
FRONTEND)
echo "Starting in FRONTEND mode: Frontend only on port 8080"
# Configure nginx with external backend URL
BACKEND_URL=${VITE_API_BASE_URL:-http://backend:8080}
configure_nginx "$BACKEND_URL"
# Start nginx on port 8080
echo "Starting nginx on port 8080..."
run_as_user nginx -g "daemon off;" &
NGINX_PID=$!
echo "==================================="
echo "✓ Frontend available at: http://localhost:8080"
echo "✓ Proxying API calls to: $BACKEND_URL"
echo "==================================="
;;
BACKEND)
echo "Starting in BACKEND mode: Backend only on port 8080"
# Start backend on port 8080
echo "Starting backend on port 8080..."
run_as_user sh -c "java -Dfile.encoding=UTF-8 \
-Djava.io.tmpdir=/tmp/stirling-pdf \
-Dserver.port=8080 \
-jar /app.jar" &
BACKEND_PID=$!
start_unoserver_pool
echo "==================================="
echo "✓ Backend API available at: http://localhost:8080/api"
echo "✓ Swagger UI at: http://localhost:8080/swagger-ui/index.html"
echo "==================================="
;;
*)
echo "ERROR: Invalid MODE '$MODE'. Must be BOTH, FRONTEND, or BACKEND"
exit 1
;;
esac
# Wait for all background processes
wait

View File

@@ -1,121 +0,0 @@
# Run nginx as non-root user
pid /tmp/nginx.pid;
events {
worker_connections 1024;
}
http {
include /etc/nginx/mime.types;
default_type application/octet-stream;
# Add .mjs MIME type mapping
types {
text/javascript mjs;
}
# Gzip compression
gzip on;
gzip_vary on;
gzip_min_length 1024;
gzip_types text/plain text/css text/xml text/javascript application/javascript application/xml+rss application/json;
server {
listen 8080;
server_name _;
root /usr/share/nginx/html;
index index.html index.htm;
# Global settings for file uploads
client_max_body_size 100m;
# Handle client-side routing - support subpaths
location / {
try_files $uri $uri/ /index.html;
}
# Proxy API calls to backend
location /api/ {
proxy_pass ${BACKEND_URL}/api/;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header X-Forwarded-Port $server_port;
# Additional headers for proper API proxying
proxy_set_header Connection '';
proxy_http_version 1.1;
proxy_buffering off;
proxy_cache off;
# Timeout settings for large file uploads
proxy_connect_timeout 60s;
proxy_send_timeout 60s;
proxy_read_timeout 60s;
# Request size limits for file uploads
client_max_body_size 100m;
proxy_request_buffering off;
}
# Proxy Swagger UI to backend (including versioned paths)
location ~ ^/swagger-ui(.*)$ {
proxy_pass ${BACKEND_URL}/swagger-ui$1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header X-Forwarded-Port $server_port;
proxy_set_header Connection '';
proxy_http_version 1.1;
proxy_buffering off;
proxy_cache off;
}
# Proxy API docs to backend (with query parameters and sub-paths)
location ~ ^/v3/api-docs(.*)$ {
proxy_pass ${BACKEND_URL}/v3/api-docs$1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header X-Forwarded-Port $server_port;
}
# Proxy v1 API docs to backend (with query parameters and sub-paths)
location ~ ^/v1/api-docs(.*)$ {
proxy_pass ${BACKEND_URL}/v1/api-docs$1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header X-Forwarded-Port $server_port;
}
# Serve .mjs files with correct MIME type (must come before general static assets)
location ~* \.mjs$ {
try_files $uri =404;
add_header Content-Type "text/javascript; charset=utf-8" always;
expires 1y;
add_header Cache-Control "public, immutable";
}
# Cache static assets (but not API endpoints)
location ~* ^(?!/api/).*\.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ {
expires 1y;
add_header Cache-Control "public, immutable";
}
# Security headers
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
}
}