Base docker image (#5958)

Co-authored-by: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
Anthony Stirling
2026-03-25 15:41:58 +00:00
committed by GitHub
parent bb43e9dcdf
commit 9500acd69f
22 changed files with 1102 additions and 2552 deletions

View File

@@ -8,6 +8,9 @@ build/
**/build/
out/
target/
**/target/
bin/
version_builds/
# Gradle caches (local, not what's in the container)
.gradle/
@@ -16,9 +19,15 @@ target/
# Node / frontend
node_modules/
**/node_modules/
frontend/node_modules/
frontend/dist/
.npm/
.yarn/
# Tauri/desktop builds
src-tauri/target/
src-tauri/dist/
# IDE and editor
.idea/
.vscode/
@@ -46,7 +55,21 @@ Dockerfile*
**/test-results/
**/jacoco/
# Testing and documentation (not needed in build)
testing/
docs/
*.md
README*
# Local env
.env
.env.*
!.env.example
# Misc
*.swp
*.swo
*~
.DS_Store
.cache/
.pytest_cache/

View File

@@ -6,14 +6,20 @@ openapi: &openapi
- *build
- app/(common|core|proprietary)/src/main/java/**
# Paths belonging to the shared base image: its Dockerfile and the workflow
# that publishes it.
docker-base: &docker-base
  - 'docker/base/Dockerfile'
  - '.github/workflows/push-docker-base.yml'

# Paths belonging to the application images. The base-image group is pulled in
# through the alias at the end of the list.
docker: &docker
  - 'Dockerfile'
  - 'Dockerfile.fat'
  - 'Dockerfile.ultra-lite'
  - 'docker/embedded/Dockerfile'
  - 'docker/embedded/Dockerfile.fat'
  - 'docker/embedded/Dockerfile.ultra-lite'
  - '.github/workflows/build.yml'
  - '.github/workflows/push-docker.yml'
  - 'scripts/init.sh'
  - 'scripts/init-without-ocr.sh'
  - 'exampleYmlFiles/**'
  - *docker-base
project: &project
- app/(common|core|proprietary)/src/(main|test)/java/**
@@ -24,6 +30,7 @@ project: &project
- libs/**
- "testing/**/!(requirements*.txt|requirements*.in)*"
- *docker
- *docker-base
- gradle.properties
- gradlew
- gradlew.bat

View File

@@ -30,6 +30,7 @@ jobs:
project: ${{ steps.changes.outputs.project }}
openapi: ${{ steps.changes.outputs.openapi }}
frontend: ${{ steps.changes.outputs.frontend }}
docker-base: ${{ steps.changes.outputs.docker-base }}
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@58077d3c7e43986b6b15fba718e8ea69e387dfcc # v2.15.1
@@ -402,6 +403,17 @@ jobs:
- name: Checkout Repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
# Authenticate against ghcr.io with the workflow token.
- name: Login to GitHub Container Registry
  uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3.7.0
  with:
    registry: ghcr.io
    username: ${{ github.actor }}
    password: ${{ github.token }}

# Image references must be lowercase; expose the lowercased owner as the
# step output `lowercase`.
- name: Convert repository owner to lowercase
  id: repoowner
  env:
    REPO_OWNER: ${{ github.repository_owner }}
  run: |
    owner_lc="$(printf '%s\n' "$REPO_OWNER" | awk '{print tolower($0)}')"
    echo "lowercase=${owner_lc}" >> "$GITHUB_OUTPUT"
- name: Free disk space on runner
run: |
echo "Disk space before cleanup:" && df -h
@@ -446,6 +458,22 @@ jobs:
id: buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0
# On pull requests that touch the base image, build it on the runner so the
# application image can be tested against the proposed base.
# NOTE(review): this stores the image in the Docker daemon. If the following
# build step uses a docker-container buildx builder, it may not be able to
# resolve a daemon-local `stirling-pdf-base:pr-test` in FROM — confirm.
- name: Build base image locally (PR base change only)
  if: github.event_name == 'pull_request' && needs.files-changed.outputs.docker-base == 'true'
  run: |
    docker build -t stirling-pdf-base:pr-test -f docker/base/Dockerfile docker/base

# Choose the base image reference and target platforms for the build step.
# Default: published base image, both architectures.
# PR with base changes: the locally built image, amd64 only.
- name: Set base image and platform for this build
  id: build-params
  env:
    EVENT_NAME: ${{ github.event_name }}
    BASE_CHANGED: ${{ needs.files-changed.outputs.docker-base }}
    OWNER_LC: ${{ steps.repoowner.outputs.lowercase }}
  run: |
    base_image="ghcr.io/${OWNER_LC}/stirling-pdf-base:latest"
    platforms="linux/amd64,linux/arm64/v8"
    if [ "$EVENT_NAME" = "pull_request" ] && [ "$BASE_CHANGED" = "true" ]; then
      base_image="stirling-pdf-base:pr-test"
      platforms="linux/amd64"
    fi
    {
      echo "base_image=${base_image}"
      echo "platforms=${platforms}"
    } >> "$GITHUB_OUTPUT"
- name: Build ${{ matrix.docker-rev }}
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
with:
@@ -455,7 +483,9 @@ jobs:
push: false
cache-from: type=gha,scope=${{ matrix.cache-scope }}
cache-to: type=gha,mode=max,scope=${{ matrix.cache-scope }}
platforms: linux/amd64,linux/arm64/v8
platforms: ${{ steps.build-params.outputs.platforms }}
build-args: |
BASE_IMAGE=${{ steps.build-params.outputs.base_image }}
provenance: true
sbom: true

119
.github/workflows/push-docker-base.yml vendored Normal file
View File

@@ -0,0 +1,119 @@
name: Push Docker Base Image

# Builds the Stirling-PDF base image from docker/base/Dockerfile, pushes it to
# Docker Hub and GHCR, and signs it with cosign when a signing key is present.
on:
  push:
    branches:
      - baseDockerImage
  workflow_dispatch:
    inputs:
      version:
        description: 'Base image version (e.g., 1.0.0, 1.0.1)'
        required: true
        type: string

permissions:
  contents: read

jobs:
  push-base:
    if: ${{ vars.CI_PROFILE != 'lite' && github.actor == 'Frooodle' }}
    runs-on: ubuntu-24.04-8core
    # Job-level permissions replace the workflow-level block, so contents: read
    # has to be repeated here for the checkout step.
    permissions:
      contents: read
      packages: write
      id-token: write
    steps:
      # Runs first so every later step is covered by the egress audit.
      - name: Harden Runner
        uses: step-security/harden-runner@58077d3c7e43986b6b15fba718e8ea69e387dfcc # v2.15.1
        with:
          egress-policy: audit

      # Second check on top of the job-level `if`.
      - name: Verify authorized user
        env:
          ACTOR: ${{ github.actor }}
        run: |
          if [ "$ACTOR" != "Frooodle" ]; then
            echo "Error: Only Frooodle is authorized to run this workflow"
            exit 1
          fi

      # The user-supplied version is passed through the environment rather than
      # interpolated into the script, and is validated before being written to
      # the step output.
      - name: Set version
        id: version
        env:
          EVENT_NAME: ${{ github.event_name }}
          VERSION_INPUT: ${{ github.event.inputs.version }}
        run: |
          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
            VERSION="$VERSION_INPUT"
          else
            VERSION="1.0.0"
          fi
          # Reject empty values, a leading '.' or '-', and any character that is
          # not allowed in an image tag (this also rejects embedded newlines).
          case "$VERSION" in
            '' | [.-]* | *[!A-Za-z0-9_.-]*)
              echo "Error: '${VERSION}' is not a valid image tag" >&2
              exit 1
              ;;
          esac
          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3.7.0
        with:
          username: ${{ secrets.DOCKER_HUB_USERNAME }}
          password: ${{ secrets.DOCKER_HUB_API }}

      - name: Login to GitHub Container Registry
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3.7.0
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ github.token }}

      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0

      - name: Set up QEMU
        uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3.7.0

      - name: Convert repository owner to lowercase
        id: repoowner
        env:
          REPO_OWNER: ${{ github.repository_owner }}
        run: |
          echo "lowercase=$(printf '%s\n' "$REPO_OWNER" | awk '{print tolower($0)}')" >> "$GITHUB_OUTPUT"

      # Raw tags do not produce `latest` automatically. The build workflow
      # pulls `stirling-pdf-base:latest` from GHCR, so it is published
      # explicitly next to the version tag.
      - name: Generate tags for base image
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # v6.0.0
        with:
          images: |
            ${{ secrets.DOCKER_HUB_ORG_USERNAME }}/stirling-pdf-base
            ghcr.io/${{ steps.repoowner.outputs.lowercase }}/stirling-pdf-base
          tags: |
            type=raw,value=${{ steps.version.outputs.version }}
            type=raw,value=latest

      - name: Build and push base image
        id: build-push-base
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
        with:
          builder: ${{ steps.buildx.outputs.name }}
          context: docker/base
          file: ./docker/base/Dockerfile
          push: true
          cache-from: type=gha,scope=stirling-pdf-base
          cache-to: type=gha,mode=max,scope=stirling-pdf-base
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64/v8
          provenance: true
          sbom: true

      - name: Install cosign
        uses: sigstore/cosign-installer@faadad0cce49287aee09b3a48701e75088a2c6ad # v4.0.0
        with:
          cosign-release: "v2.4.1"

      # Signs every pushed tag by digest. Signing is skipped with a warning when
      # no private key is configured.
      - name: Sign base images
        env:
          DIGEST: ${{ steps.build-push-base.outputs.digest }}
          TAGS: ${{ steps.meta.outputs.tags }}
          COSIGN_PRIVATE_KEY: ${{ secrets.COSIGN_PRIVATE_KEY }}
          COSIGN_PASSWORD: ${{ secrets.COSIGN_PASSWORD }}
        run: |
          if [ -n "$COSIGN_PRIVATE_KEY" ]; then
            echo "$TAGS" | tr ',' '\n' | while read -r tag; do
              # Skip blank lines so cosign is never called with an empty reference.
              [ -n "$tag" ] || continue
              cosign sign --yes \
                --key env://COSIGN_PRIVATE_KEY \
                "${tag}@${DIGEST}"
            done
          else
            echo "Warning: COSIGN_PRIVATE_KEY not set, skipping image signing"
          fi

View File

@@ -130,7 +130,9 @@ jobs:
cache-to: type=gha,mode=max,scope=stirling-pdf-latest
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: VERSION_TAG=${{ steps.versionNumber.outputs.versionNumber }}
build-args: |
VERSION_TAG=${{ steps.versionNumber.outputs.versionNumber }}
BASE_VERSION=1.0.0
platforms: linux/amd64,linux/arm64/v8
provenance: true
sbom: true

3
.gitignore vendored
View File

@@ -29,6 +29,9 @@ clientWebUI/
exampleYmlFiles/stirling/
/stirling/
/testing/file_snapshots
/testing/cucumber/junit/
/testing/cucumber/report.html
/testing/.failed_tests
SwaggerDoc.json
# Runtime storage for uploaded files and user data (not Java source code)

View File

@@ -1,157 +0,0 @@
# Unified Dockerfile - Frontend + Backend in single container
# Supports MODE parameter: BOTH (default), FRONTEND, BACKEND
# Stage 1: Build Frontend
FROM node:20-alpine@sha256:658d0f63e501824d6c23e06d4bb95c71e7d704537c9d9272f488ac03a370d448 AS frontend-build
WORKDIR /app
COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci
COPY frontend .
# Override VITE_API_BASE_URL to use relative paths for production
# This ensures frontend works with nginx proxy setup
RUN DISABLE_ADDITIONAL_FEATURES=false VITE_API_BASE_URL=/ npm run build
# Stage 2: Build Backend (server-only JAR - no UI)
FROM gradle:8.14-jdk21@sha256:051d9a116793bdc5175a3f97a545718b750489eee85a7da20913c8a53f722a72 AS backend-build
COPY build.gradle .
COPY settings.gradle .
COPY gradlew .
COPY gradle gradle/
COPY app/core/build.gradle core/.
COPY app/common/build.gradle common/.
COPY app/proprietary/build.gradle proprietary/.
ENV JAVA_TOOL_OPTIONS="--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
--add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \
--add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \
--add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED"
RUN ./gradlew build -x spotlessApply -x spotlessCheck -x test -x sonarqube || return 0
WORKDIR /app
COPY . .
# Build server-only JAR (no frontend, includes security features controlled by DOCKER_ENABLE_SECURITY at runtime)
RUN DISABLE_ADDITIONAL_FEATURES=false \
./gradlew clean build -x spotlessApply -x spotlessCheck -x test -x sonarqube
# Stage 3: Final unified image
FROM alpine:3.22.1@sha256:4bcff63911fcb4448bd4fdacec207030997caf25e9bea4045fa6c8c44de311d1
ARG VERSION_TAG
# Labels
LABEL org.opencontainers.image.title="Stirling-PDF Unified"
LABEL org.opencontainers.image.description="Unified container for Stirling-PDF - Frontend + Backend with MODE parameter"
LABEL org.opencontainers.image.source="https://github.com/Stirling-Tools/Stirling-PDF"
LABEL org.opencontainers.image.licenses="MIT"
LABEL org.opencontainers.image.vendor="Stirling-Tools"
LABEL org.opencontainers.image.url="https://www.stirlingpdf.com"
LABEL org.opencontainers.image.documentation="https://docs.stirlingpdf.com"
LABEL maintainer="Stirling-Tools"
LABEL org.opencontainers.image.authors="Stirling-Tools"
LABEL org.opencontainers.image.version="${VERSION_TAG}"
LABEL org.opencontainers.image.keywords="PDF, manipulation, unified, API, Spring Boot, React"
# Copy backend files
COPY scripts /scripts
COPY app/core/src/main/resources/static/fonts/*.ttf /usr/share/fonts/opentype/noto/
# Copy built JAR
# Use numeric UID:GID (1000:1000) since the named user doesn't exist yet at COPY time
COPY --from=backend-build --chown=1000:1000 \
/app/app/core/build/libs/*.jar app.jar
# Copy frontend files
COPY --from=frontend-build /app/dist /usr/share/nginx/html
# Copy nginx configuration
COPY docker/unified/nginx.conf /etc/nginx/nginx.conf
COPY docker/unified/entrypoint.sh /entrypoint.sh
# Environment Variables
ENV VERSION_TAG=$VERSION_TAG \
JAVA_BASE_OPTS="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UnlockExperimentalVMOptions -XX:MaxRAMPercentage=75 -XX:InitiatingHeapOccupancyPercent=20 -XX:+G1PeriodicGCInvokesConcurrent -XX:G1PeriodicGCInterval=10000 -XX:+UseStringDeduplication -XX:G1PeriodicGCSystemLoadThreshold=70" \
JAVA_CUSTOM_OPTS="" \
HOME=/home/stirlingpdfuser \
PUID=1000 \
PGID=1000 \
UMASK=022 \
PYTHONPATH=/usr/lib/libreoffice/program:/opt/venv/lib/python3.12/site-packages \
UNO_PATH=/usr/lib/libreoffice/program \
URE_BOOTSTRAP=file:///usr/lib/libreoffice/program/fundamentalrc \
PATH=$PATH:/opt/venv/bin \
STIRLING_TEMPFILES_DIRECTORY=/tmp/stirling-pdf \
TMPDIR=/tmp/stirling-pdf \
TEMP=/tmp/stirling-pdf \
TMP=/tmp/stirling-pdf \
MODE=BOTH \
BACKEND_INTERNAL_PORT=8081 \
VITE_API_BASE_URL=http://localhost:8080
# Install all dependencies
# Removed wasteful pip upgrade; chown moved to COPY above
RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
echo "@community https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \
apk upgrade --no-cache -a && \
apk add --no-cache \
ca-certificates \
tzdata \
tini \
bash \
curl \
shadow \
su-exec \
openssl \
openssl-dev \
openjdk21-jre \
nginx \
# Doc conversion
gcompat \
libc6-compat \
libreoffice \
imagemagick \
# pdftohtml
poppler-utils \
# OCR MY PDF
unpaper \
tesseract-ocr-data-eng \
tesseract-ocr-data-chi_sim \
tesseract-ocr-data-deu \
tesseract-ocr-data-fra \
tesseract-ocr-data-por \
ocrmypdf \
# CV
py3-opencv \
python3 \
py3-pip \
py3-pillow@testing \
py3-pdf2image@testing && \
python3 -m venv /opt/venv && \
/opt/venv/bin/pip install --no-cache-dir unoserver weasyprint && \
ln -s /usr/lib/libreoffice/program/uno.py /opt/venv/lib/python3.12/site-packages/ && \
ln -s /usr/lib/libreoffice/program/unohelper.py /opt/venv/lib/python3.12/site-packages/ && \
ln -s /usr/lib/libreoffice/program /opt/venv/lib/python3.12/site-packages/LibreOffice && \
# Clean up pip + setuptools from venv
find /opt/venv -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true; \
rm -rf /opt/venv/lib/python*/site-packages/pip \
/opt/venv/lib/python*/site-packages/setuptools && \
mv /usr/share/tessdata /usr/share/tessdata-original && \
mkdir -p $HOME /configs /configs/heap_dumps /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders /tmp/stirling-pdf /pipeline/watchedFolders /pipeline/finishedFolders && \
mkdir -p /var/lib/nginx/tmp /var/log/nginx && \
fc-cache -f -v && \
chmod +x /scripts/* && \
chmod +x /entrypoint.sh && \
# User permissions
addgroup -S stirlingpdfgroup && adduser -S stirlingpdfuser -G stirlingpdfgroup && \
chown -R stirlingpdfuser:stirlingpdfgroup $HOME /scripts /pipeline /usr/share/fonts/opentype/noto /configs /customFiles /pipeline /tmp/stirling-pdf /var/lib/nginx /var/log/nginx /usr/share/nginx
EXPOSE 8080/tcp
ENTRYPOINT ["tini", "--", "/entrypoint.sh"]

View File

@@ -1,119 +0,0 @@
# Unified Ultra-Lite Dockerfile - Frontend + Backend in single container with minimal dependencies
# Supports MODE parameter: BOTH (default), FRONTEND, BACKEND
# Stage 1: Build Frontend
FROM node:20-alpine@sha256:658d0f63e501824d6c23e06d4bb95c71e7d704537c9d9272f488ac03a370d448 AS frontend-build
WORKDIR /app
COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci
COPY frontend .
# Override VITE_API_BASE_URL to use relative paths for production
# This ensures frontend works with nginx proxy setup
RUN DISABLE_ADDITIONAL_FEATURES=true VITE_API_BASE_URL=/ npm run build
# Stage 2: Build Backend
FROM gradle:8.14-jdk21@sha256:051d9a116793bdc5175a3f97a545718b750489eee85a7da20913c8a53f722a72 AS backend-build
COPY build.gradle .
COPY settings.gradle .
COPY gradlew .
COPY gradle gradle/
COPY app/core/build.gradle core/.
COPY app/common/build.gradle common/.
COPY app/proprietary/build.gradle proprietary/.
ENV JAVA_TOOL_OPTIONS="--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
--add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \
--add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \
--add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED"
RUN ./gradlew build -x spotlessApply -x spotlessCheck -x test -x sonarqube || return 0
WORKDIR /app
COPY . .
RUN DISABLE_ADDITIONAL_FEATURES=true \
./gradlew clean build -x spotlessApply -x spotlessCheck -x test -x sonarqube
# Stage 3: Final unified ultra-lite image
FROM alpine:3.22.1@sha256:4bcff63911fcb4448bd4fdacec207030997caf25e9bea4045fa6c8c44de311d1
ARG VERSION_TAG
# Labels
LABEL org.opencontainers.image.title="Stirling-PDF Unified Ultra-Lite"
LABEL org.opencontainers.image.description="Unified ultra-lite container for Stirling-PDF - Frontend + Backend with minimal dependencies"
LABEL org.opencontainers.image.source="https://github.com/Stirling-Tools/Stirling-PDF"
LABEL org.opencontainers.image.licenses="MIT"
LABEL org.opencontainers.image.vendor="Stirling-Tools"
LABEL org.opencontainers.image.url="https://www.stirlingpdf.com"
LABEL org.opencontainers.image.documentation="https://docs.stirlingpdf.com"
LABEL maintainer="Stirling-Tools"
LABEL org.opencontainers.image.authors="Stirling-Tools"
LABEL org.opencontainers.image.version="${VERSION_TAG}"
LABEL org.opencontainers.image.keywords="PDF, manipulation, unified, ultra-lite, API, Spring Boot, React"
# Copy backend files
COPY scripts/init-without-ocr.sh /scripts/init-without-ocr.sh
COPY scripts/installFonts.sh /scripts/installFonts.sh
# Copy built JAR
# Use numeric UID:GID (1000:1000) since the named user doesn't exist yet at COPY time
COPY --from=backend-build --chown=1000:1000 \
/app/app/core/build/libs/*.jar app.jar
# Copy frontend files
COPY --from=frontend-build /app/dist /usr/share/nginx/html
# Copy nginx configuration
COPY docker/unified/nginx.conf /etc/nginx/nginx.conf
COPY docker/unified/entrypoint.sh /entrypoint.sh
# Environment Variables
ENV DISABLE_ADDITIONAL_FEATURES=false \
VERSION_TAG=$VERSION_TAG \
JAVA_BASE_OPTS="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UnlockExperimentalVMOptions -XX:MaxRAMPercentage=75 -XX:InitiatingHeapOccupancyPercent=20 -XX:+G1PeriodicGCInvokesConcurrent -XX:G1PeriodicGCInterval=10000 -XX:+UseStringDeduplication -XX:G1PeriodicGCSystemLoadThreshold=70" \
JAVA_CUSTOM_OPTS="" \
HOME=/home/stirlingpdfuser \
PUID=1000 \
PGID=1000 \
UMASK=022 \
STIRLING_TEMPFILES_DIRECTORY=/tmp/stirling-pdf \
TMPDIR=/tmp/stirling-pdf \
TEMP=/tmp/stirling-pdf \
TMP=/tmp/stirling-pdf \
MODE=BOTH \
BACKEND_INTERNAL_PORT=8081 \
ENDPOINTS_GROUPS_TO_REMOVE=CLI
# Install minimal dependencies
# /app.jar chown moved to COPY above
RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
echo "@community https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \
apk upgrade --no-cache -a && \
apk add --no-cache \
ca-certificates \
tzdata \
tini \
bash \
curl \
shadow \
su-exec \
openjdk21-jre \
nginx && \
mkdir -p $HOME /configs /configs/heap_dumps /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders /tmp/stirling-pdf /pipeline/watchedFolders /pipeline/finishedFolders && \
mkdir -p /usr/share/fonts/opentype/noto /var/lib/nginx/tmp /var/log/nginx && \
chmod +x /scripts/*.sh && \
chmod +x /entrypoint.sh && \
# User permissions
addgroup -S stirlingpdfgroup && adduser -S stirlingpdfuser -G stirlingpdfgroup && \
chown -R stirlingpdfuser:stirlingpdfgroup $HOME /scripts /pipeline /configs /customFiles /pipeline /tmp/stirling-pdf /var/lib/nginx /var/log/nginx /usr/share/nginx
EXPOSE 8080/tcp
ENTRYPOINT ["tini", "--", "/entrypoint.sh"]

658
docker/base/Dockerfile Normal file
View File

@@ -0,0 +1,658 @@
# Stirling-PDF Base Image
# Pre-built tools: Calibre, Ghostscript, QPDF, ImageMagick, Python venv with AI/OCR packages
# This image is reused by Dockerfile and Dockerfile.fat to avoid rebuilding tools on every app release
ARG TARGETPLATFORM
# Stage 1: Build and strip Calibre
FROM ubuntu:noble AS calibre-build
ARG TARGETPLATFORM
ARG CALIBRE_VERSION=9.4.0
ARG CALIBRE_STRIP_WEBENGINE=false
# Download the official Calibre binary bundle for the build architecture, then
# delete everything that CLI conversion (ebook-convert / ebook-meta) does not
# need, and finish with a smoke test of the stripped tree.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    set -eux; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        ca-certificates curl xz-utils libnss3 libfontconfig1 \
        libgl1 libegl1 libdbus-1-3 libasound2t64 libxcomposite1 \
        libxrandr2 libxkbcommon0 libxi6 libxtst6 libopengl0 \
        poppler-utils; \
    rm -rf /var/lib/apt/lists/*; \
    \
    case "$(uname -m)" in \
        x86_64) CALIBRE_ARCH="x86_64" ;; \
        aarch64) CALIBRE_ARCH="arm64" ;; \
        *) echo "Unsupported arch: $(uname -m)"; exit 1 ;; \
    esac; \
    \
    # Up to three download attempts (each with curl's own retries). The flag
    # makes a total failure stop the build here with a clear message instead of
    # surfacing later as a tar error on a missing archive.
    downloaded=false; \
    for attempt in 1 2 3; do \
        echo "Downloading Calibre (attempt $attempt/3)..."; \
        if curl -fsSL --max-time 300 --retry 3 --retry-delay 5 --retry-max-time 900 \
            "https://download.calibre-ebook.com/${CALIBRE_VERSION}/calibre-${CALIBRE_VERSION}-${CALIBRE_ARCH}.txz" \
            -o /tmp/calibre.txz; then \
            downloaded=true; \
            break; \
        fi; \
        if [ "$attempt" -lt 3 ]; then sleep 10; fi; \
    done; \
    if [ "$downloaded" != "true" ]; then \
        echo "ERROR: failed to download Calibre ${CALIBRE_VERSION} for ${CALIBRE_ARCH}" >&2; \
        exit 1; \
    fi; \
    mkdir -p /opt/calibre; \
    tar xJf /tmp/calibre.txz -C /opt/calibre; \
    rm /tmp/calibre.txz; \
    \
    # We only need Qt6 WebEngine (Chromium) for ebook->PDF output.
    # PDF INPUT now uses the pdftohtml engine (poppler), not Qt.
    rm -f /opt/calibre/lib/libQt6Designer* \
        /opt/calibre/lib/libQt6Multimedia* \
        /opt/calibre/lib/libQt6SpatialAudio.so.* \
        /opt/calibre/lib/libQt6NetworkAuth.so.* \
        /opt/calibre/lib/libQt6Concurrent.so.* \
        /opt/calibre/lib/libQt6OpenGLWidgets.so.* \
        /opt/calibre/lib/libQt6QuickWidgets.so.* \
        /opt/calibre/lib/libQt6Svg.so.* \
        /opt/calibre/lib/libQt6SvgWidgets.so.* \
        /opt/calibre/lib/libQt6Pdf*.so.* \
        /opt/calibre/lib/libQt6ShaderTools.so.* \
        /opt/calibre/lib/libQt6SerialPort.so.* \
        /opt/calibre/lib/libQt6Sensors.so.* \
        /opt/calibre/lib/libQt6Test.so.* \
        /opt/calibre/lib/libQt6Sql.so.* \
        /opt/calibre/lib/libQt6RemoteObjects.so.* \
        /opt/calibre/lib/libQt6Help.so.* \
        /opt/calibre/lib/libQt6VirtualKeyboard.so.* \
        /opt/calibre/lib/libQt6WaylandClient.so.* \
        /opt/calibre/lib/libQt6WaylandCompositor.so.* \
        /opt/calibre/lib/libQt6Bluetooth.so.* \
        /opt/calibre/lib/libQt6Nfc.so.* \
        /opt/calibre/lib/libQt6Charts.so.* \
        /opt/calibre/lib/libQt6DataVisualization.so.* \
        /opt/calibre/lib/libQt6Scxml.so.* \
        /opt/calibre/lib/libQt6StateMachine.so.* \
        /opt/calibre/lib/libQt6TextToSpeech.so.* \
        /opt/calibre/lib/libQt63D*.so.* \
        /opt/calibre/lib/libavcodec.so.* \
        /opt/calibre/lib/libavfilter.so.* \
        /opt/calibre/lib/libavformat.so.* \
        /opt/calibre/lib/libavutil.so.* \
        /opt/calibre/lib/libavdevice.so.* \
        /opt/calibre/lib/libpostproc.so.* \
        /opt/calibre/lib/libswresample.so.* \
        /opt/calibre/lib/libswscale.so.* \
        /opt/calibre/lib/libspeex.so.* \
        /opt/calibre/lib/libFLAC.so.* \
        /opt/calibre/lib/libopus.so.* \
        /opt/calibre/lib/libvorbis*.so.* \
        /opt/calibre/lib/libasyncns.so.* \
        /opt/calibre/lib/libspeechd.so.* \
        /opt/calibre/lib/libespeak-ng.so.* \
        /opt/calibre/lib/libonnxruntime.so.* \
        /opt/calibre/lib/libgio-2.0.so.* \
        /opt/calibre/lib/libzstd.so.* \
        /opt/calibre/lib/libhunspell-1.7.so.* \
        /opt/calibre/lib/libbrotlienc.so.* \
        /opt/calibre/lib/libbrotlicommon.so.* \
        /opt/calibre/lib/libbrotlidec.so.* \
        /opt/calibre/lib/libstemmer.so.* \
        /opt/calibre/lib/libmtp.so.* \
        /opt/calibre/lib/libncursesw.so.* \
        /opt/calibre/lib/libchm.so.* \
        /opt/calibre/lib/libgcrypt.so.* \
        /opt/calibre/lib/libgpg-error.so.* \
        /opt/calibre/lib/libicuio.so.* \
        /opt/calibre/lib/libreadline.so.* \
        /opt/calibre/lib/libusb-1.0.so.* \
        /opt/calibre/lib/libpulse*.so.* \
        /opt/calibre/lib/libsndfile.so.* \
        /opt/calibre/lib/libmpv.so.* \
        /opt/calibre/lib/libass.so.* \
        /opt/calibre/lib/librubberband.so.* \
        /opt/calibre/lib/libsamplerate.so.*; \
    rm -rf /opt/calibre/lib/qt6/plugins/platformthemes \
        /opt/calibre/lib/qt6/plugins/multimedia \
        /opt/calibre/lib/qt6/plugins/designer \
        /opt/calibre/lib/qt6/plugins/virtualkeyboard \
        /opt/calibre/lib/qt6/plugins/wayland* \
        /opt/calibre/lib/qt6/plugins/texttospeech \
        /opt/calibre/lib/qt6/plugins/position \
        /opt/calibre/lib/qt6/plugins/sensors \
        /opt/calibre/lib/qt6/plugins/sqldrivers \
        /opt/calibre/lib/qt6/plugins/canbus \
        /opt/calibre/lib/qt6/plugins/sceneparsers \
        /opt/calibre/lib/qt6/plugins/renderers \
        /opt/calibre/lib/qt6/plugins/geometryloaders \
        /opt/calibre/lib/qt6/plugins/generic \
        /opt/calibre/lib/qt6/plugins/qmltooling \
        /opt/calibre/lib/qt6/libexec/QtWebEngineProcess.bak; \
    rm -rf /opt/calibre/plugins/sqldrivers \
        /opt/calibre/plugins/multimedia \
        /opt/calibre/plugins/wayland-shell-integration \
        /opt/calibre/plugins/wayland-graphics-integration-client \
        /opt/calibre/plugins/wayland-decoration-client \
        /opt/calibre/plugins/texttospeech \
        /opt/calibre/plugins/platformthemes \
        /opt/calibre/plugins/platforminputcontexts \
        /opt/calibre/plugins/egldeviceintegrations \
        /opt/calibre/plugins/iconengines; \
    \
    # Remove GUI executables but keep ebook-convert, ebook-meta, and calibre-parallel.
    rm -f /opt/calibre/calibre \
        /opt/calibre/calibre-server \
        /opt/calibre/calibre-smtp \
        /opt/calibre/calibre-debug \
        /opt/calibre/calibre-customize \
        /opt/calibre/calibredb \
        /opt/calibre/ebook-viewer \
        /opt/calibre/ebook-edit \
        /opt/calibre/ebook-polish \
        /opt/calibre/ebook-device \
        /opt/calibre/fetch-ebook-metadata \
        /opt/calibre/lrf2lrs \
        /opt/calibre/lrs2lrf \
        /opt/calibre/markdown-calibre \
        /opt/calibre/web2disk; \
    \
    # Remove Python modules not needed for conversion.
    rm -rf /opt/calibre/lib/calibre/gui2 \
        /opt/calibre/lib/calibre/devices \
        /opt/calibre/lib/calibre/library \
        /opt/calibre/lib/calibre/db \
        /opt/calibre/lib/calibre/srv \
        /opt/calibre/lib/calibre/spell \
        /opt/calibre/lib/calibre/live \
        /opt/calibre/lib/calibre/utils/piper \
        /opt/calibre/lib/calibre/utils/certgen.so \
        /opt/calibre/lib/calibre/utils/https \
        /opt/calibre/lib/calibre/utils/mdns; \
    \
    # Remove resources not needed for CLI conversion.
    rm -rf /opt/calibre/resources/images \
        /opt/calibre/resources/icons \
        /opt/calibre/resources/icons.rcc \
        /opt/calibre/resources/content-server \
        /opt/calibre/resources/editor* \
        /opt/calibre/resources/viewer \
        /opt/calibre/resources/viewer.js \
        /opt/calibre/resources/viewer.html \
        /opt/calibre/resources/recipes \
        /opt/calibre/resources/dictionaries \
        /opt/calibre/resources/hyphenation \
        /opt/calibre/resources/catalog \
        /opt/calibre/resources/calibre-mimetypes.xml \
        /opt/calibre/resources/changelog.json \
        /opt/calibre/resources/user-agent-data.json \
        /opt/calibre/resources/builtin_recipes.zip \
        /opt/calibre/resources/builtin_recipes.xml \
        /opt/calibre/resources/stylelint-bundle.min.js \
        /opt/calibre/resources/stylelint.js \
        /opt/calibre/resources/rapydscript \
        /opt/calibre/resources/quick_start \
        /opt/calibre/resources/piper-voices.json \
        /opt/calibre/resources/images.qrc \
        /opt/calibre/resources/mozilla-ca-certs.pem \
        /opt/calibre/resources/ebook-convert-complete.calibre_msgpack \
        /opt/calibre/resources/mathjax \
        /opt/calibre/resources/common-english-words.txt \
        /opt/calibre/resources/calibre-portable.sh \
        /opt/calibre/resources/calibre-portable.bat \
        /opt/calibre/resources/metadata_sqlite.sql \
        /opt/calibre/resources/notes_sqlite.sql \
        /opt/calibre/resources/fts_sqlite.sql \
        /opt/calibre/resources/fts_triggers.sql \
        /opt/calibre/resources/jacket \
        /opt/calibre/resources/editor-functions.json \
        /opt/calibre/resources/calibre-ebook-root-CA.crt \
        /opt/calibre/resources/csscolorparser.js \
        /opt/calibre/resources/lookup.js \
        /opt/calibre/resources/pdf-mathjax-loader.js \
        /opt/calibre/resources/scraper.js \
        /opt/calibre/resources/toc.js \
        /opt/calibre/resources/user-manual-translation-stats.json \
        /opt/calibre/resources/pin-template.svg \
        /opt/calibre/resources/scripts.calibre_msgpack \
        /opt/calibre/resources/fonts \
        /opt/calibre/resources/qtwebengine_devtools_resources.pak \
        /opt/calibre/lib/calibre/ebooks/docx/images \
        /opt/calibre/share \
        /opt/calibre/man; \
    \
    # Remove translations and localization while keeping required libraries.
    # Keep iso639.calibre_msgpack (required)
    # Keep only en-US.pak from qtwebengine_locales (required for WebEngine)
    rm -rf /opt/calibre/lib/qt6/translations; \
    find /opt/calibre/translations -mindepth 1 -maxdepth 1 ! -name 'qtwebengine_locales' -exec rm -rf {} +; \
    find /opt/calibre/translations/qtwebengine_locales -type f ! -name 'en-US.pak' -delete 2>/dev/null || true; \
    if [ -d /opt/calibre/resources/localization ]; then \
        rm -rf /opt/calibre/resources/localization/locales.zip \
            /opt/calibre/resources/localization/stats.calibre_msgpack \
            /opt/calibre/resources/localization/website-languages.txt; \
        find /opt/calibre/resources/localization -mindepth 1 -maxdepth 1 ! -name 'iso639.calibre_msgpack' -exec rm -rf {} +; \
    fi; \
    \
    # Strip debug symbols from calibre extension modules.
    # Exclude Qt6 libs: libQt6WebEngineCore and friends embed Chromium V8 JIT code
    # and internal resource blobs that strip corrupts, causing segfaults at render time.
    find /opt/calibre/lib -name '*.so*' \
        ! -name 'libQt6*' \
        -exec strip --strip-unneeded {} + 2>/dev/null || true; \
    \
    # Remove Python bytecode caches.
    find /opt/calibre -type d -name __pycache__ \
        -exec rm -rf {} + 2>/dev/null || true; \
    find /opt/calibre -name '*.pyc' -delete 2>/dev/null || true; \
    \
    # Verify conversion still works
    # NOTE: txt->epub used intentionally NOT txt->pdf.
    # Calibre 7+ uses WebEngine (Chromium) for PDF output, which requires kernel
    # capabilities unavailable in Docker RUN steps and segfaults under QEMU.
    # epub output exercises the same Python/plugin stack without touching WebEngine.
    /opt/calibre/ebook-convert --version; \
    echo "Hello" > /tmp/test.txt; \
    /opt/calibre/ebook-convert /tmp/test.txt /tmp/test.epub; \
    rm -f /tmp/test.txt /tmp/test.epub; \
    \
    # Verify pdftohtml (poppler) is available for the pdftohtml PDF engine.
    pdftohtml -v >/dev/null 2>&1 && echo "pdftohtml OK" || { echo "ERROR: pdftohtml not found"; exit 1; }; \
    echo "=== Calibre stripped successfully ==="
# Opt-in removal of the bundled Chromium/WebEngine runtime (~80 MB smaller).
# Pass --build-arg CALIBRE_STRIP_WEBENGINE=true to enable; doing so disables
# PDF output via Calibre. Any other value leaves WebEngine in place.
RUN case "${CALIBRE_STRIP_WEBENGINE}" in \
        true) \
            echo "Stripping Calibre WebEngine (Chromium), PDF output via Calibre will be disabled"; \
            # Best effort: a failure while deleting does not fail the build.
            rm -rf /opt/calibre/lib/qt6/libexec/QtWebEngineProcess \
                /opt/calibre/lib/qt6/resources \
                /opt/calibre/lib/libQt6WebEngine*.so.* \
                /opt/calibre/lib/libQt6Quick*.so.* \
                /opt/calibre/lib/libQt6Qml*.so.* \
                /opt/calibre/translations/qtwebengine_locales 2>/dev/null || true; \
            echo "WebEngine stripped, Calibre PDF output disabled"; \
            ;; \
        *) \
            echo "CALIBRE_STRIP_WEBENGINE=false, keeping WebEngine for PDF output"; \
            ;; \
    esac
# Stage 2: Build Ghostscript from source
FROM ubuntu:noble AS gs-build
ARG TARGETPLATFORM
ARG GS_VERSION=10.06.0
# Build Ghostscript from the upstream release tarball and install it under
# /usr/local. GS_TAG is the release tag: "gs" followed by the version with the
# dots removed.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/tmp/gs-build,id=gs-build-${TARGETPLATFORM:-local} \
    apt-get update && apt-get install -y --no-install-recommends \
        build-essential curl ca-certificates libfontconfig1-dev && rm -rf /var/lib/apt/lists/* && \
    GS_TAG="gs$(printf '%s' "${GS_VERSION}" | tr -d '.')" && \
    cd /tmp/gs-build && \
    # Always start from a pristine source tree. Because any earlier tree is
    # removed here, the sources are downloaded unconditionally; the previous
    # `test -d ... ||` guard could never be true at this point.
    rm -rf ghostscript-* && \
    curl -fsSL --max-time 300 --retry 3 --retry-delay 5 "https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/${GS_TAG}/ghostscript-${GS_VERSION}.tar.gz" | tar xz && \
    cd "ghostscript-${GS_VERSION}" && \
    ./configure \
        --prefix=/usr/local \
        --without-x \
        --disable-cups \
        --disable-gtk && \
    make -j"$(nproc)" && \
    make install
# Stage 3: Build PDF Tools (QPDF and ImageMagick 7)
# Throwaway build stage: both tools are compiled from release tarballs and
# installed under /usr/local; later stages copy out only the files they need.
FROM ubuntu:noble AS pdf-tools-build
# TARGETPLATFORM keys the build-directory cache mount below, one cache per platform.
ARG TARGETPLATFORM
ARG QPDF_VERSION=12.3.2
ARG IM_VERSION=7.1.2-13
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/tmp/pdf-tools-build,id=pdf-tools-${TARGETPLATFORM:-local} \
apt-get update && apt-get install -y --no-install-recommends \
build-essential cmake libssl-dev libjpeg-dev zlib1g-dev curl ca-certificates pkg-config \
libpng-dev libtiff-dev libwebp-dev libxml2-dev libfreetype6-dev liblcms2-dev libzip-dev liblqr-1-0-dev \
libltdl-dev libtool && rm -rf /var/lib/apt/lists/* && \
cd /tmp/pdf-tools-build && \
# NOTE(review): any previously cached source trees are wiped here, so the
# `test -d` guards below never find a directory and the tarballs are always
# downloaded again. Confirm whether the cache mount is still meant to be reused.
rm -rf qpdf-* ImageMagick-* && \
# Build QPDF
# Static libraries, release build, OpenSSL as the default crypto provider.
(test -d "qpdf-${QPDF_VERSION}" || curl -fsSL --max-time 300 --retry 3 --retry-delay 5 "https://github.com/qpdf/qpdf/releases/download/v${QPDF_VERSION}/qpdf-${QPDF_VERSION}.tar.gz" | tar xz) && \
cd "qpdf-${QPDF_VERSION}" && \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DALLOW_CRYPTO_OPENSSL=ON -DDEFAULT_CRYPTO=openssl && \
cmake --build build --parallel "$(nproc)" && \
cmake --install build --strip && \
cd .. && \
# Build ImageMagick 7
# Shared libraries with loadable coder modules, 16-bit quantum, no Perl or Magick++ bindings.
(test -d "ImageMagick-${IM_VERSION}" || curl -fsSL --max-time 300 --retry 3 --retry-delay 5 "https://github.com/ImageMagick/ImageMagick/archive/refs/tags/${IM_VERSION}.tar.gz" | tar xz) && \
cd "ImageMagick-${IM_VERSION}" && \
./configure --prefix=/usr/local --with-modules --with-perl=no --with-magick-plus-plus=no --with-quantum-depth=16 --disable-static --enable-shared && \
make -j"$(nproc)" && \
make install-strip && \
# Enable PDF/PS/EPS in policy
# Each sed rewrites a rights="none" entry for the given pattern to read|write;
# it is a no-op when the installed policy.xml has no such entry.
sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' /usr/local/etc/ImageMagick-7/policy.xml && \
sed -i 's/rights="none" pattern="PS"/rights="read|write" pattern="PS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
sed -i 's/rights="none" pattern="EPS"/rights="read|write" pattern="EPS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
cd .. && \
# Register the freshly installed shared libraries with the dynamic linker.
ldconfig /usr/local/lib
# Gather everything the runtime needs from the ImageMagick install into one
# tree rooted at /magick-export, mirroring the final filesystem layout, so the
# runtime stage can import it with a single COPY instead of four.
RUN set -e; \
    export_root=/magick-export; \
    mkdir -p "${export_root}/usr/bin" \
             "${export_root}/usr/local/lib" \
             "${export_root}/usr/local/etc"; \
    # The binary is relocated to /usr/bin; libraries, modules and config stay under /usr/local.
    cp /usr/local/bin/magick "${export_root}/usr/bin/"; \
    cp -a /usr/local/lib/libMagick*.so* "${export_root}/usr/local/lib/"; \
    cp -a /usr/local/lib/ImageMagick-7* "${export_root}/usr/local/lib/"; \
    cp -a /usr/local/etc/ImageMagick-7 "${export_root}/usr/local/etc/"
# Stage 4: Build Python venv
# Pre-builds /opt/venv with the Python tooling so the runtime image needs
# neither pip nor build tools.
FROM ubuntu:noble AS python-venv-build
ARG TARGETPLATFORM
ARG UNOSERVER_VERSION=3.6
# binutils provides `strip`, used on the venv's native extensions below.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    apt-get update && apt-get install -y --no-install-recommends \
        python3 python3-venv ca-certificates binutils && \
    rm -rf /var/lib/apt/lists/*
# The steps are separated with `;` under `set -eu` rather than chained as
# `a && b || true && c ...`. In that chain && and || associate left to right,
# so a failing `pip install` was swallowed by the first `|| true` and the step
# always exited 0 with a broken venv. Now venv creation and package installation
# must succeed, and only the size-trimming commands stay best-effort.
# NOTE(review): `--no-cache-dir` disables pip's cache, so the cache mount on
# /root/.cache/pip is currently unused. Drop one of the two once the intent is confirmed.
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    set -eu; \
    python3 -m venv /opt/venv --system-site-packages; \
    /opt/venv/bin/pip install --no-cache-dir --prefer-binary \
        weasyprint pdf2image opencv-python-headless ocrmypdf \
        cryptography \
        "unoserver==${UNOSERVER_VERSION}"; \
    # Best-effort cleanup: bytecode caches and type stubs.
    find /opt/venv -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true; \
    find /opt/venv \( -name '*.pyc' -o -name '*.pyi' \) -delete 2>/dev/null || true; \
    # pip and setuptools are removed from the finished venv.
    rm -rf /opt/venv/lib/python*/site-packages/pip \
           /opt/venv/lib/python*/site-packages/pip-*.dist-info \
           /opt/venv/lib/python*/site-packages/setuptools \
           /opt/venv/lib/python*/site-packages/setuptools-*.dist-info; \
    # Best-effort: strip symbols from native extension modules.
    find /opt/venv -name '*.so' -exec strip --strip-unneeded {} + 2>/dev/null || true
# Final runtime image - the actual base image
FROM eclipse-temurin:25-jre-noble AS runtime
# Run all following RUN instructions under bash with pipefail, so a failure on
# the left-hand side of a pipe fails the step.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Non-interactive apt, a UTF-8 C locale, and the Tesseract data directory
# (presumably where the tesseract-ocr-* packages install their traineddata — confirm).
ENV DEBIAN_FRONTEND=noninteractive \
LANG=C.UTF-8 \
LC_ALL=C.UTF-8 \
TESS_BASE_PATH=/usr/share/tesseract-ocr/5/tessdata
# NOTE(review): no instruction visible in this stage references UNOSERVER_VERSION;
# confirm it is still needed here.
ARG UNOSERVER_VERSION=3.6
# Install every runtime package in one layer, then trim the result in the same
# layer so the removed files never reach the image. Runs under `set -eux`:
# commands without an explicit `|| true` must succeed.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    set -eux; \
    apt-get update; \
    # Add LibreOffice Fresh PPA for latest version (26.2.x)
    apt-get install -y --no-install-recommends software-properties-common; \
    add-apt-repository -y ppa:libreoffice/ppa; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        # Core tools
        ca-certificates tzdata tini bash fontconfig curl \
        ffmpeg poppler-utils fontforge \
        gosu unpaper pngquant \
        # Fonts: full coverage for standard + fat variants
        fonts-dejavu \
        fonts-liberation2 \
        fonts-crosextra-caladea fonts-crosextra-carlito \
        fonts-noto-core fonts-noto-mono fonts-noto-extra \
        fonts-noto-cjk poppler-data \
        fonts-freefont-ttf fonts-terminus \
        # Python runtime & UNO bridge
        python3 python3-uno python3-packaging \
        # OCR
        tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu tesseract-ocr-fra \
        tesseract-ocr-por tesseract-ocr-chi-sim \
        # Tesseract OSD for orientation detection
        tesseract-ocr-osd \
        # Graphics / AWT headless
        libcairo2 libpango-1.0-0 libpangoft2-1.0-0 libgdk-pixbuf-2.0-0 \
        libfreetype6 libfontconfig1 libx11-6 libxt6 libxext6 libxrender1 \
        libxtst6 libxi6 libxinerama1 libxkbcommon0 libsm6 libice6 \
        # Qt/EGL for Calibre CLI
        libegl1 libgl1 libopengl0 libdbus-1-3 libglib2.0-0 libnss3 \
        libasound2t64 libxcomposite1 libxrandr2 \
        # Virtual framebuffer (required for headless LibreOffice Impress/Draw)
        xvfb x11-utils coreutils \
        libreoffice-writer-nogui libreoffice-calc-nogui \
        libreoffice-impress-nogui libreoffice-draw-nogui \
        libreoffice-base-nogui libreoffice-java-common \
        ; \
    \
    # Verify and fix LibreOffice
    libreoffice --version; \
    soffice --version 2>/dev/null || true; \
    # Rebuild UNO bridge type database
    /usr/lib/libreoffice/program/soffice.bin --headless --convert-to pdf /dev/null 2>/dev/null || true; \
    # Force font cache rebuild (only the first 20 lines of output are shown)
    fc-cache -f -v 2>&1 | awk 'NR <= 20'; \
    \
    # Cleanup stage.
    \
    # Remove PPA helper, no longer needed after apt-get update
    apt-get remove --purge -y software-properties-common || true; \
    apt-get autoremove --purge -y || true; \
    rm -rf /var/lib/apt/lists/*; \
    \
    # Docs / man / info / icons / themes / GUI assets (headless server)
    rm -rf /usr/share/doc/* /usr/share/man/* /usr/share/info/* \
        /usr/share/lintian/* /usr/share/linda/* \
        /usr/share/icons/* /usr/share/themes/* \
        /usr/share/javascript/* \
        /usr/share/gtk-3.0/* \
        /usr/share/fontforge/pixmaps \
        /usr/share/fontforge/osx \
        /usr/share/fontforge/cidmap \
        /usr/share/fontforge/prefs \
        /usr/share/liblangtag/* \
        /usr/share/tcltk/* \
        /usr/share/python-wheels/* \
        /usr/share/glib-2.0/schemas/* \
        /usr/share/mime/* \
        /usr/share/xml/iso-codes \
        /usr/share/GConf \
        /usr/share/bash-completion \
        /usr/share/zsh \
        /usr/share/libmysofa \
        /usr/share/alsa \
        /usr/share/iso-codes \
        /usr/share/perl5 \
        /usr/share/libthai \
        /usr/share/libexttextcat \
        /usr/share/openal \
        /usr/share/gcc; \
    \
    # Locales: keep only the en* message catalogs
    find /usr/share/locale -mindepth 1 -maxdepth 1 -type d \
        ! -name 'en*' -exec rm -rf {} + 2>/dev/null || true; \
    rm -rf /usr/share/i18n/locales /usr/share/i18n/charmaps; \
    \
    # LibreOffice resources removed from the image
    rm -rf /usr/lib/libreoffice/share/gallery \
        /usr/lib/libreoffice/share/template \
        /usr/lib/libreoffice/share/wizards \
        /usr/lib/libreoffice/share/autotext \
        /usr/lib/libreoffice/help \
        /usr/lib/libreoffice/share/config/images_*.zip \
        /usr/lib/libreoffice/share/basic \
        /usr/lib/libreoffice/share/Scripts \
        /usr/lib/libreoffice/share/autocorr \
        /usr/lib/libreoffice/share/classification \
        /usr/lib/libreoffice/share/wordbook \
        /usr/lib/libreoffice/share/fingerprint \
        /usr/lib/libreoffice/share/xdg \
        /usr/lib/libreoffice/share/numbertext \
        /usr/lib/libreoffice/share/shell \
        /usr/lib/libreoffice/share/palette \
        /usr/lib/libreoffice/share/theme_definitions \
        /usr/lib/libreoffice/share/xslt \
        /usr/lib/libreoffice/share/labels \
        /usr/lib/libreoffice/share/dtd \
        /usr/lib/libreoffice/share/tipoftheday \
        /usr/lib/libreoffice/share/toolbarmode \
        /usr/lib/libreoffice/share/psprint \
        /usr/lib/libreoffice/CREDITS.fodt \
        /usr/lib/libreoffice/LICENSE.html; \
    \
    # Remove unused LO extensions (GUI-only; not needed for document conversion)
    rm -rf /usr/lib/libreoffice/share/extensions/wiki-publisher \
        /usr/lib/libreoffice/share/extensions/nlpsolver \
        /usr/lib/libreoffice/share/extensions/dict-* 2>/dev/null || true; \
    # Remove LO database components (LO Base; not needed for Writer/Calc/Impress conversion)
    rm -rf /usr/lib/libreoffice/program/libdba* \
        /usr/lib/libreoffice/program/libdbahsql* \
        /usr/lib/libreoffice/program/libdbu* \
        /usr/lib/libreoffice/program/libreport* 2>/dev/null || true; \
    \
    # Python stdlib parts removed from the system interpreter
    rm -rf /usr/lib/python3.12/test \
        /usr/lib/python3.12/idlelib \
        /usr/lib/python3.12/tkinter \
        /usr/lib/python3.12/lib2to3 \
        /usr/lib/python3.12/pydoc_data; \
    \
    rm -rf /usr/lib/python3/dist-packages/scipy \
        /usr/lib/python3/dist-packages/sympy \
        /usr/lib/python3/dist-packages/mpmath; \
    \
    # Drop the system-wide cffi from dist-packages
    # (presumably so the venv's own copy is the one imported — confirm).
    rm -rf \
        /usr/lib/python3/dist-packages/cffi \
        /usr/lib/python3/dist-packages/cffi-*.dist-info \
        /usr/lib/python3/dist-packages/_cffi_backend*.so \
        /usr/lib/python3/dist-packages/_cffi_backend*.cpython*.so \
        2>/dev/null || true; \
    \
    # Strip debug symbols from shared libraries (JVM and LibreOffice trees are excluded)
    find /usr/lib -name '*.so*' -type f \
        -not -path '*/jvm/*' \
        -not -path '*/libreoffice/*' \
        -exec strip --strip-unneeded {} + 2>/dev/null || true; \
    \
    # Remove GPU backends not needed for headless operation.
    # dpkg-architecture prints a bare multiarch triplet, while the find fallback
    # prints an absolute path. Normalise both to an absolute /usr/lib/<triplet>
    # directory so the rm below never targets a path relative to the working dir.
    MULTIARCH_TRIPLET=$(dpkg-architecture -qDEB_HOST_MULTIARCH 2>/dev/null || true); \
    if [ -n "${MULTIARCH_TRIPLET}" ]; then \
        MULTIARCH_LIBDIR="/usr/lib/${MULTIARCH_TRIPLET}"; \
    else \
        MULTIARCH_LIBDIR=$(find /usr/lib -maxdepth 1 -type d -name '*-linux-gnu' | head -1); \
    fi; \
    # Skip the removal if no multiarch directory could be determined.
    if [ -n "${MULTIARCH_LIBDIR}" ]; then \
        rm -f \
            "${MULTIARCH_LIBDIR}"/libLLVM*.so* \
            "${MULTIARCH_LIBDIR}"/libgallium*.so* \
            2>/dev/null || true; \
    fi; \
    \
    # Python bytecode caches and type stubs
    find /usr/lib/python3* -type d -name __pycache__ \
        -exec rm -rf {} + 2>/dev/null || true; \
    find /usr/lib/python3* \( -name '*.pyc' -o -name '*.pyi' \) \
        -delete 2>/dev/null || true; \
    \
    # dpkg bookkeeping, logs and leftover docs.
    # NOTE(review): deleting dpkg's per-package info files presumably prevents
    # clean package removal or upgrade in derived images — confirm that is acceptable.
    # NOTE(review): the Ghostscript tree is only added by a later COPY and no
    # ImageMagick share directory is copied in, so the last four paths look like
    # no-ops at this point — confirm.
    rm -rf /usr/share/bug /usr/share/lintian /usr/share/linda \
        /var/lib/dpkg/info/*.list \
        /var/lib/dpkg/info/*.md5sums \
        /var/lib/dpkg/info/*.conffiles \
        /var/lib/dpkg/info/*.postinst \
        /var/lib/dpkg/info/*.preinst \
        /var/lib/dpkg/info/*.prerm \
        /var/lib/dpkg/info/*.postrm \
        /var/lib/dpkg/info/*.triggers \
        /var/lib/dpkg/info/*.shlibs \
        /var/lib/dpkg/info/*.symbols \
        /var/lib/dpkg/info/*.templates \
        /var/log/dpkg.log /var/log/apt/* \
        /usr/local/share/ghostscript/*/doc \
        /usr/local/share/ghostscript/*/examples \
        /usr/share/ImageMagick-*/doc \
        /usr/share/ImageMagick-*/www; \
    \
    # Tesseract training configs (not needed for OCR, but keep configs/ for hocr/txt output)
    rm -rf /usr/share/tesseract-ocr/*/tessdata/tessconfigs; \
    \
    # Noto fonts: keep Regular weight only (~370MB savings)
    # NOTE(review): the second find covers all of /usr/share/fonts/opentype, not
    # just Noto, so any OpenType font without "Regular" in its file name is
    # deleted too — confirm that is intended.
    find /usr/share/fonts/truetype/noto -type f \
        ! -name '*Regular*' -delete 2>/dev/null || true; \
    find /usr/share/fonts/opentype -type f \
        ! -name '*Regular*' -delete 2>/dev/null || true; \
    # DejaVu: keep Regular and Bold only
    find /usr/share/fonts/truetype -name '*DejaVu*' \
        ! -name '*-Regular*' ! -name '*-Bold*' ! -name '*Bold.ttf' \
        ! -name 'DejaVuSans.ttf' ! -name 'DejaVuSerif.ttf' ! -name 'DejaVuSansMono.ttf' \
        -type f -delete 2>/dev/null || true; \
    # Remove empty font directories after cleanup
    find /usr/share/fonts -type d -empty -delete 2>/dev/null || true; \
    \
    # gconv: keep only essential charset conversion modules.
    # The keepers are copied aside, the directory is emptied, and the keepers are copied back.
    GCONV_DIR=$(find /usr/lib -type d -name gconv 2>/dev/null | head -1); \
    if [ -n "$GCONV_DIR" ] && [ -d "$GCONV_DIR" ]; then \
        mkdir -p /tmp/gconv-keep; \
        for mod in UTF-8.so UTF-16.so UTF-32.so UTF-7.so \
                   ISO8859-1.so ISO8859-15.so ISO8859-2.so ISO8859-9.so \
                   UNICODE.so CP1252.so CP1251.so CP1250.so \
                   EUC-JP.so EUC-KR.so EUC-CN.so \
                   SHIFT_JIS.so GB18030.so BIG5.so \
                   gconv-modules gconv-modules.d gconv-modules.cache; do \
            [ -e "$GCONV_DIR/$mod" ] && \
                cp -a "$GCONV_DIR/$mod" /tmp/gconv-keep/ 2>/dev/null || true; \
        done; \
        rm -rf "$GCONV_DIR"/*; \
        cp -a /tmp/gconv-keep/* "$GCONV_DIR/" 2>/dev/null || true; \
        rm -rf /tmp/gconv-keep; \
    fi; \
    \
    # Misc caches
    rm -rf /var/cache/fontconfig/* /tmp/*
# External tool layers, all use --link for independent layer caching and parallel pulls.
# Each COPY pulls prebuilt artifacts out of one of the build stages.
# Calibre tree from the calibre-build stage.
COPY --link --from=calibre-build /opt/calibre /opt/calibre
# qpdf is installed under /usr/local/bin in its build stage but placed in /usr/bin here.
COPY --link --from=pdf-tools-build /usr/local/bin/qpdf /usr/bin/qpdf
# ImageMagick: 4 layers collapsed to 1 via the magick-export staging dir in pdf-tools-build
COPY --link --from=pdf-tools-build /magick-export/ /
# Ghostscript: the gs binary plus its share/ghostscript tree.
COPY --link --from=gs-build /usr/local/bin/gs /usr/local/bin/gs
COPY --link --from=gs-build /usr/local/share/ghostscript /usr/local/share/ghostscript
# Python venv pre-built (no pip install at runtime, no build tools needed)
COPY --link --from=python-venv-build /opt/venv /opt/venv
# Refresh the linker cache for the libraries under /usr/local/lib, then
# smoke-test the venv by importing its key native-backed packages.
# The checks run under `set -eu` with `;` separators. In the previous single
# `a && b && ... || true` chain the trailing `|| true` applied to the whole
# chain, so a failed import could never fail the build. Only the final
# __pycache__ cleanup is best-effort now.
RUN set -eu; \
    ldconfig /usr/local/lib; \
    # Keep the import checks from writing bytecode caches into the image.
    export PYTHONDONTWRITEBYTECODE=1; \
    /opt/venv/bin/python -c "import cffi; print('cffi OK:', cffi.__version__)"; \
    /opt/venv/bin/python -c "import cv2; print('OpenCV', cv2.__version__)"; \
    /opt/venv/bin/python -c "import ocrmypdf; print('ocrmypdf OK')"; \
    find /opt/venv -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
# Unprivileged account the application runs as.
# PUID/PGID are the preferred numeric ids. If creating the group or user with
# the requested id fails, it is retried without an explicit id.
ARG PUID=1000
ARG PGID=1000
RUN set -eux; \
    app_user=stirlingpdfuser; \
    app_group=stirlingpdfgroup; \
    # Create the group only when it does not exist yet.
    getent group "${app_group}" >/dev/null 2>&1 || { \
        groupadd -g "${PGID}" "${app_group}" 2>/dev/null \
            || groupadd "${app_group}"; \
    }; \
    # Same for the user: requested uid first, auto-assigned uid as fallback.
    id -u "${app_user}" >/dev/null 2>&1 || { \
        useradd -m -u "${PUID}" -g "${app_group}" \
                -d "/home/${app_user}" -s /bin/bash "${app_user}" 2>/dev/null \
            || useradd -m -g "${app_group}" \
                -d "/home/${app_user}" -s /bin/bash "${app_user}"; \
    }; \
    # Expose gosu under the su-exec name as well.
    ln -sf /usr/sbin/gosu /usr/local/bin/su-exec
# Directory layout used by the application: config, logs, custom files,
# pipeline folders and a private temp area. Everything is owned by the
# unprivileged user, and the temp area is closed to "other" (mode 750).
RUN set -eux; \
    tmp_root=/tmp/stirling-pdf; \
    mkdir -p \
        /configs \
        /configs/cache \
        /configs/heap_dumps \
        /logs \
        /customFiles \
        /pipeline/watchedFolders \
        /pipeline/finishedFolders \
        "${tmp_root}/heap_dumps"; \
    chown -R stirlingpdfuser:stirlingpdfgroup \
        /home/stirlingpdfuser \
        /configs \
        /logs \
        /customFiles \
        /pipeline \
        "${tmp_root}"; \
    chmod 750 "${tmp_root}" "${tmp_root}/heap_dumps"
# Put the bundled command-line tools on the default PATH via symlinks, then
# refresh the fontconfig cache.
RUN set -eux; \
    # Calibre's converter lives in /opt/calibre.
    ln -sf /opt/calibre/ebook-convert /usr/bin/ebook-convert; \
    # Entry points provided by the Python venv.
    for venv_tool in unoconvert unoserver ocrmypdf weasyprint unoping; do \
        ln -sf "/opt/venv/bin/${venv_tool}" "/usr/local/bin/${venv_tool}"; \
    done; \
    fc-cache -f
# OCI image metadata for the base image, grouped as: what it is, who
# publishes it, and where to find it.
LABEL \
    org.opencontainers.image.title="Stirling-PDF Base" \
    org.opencontainers.image.description="Pre-built base image with Calibre, Ghostscript, QPDF, ImageMagick, LibreOffice, OCRmyPDF and dependencies" \
    org.opencontainers.image.licenses="MIT" \
    org.opencontainers.image.vendor="Stirling-Tools" \
    org.opencontainers.image.authors="Stirling-Tools" \
    maintainer="Stirling-Tools" \
    org.opencontainers.image.source="https://github.com/Stirling-Tools/Stirling-PDF" \
    org.opencontainers.image.url="https://www.stirlingpdf.com" \
    org.opencontainers.image.documentation="https://docs.stirlingpdf.com"

View File

@@ -1,269 +1,13 @@
# Stirling-PDF - Full version (embedded frontend)
# Uses pre-built base image for fast builds
FROM ubuntu:noble AS calibre-build
ARG TARGETPLATFORM
ARG CALIBRE_VERSION=9.4.0
ARG CALIBRE_STRIP_WEBENGINE=false
ARG BASE_VERSION=1.0.0
ARG BASE_IMAGE=ghcr.io/stirling-tools/stirling-pdf-base:${BASE_VERSION}
# Download the Calibre binary release for the build architecture, unpack it to
# /opt/calibre, remove everything not needed for CLI conversion, and finish
# with a conversion smoke test. Runs under `set -eux`.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    set -eux; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        ca-certificates curl xz-utils libnss3 libfontconfig1 \
        libgl1 libegl1 libdbus-1-3 libasound2t64 libxcomposite1 \
        libxrandr2 libxkbcommon0 libxi6 libxtst6 libopengl0 \
        poppler-utils; \
    rm -rf /var/lib/apt/lists/*; \
    \
    # Map the kernel architecture name to Calibre's download naming.
    case "$(uname -m)" in \
        x86_64) CALIBRE_ARCH="x86_64" ;; \
        aarch64) CALIBRE_ARCH="arm64" ;; \
        *) echo "Unsupported arch: $(uname -m)"; exit 1 ;; \
    esac; \
    \
    # Retry transient network failures instead of failing the whole build.
    curl -fsSL --retry 3 --retry-delay 5 \
        "https://download.calibre-ebook.com/${CALIBRE_VERSION}/calibre-${CALIBRE_VERSION}-${CALIBRE_ARCH}.txz" \
        -o /tmp/calibre.txz; \
    mkdir -p /opt/calibre; \
    tar xJf /tmp/calibre.txz -C /opt/calibre; \
    rm /tmp/calibre.txz; \
    \
    # We only need Qt6 WebEngine (Chromium) for ebook->PDF output.
    # PDF INPUT now uses the pdftohtml engine (poppler), not Qt.
    rm -f /opt/calibre/lib/libQt6Designer* \
        /opt/calibre/lib/libQt6Multimedia* \
        /opt/calibre/lib/libQt6SpatialAudio.so.* \
        /opt/calibre/lib/libQt6NetworkAuth.so.* \
        /opt/calibre/lib/libQt6Concurrent.so.* \
        /opt/calibre/lib/libQt6OpenGLWidgets.so.* \
        /opt/calibre/lib/libQt6QuickWidgets.so.* \
        /opt/calibre/lib/libQt6Svg.so.* \
        /opt/calibre/lib/libQt6SvgWidgets.so.* \
        /opt/calibre/lib/libQt6Pdf*.so.* \
        /opt/calibre/lib/libQt6ShaderTools.so.* \
        /opt/calibre/lib/libQt6SerialPort.so.* \
        /opt/calibre/lib/libQt6Sensors.so.* \
        /opt/calibre/lib/libQt6Test.so.* \
        /opt/calibre/lib/libQt6Sql.so.* \
        /opt/calibre/lib/libQt6RemoteObjects.so.* \
        /opt/calibre/lib/libQt6Help.so.* \
        /opt/calibre/lib/libQt6VirtualKeyboard.so.* \
        /opt/calibre/lib/libQt6WaylandClient.so.* \
        /opt/calibre/lib/libQt6WaylandCompositor.so.* \
        /opt/calibre/lib/libQt6Bluetooth.so.* \
        /opt/calibre/lib/libQt6Nfc.so.* \
        /opt/calibre/lib/libQt6Charts.so.* \
        /opt/calibre/lib/libQt6DataVisualization.so.* \
        /opt/calibre/lib/libQt6Scxml.so.* \
        /opt/calibre/lib/libQt6StateMachine.so.* \
        /opt/calibre/lib/libQt6TextToSpeech.so.* \
        /opt/calibre/lib/libQt63D*.so.* \
        /opt/calibre/lib/libavcodec.so.* \
        /opt/calibre/lib/libavfilter.so.* \
        /opt/calibre/lib/libavformat.so.* \
        /opt/calibre/lib/libavutil.so.* \
        /opt/calibre/lib/libavdevice.so.* \
        /opt/calibre/lib/libpostproc.so.* \
        /opt/calibre/lib/libswresample.so.* \
        /opt/calibre/lib/libswscale.so.* \
        /opt/calibre/lib/libspeex.so.* \
        /opt/calibre/lib/libFLAC.so.* \
        /opt/calibre/lib/libopus.so.* \
        /opt/calibre/lib/libvorbis*.so.* \
        /opt/calibre/lib/libasyncns.so.* \
        /opt/calibre/lib/libspeechd.so.* \
        /opt/calibre/lib/libespeak-ng.so.* \
        /opt/calibre/lib/libonnxruntime.so.* \
        /opt/calibre/lib/libgio-2.0.so.* \
        /opt/calibre/lib/libzstd.so.* \
        /opt/calibre/lib/libhunspell-1.7.so.* \
        /opt/calibre/lib/libbrotlienc.so.* \
        /opt/calibre/lib/libbrotlicommon.so.* \
        /opt/calibre/lib/libbrotlidec.so.* \
        /opt/calibre/lib/libstemmer.so.* \
        /opt/calibre/lib/libmtp.so.* \
        /opt/calibre/lib/libncursesw.so.* \
        /opt/calibre/lib/libchm.so.* \
        /opt/calibre/lib/libgcrypt.so.* \
        /opt/calibre/lib/libgpg-error.so.* \
        /opt/calibre/lib/libicuio.so.* \
        /opt/calibre/lib/libreadline.so.* \
        /opt/calibre/lib/libusb-1.0.so.* \
        /opt/calibre/lib/libpulse*.so.* \
        /opt/calibre/lib/libsndfile.so.* \
        /opt/calibre/lib/libmpv.so.* \
        /opt/calibre/lib/libass.so.* \
        /opt/calibre/lib/librubberband.so.* \
        /opt/calibre/lib/libsamplerate.so.*; \
    # Qt plugin directories removed from the bundle.
    rm -rf /opt/calibre/lib/qt6/plugins/platformthemes \
        /opt/calibre/lib/qt6/plugins/multimedia \
        /opt/calibre/lib/qt6/plugins/designer \
        /opt/calibre/lib/qt6/plugins/virtualkeyboard \
        /opt/calibre/lib/qt6/plugins/wayland* \
        /opt/calibre/lib/qt6/plugins/texttospeech \
        /opt/calibre/lib/qt6/plugins/position \
        /opt/calibre/lib/qt6/plugins/sensors \
        /opt/calibre/lib/qt6/plugins/sqldrivers \
        /opt/calibre/lib/qt6/plugins/canbus \
        /opt/calibre/lib/qt6/plugins/sceneparsers \
        /opt/calibre/lib/qt6/plugins/renderers \
        /opt/calibre/lib/qt6/plugins/geometryloaders \
        /opt/calibre/lib/qt6/plugins/generic \
        /opt/calibre/lib/qt6/plugins/qmltooling \
        /opt/calibre/lib/qt6/libexec/QtWebEngineProcess.bak; \
    rm -rf /opt/calibre/plugins/sqldrivers \
        /opt/calibre/plugins/multimedia \
        /opt/calibre/plugins/wayland-shell-integration \
        /opt/calibre/plugins/wayland-graphics-integration-client \
        /opt/calibre/plugins/wayland-decoration-client \
        /opt/calibre/plugins/texttospeech \
        /opt/calibre/plugins/platformthemes \
        /opt/calibre/plugins/platforminputcontexts \
        /opt/calibre/plugins/egldeviceintegrations \
        /opt/calibre/plugins/iconengines; \
    \
    # Remove GUI executables but keep ebook-convert, ebook-meta, and calibre-parallel.
    rm -f /opt/calibre/calibre \
        /opt/calibre/calibre-server \
        /opt/calibre/calibre-smtp \
        /opt/calibre/calibre-debug \
        /opt/calibre/calibre-customize \
        /opt/calibre/calibredb \
        /opt/calibre/ebook-viewer \
        /opt/calibre/ebook-edit \
        /opt/calibre/ebook-polish \
        /opt/calibre/ebook-device \
        /opt/calibre/fetch-ebook-metadata \
        /opt/calibre/lrf2lrs \
        /opt/calibre/lrs2lrf \
        /opt/calibre/markdown-calibre \
        /opt/calibre/web2disk; \
    \
    # Remove Python modules not needed for conversion.
    rm -rf /opt/calibre/lib/calibre/gui2 \
        /opt/calibre/lib/calibre/devices \
        /opt/calibre/lib/calibre/library \
        /opt/calibre/lib/calibre/db \
        /opt/calibre/lib/calibre/srv \
        /opt/calibre/lib/calibre/spell \
        /opt/calibre/lib/calibre/live \
        /opt/calibre/lib/calibre/utils/piper \
        /opt/calibre/lib/calibre/utils/certgen.so \
        /opt/calibre/lib/calibre/utils/https \
        /opt/calibre/lib/calibre/utils/mdns; \
    \
    # Remove resources not needed for CLI conversion.
    rm -rf /opt/calibre/resources/images \
        /opt/calibre/resources/icons \
        /opt/calibre/resources/icons.rcc \
        /opt/calibre/resources/content-server \
        /opt/calibre/resources/editor* \
        /opt/calibre/resources/viewer \
        /opt/calibre/resources/viewer.js \
        /opt/calibre/resources/viewer.html \
        /opt/calibre/resources/recipes \
        /opt/calibre/resources/dictionaries \
        /opt/calibre/resources/hyphenation \
        /opt/calibre/resources/catalog \
        /opt/calibre/resources/calibre-mimetypes.xml \
        /opt/calibre/resources/changelog.json \
        /opt/calibre/resources/user-agent-data.json \
        /opt/calibre/resources/builtin_recipes.zip \
        /opt/calibre/resources/builtin_recipes.xml \
        /opt/calibre/resources/stylelint-bundle.min.js \
        /opt/calibre/resources/stylelint.js \
        /opt/calibre/resources/rapydscript \
        /opt/calibre/resources/quick_start \
        /opt/calibre/resources/piper-voices.json \
        /opt/calibre/resources/images.qrc \
        /opt/calibre/resources/mozilla-ca-certs.pem \
        /opt/calibre/resources/ebook-convert-complete.calibre_msgpack \
        /opt/calibre/resources/mathjax \
        /opt/calibre/resources/common-english-words.txt \
        /opt/calibre/resources/calibre-portable.sh \
        /opt/calibre/resources/calibre-portable.bat \
        /opt/calibre/resources/metadata_sqlite.sql \
        /opt/calibre/resources/notes_sqlite.sql \
        /opt/calibre/resources/fts_sqlite.sql \
        /opt/calibre/resources/fts_triggers.sql \
        /opt/calibre/resources/jacket \
        /opt/calibre/resources/editor-functions.json \
        /opt/calibre/resources/calibre-ebook-root-CA.crt \
        /opt/calibre/resources/csscolorparser.js \
        /opt/calibre/resources/lookup.js \
        /opt/calibre/resources/pdf-mathjax-loader.js \
        /opt/calibre/resources/scraper.js \
        /opt/calibre/resources/toc.js \
        /opt/calibre/resources/user-manual-translation-stats.json \
        /opt/calibre/resources/pin-template.svg \
        /opt/calibre/resources/scripts.calibre_msgpack \
        /opt/calibre/resources/fonts \
        /opt/calibre/resources/qtwebengine_devtools_resources.pak \
        /opt/calibre/lib/calibre/ebooks/docx/images \
        /opt/calibre/share \
        /opt/calibre/man; \
    \
    # Remove translations and localization while keeping required libraries.
    # Keep iso639.calibre_msgpack (required)
    # Keep only en-US.pak from qtwebengine_locales (required for WebEngine)
    rm -rf /opt/calibre/lib/qt6/translations; \
    find /opt/calibre/translations -mindepth 1 -maxdepth 1 ! -name 'qtwebengine_locales' -exec rm -rf {} +; \
    find /opt/calibre/translations/qtwebengine_locales -type f ! -name 'en-US.pak' -delete 2>/dev/null || true; \
    if [ -d /opt/calibre/resources/localization ]; then \
        rm -rf /opt/calibre/resources/localization/locales.zip \
            /opt/calibre/resources/localization/stats.calibre_msgpack \
            /opt/calibre/resources/localization/website-languages.txt; \
        find /opt/calibre/resources/localization -mindepth 1 -maxdepth 1 ! -name 'iso639.calibre_msgpack' -exec rm -rf {} +; \
    fi; \
    \
    # Strip debug symbols from calibre extension modules.
    # Exclude Qt6 libs: libQt6WebEngineCore and friends embed Chromium V8 JIT code
    # and internal resource blobs that strip corrupts, causing segfaults at render time.
    find /opt/calibre/lib -name '*.so*' \
        ! -name 'libQt6*' \
        -exec strip --strip-unneeded {} + 2>/dev/null || true; \
    \
    # Remove Python bytecode caches.
    find /opt/calibre -type d -name __pycache__ \
        -exec rm -rf {} + 2>/dev/null || true; \
    find /opt/calibre -name '*.pyc' -delete 2>/dev/null || true; \
    \
    # -- Verify conversion still works --
    # NOTE: txt->epub used intentionally NOT txt->pdf.
    # Calibre 7+ uses WebEngine (Chromium) for PDF output, which requires kernel
    # capabilities unavailable in Docker RUN steps and segfaults under QEMU.
    # epub output exercises the same Python/plugin stack without touching WebEngine.
    /opt/calibre/ebook-convert --version; \
    echo "Hello" > /tmp/test.txt; \
    /opt/calibre/ebook-convert /tmp/test.txt /tmp/test.epub; \
    rm -f /tmp/test.txt /tmp/test.epub; \
    \
    # Verify pdftohtml (poppler) is available for the pdftohtml PDF engine.
    pdftohtml -v >/dev/null 2>&1 && echo "pdftohtml OK" || { echo "ERROR: pdftohtml not found"; exit 1; }; \
    echo "=== Calibre stripped successfully ==="
# Opt-in size reduction (~80 MB): remove Chromium/WebEngine from the Calibre
# bundle when PDF output through Calibre is not required.
# Enable with: --build-arg CALIBRE_STRIP_WEBENGINE=true
# Any value other than the exact string "true" keeps WebEngine in place.
RUN case "${CALIBRE_STRIP_WEBENGINE}" in \
        true) \
            echo "Stripping Calibre WebEngine (Chromium), PDF output via Calibre will be disabled"; \
            rm -rf /opt/calibre/lib/qt6/libexec/QtWebEngineProcess \
                   /opt/calibre/lib/qt6/resources \
                   /opt/calibre/lib/libQt6WebEngine*.so.* \
                   /opt/calibre/lib/libQt6Quick*.so.* \
                   /opt/calibre/lib/libQt6Qml*.so.* \
                   /opt/calibre/translations/qtwebengine_locales 2>/dev/null || true; \
            echo "WebEngine stripped, Calibre PDF output disabled"; \
            ;; \
        *) \
            echo "CALIBRE_STRIP_WEBENGINE=false, keeping WebEngine for PDF output"; \
            ;; \
    esac
# Build the Java application and frontend.
# Stage 1: Build the Java application and frontend
FROM gradle:9.3.1-jdk25 AS app-build
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update \
RUN apt-get update \
&& apt-get install -y --no-install-recommends curl ca-certificates \
&& update-ca-certificates \
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
@@ -286,403 +30,31 @@ COPY app/common/build.gradle app/common/
COPY app/proprietary/build.gradle app/proprietary/
# Use system gradle instead of gradlew to avoid SSL issues downloading gradle distribution on emulated arm64
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
--mount=type=cache,target=/home/gradle/.gradle/wrapper \
gradle dependencies --no-daemon || true
RUN gradle dependencies --no-daemon || true
COPY . .
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
--mount=type=cache,target=/home/gradle/.gradle/wrapper \
--mount=type=cache,target=/root/.npm,sharing=locked \
DISABLE_ADDITIONAL_FEATURES=false \
RUN DISABLE_ADDITIONAL_FEATURES=false \
gradle clean build \
-PbuildWithFrontend=true \
-x spotlessApply -x spotlessCheck -x test -x sonarqube \
--no-daemon
# Extract Spring Boot Layers.
# Stage 2: Extract Spring Boot Layers
FROM eclipse-temurin:25-jre-noble AS jar-extract
WORKDIR /tmp
COPY --from=app-build /app/app/core/build/libs/*.jar app.jar
RUN java -Djarmode=tools -jar app.jar extract --layers --destination /layers
# Build Ghostscript (version set by GS_VERSION) from source in an isolated stage (avoids library conflicts).
# Only the results of `make install` under /usr/local are meant to be copied out of this stage.
FROM ubuntu:noble AS gs-build
# TARGETPLATFORM keys the build cache mount below; it falls back to "local" when unset.
ARG TARGETPLATFORM
ARG GS_VERSION=10.06.0
# The release tag is the version with the dots removed, prefixed with "gs" (GS_TAG below).
# /tmp/gs-build is a cache mount, so the source tree is not part of the image layer.
# NOTE(review): `rm -rf ghostscript-*` runs before the `test -d` guard, so the guard is always
# false and the tarball is downloaded and rebuilt on every run; confirm whether a clean rebuild
# is intended or whether the cache mount was meant to allow reuse.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/tmp/gs-build,id=gs-build-${TARGETPLATFORM:-local} \
apt-get update && apt-get install -y --no-install-recommends \
build-essential curl ca-certificates libfontconfig1-dev && rm -rf /var/lib/apt/lists/* && \
GS_TAG="gs$(printf '%s' "${GS_VERSION}" | tr -d '.')" && \
cd /tmp/gs-build && \
rm -rf ghostscript-* && \
(test -d "ghostscript-${GS_VERSION}" || curl -fsSL "https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/${GS_TAG}/ghostscript-${GS_VERSION}.tar.gz" | tar xz) && \
cd "ghostscript-${GS_VERSION}" && \
./configure \
--prefix=/usr/local \
--without-x \
--disable-cups \
--disable-gtk && \
make -j"$(nproc)" && \
make install && \
cd ..
# Stage 3: Final runtime image on top of pre-built base
FROM ${BASE_IMAGE}
ARG VERSION_TAG
# Build PDF tools from source: QPDF (static libraries, OpenSSL crypto) and ImageMagick 7
# (shared libraries with loadable modules). Both install under /usr/local.
FROM ubuntu:noble AS pdf-tools-build
# TARGETPLATFORM keys the build cache mount below; it falls back to "local" when unset.
ARG TARGETPLATFORM
ARG QPDF_VERSION=12.3.2
ARG IM_VERSION=7.1.2-13
# /tmp/pdf-tools-build is a cache mount, so the source trees are not part of the image layer.
# NOTE(review): `rm -rf qpdf-* ImageMagick-*` runs before the `test -d` guards, so both guards
# are always false and the sources are downloaded and rebuilt on every run; confirm whether
# that is intended.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/tmp/pdf-tools-build,id=pdf-tools-${TARGETPLATFORM:-local} \
apt-get update && apt-get install -y --no-install-recommends \
build-essential cmake libssl-dev libjpeg-dev zlib1g-dev curl ca-certificates pkg-config \
libpng-dev libtiff-dev libwebp-dev libxml2-dev libfreetype6-dev liblcms2-dev libzip-dev liblqr-1-0-dev \
libltdl-dev libtool && rm -rf /var/lib/apt/lists/* && \
cd /tmp/pdf-tools-build && \
rm -rf qpdf-* ImageMagick-* && \
# Build QPDF (Release build, installed stripped)
(test -d "qpdf-${QPDF_VERSION}" || curl -fsSL "https://github.com/qpdf/qpdf/releases/download/v${QPDF_VERSION}/qpdf-${QPDF_VERSION}.tar.gz" | tar xz) && \
cd "qpdf-${QPDF_VERSION}" && \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DALLOW_CRYPTO_OPENSSL=ON -DDEFAULT_CRYPTO=openssl && \
cmake --build build --parallel "$(nproc)" && \
cmake --install build --strip && \
cd .. && \
# Build ImageMagick 7 (no Perl or Magick++ bindings, 16-bit quantum depth)
(test -d "ImageMagick-${IM_VERSION}" || curl -fsSL "https://github.com/ImageMagick/ImageMagick/archive/refs/tags/${IM_VERSION}.tar.gz" | tar xz) && \
cd "ImageMagick-${IM_VERSION}" && \
./configure --prefix=/usr/local --with-modules --with-perl=no --with-magick-plus-plus=no --with-quantum-depth=16 --disable-static --enable-shared && \
make -j"$(nproc)" && \
make install-strip && \
# Enable PDF/PS/EPS in the installed policy.xml: any rights="none" entry for these patterns becomes read|write
sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' /usr/local/etc/ImageMagick-7/policy.xml && \
sed -i 's/rights="none" pattern="PS"/rights="read|write" pattern="PS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
sed -i 's/rights="none" pattern="EPS"/rights="read|write" pattern="EPS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
cd .. && \
ldconfig /usr/local/lib
# Stage ImageMagick outputs into a single directory so the runtime stage can import them with one COPY
# (reduces 4 separate COPY layers to 1 independent --link layer).
# The tree under /magick-export mirrors the final filesystem layout; note that the magick
# binary is placed under usr/bin, not usr/local/bin.
RUN mkdir -p /magick-export/usr/bin \
/magick-export/usr/local/lib \
/magick-export/usr/local/etc && \
cp /usr/local/bin/magick /magick-export/usr/bin/ && \
cp -a /usr/local/lib/libMagick*.so* /magick-export/usr/local/lib/ && \
cp -a /usr/local/lib/ImageMagick-7* /magick-export/usr/local/lib/ && \
cp -a /usr/local/etc/ImageMagick-7 /magick-export/usr/local/etc/
# Build the Python venv in an isolated stage so the runtime image never needs build tools.
# Packages with native extensions (opencv, cryptography) are installed from pre-built wheels
# where available (--prefer-binary).
# python3-uno is intentionally NOT installed here: it is a system package in the runtime stage
# and is reached through --system-site-packages at runtime.
FROM ubuntu:noble AS python-venv-build
ARG TARGETPLATFORM
ARG UNOSERVER_VERSION=3.6
# binutils provides `strip`, used below to shrink the venv's native extension modules.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    apt-get update && apt-get install -y --no-install-recommends \
    python3 python3-venv ca-certificates binutils && \
    rm -rf /var/lib/apt/lists/*
# Statements are separated with `;` under `set -e` rather than chained with `&&`.
# In a chain such as `pip install ... && find ... || true && ...`, the `|| true` also swallows
# a failed `pip install`, so the build would succeed with an incomplete venv. Here only the
# best-effort cleanup commands are allowed to fail.
# NOTE(review): --no-cache-dir disables pip's cache, so the /root/.cache/pip cache mount is
# presumably unused; confirm which of the two is intended.
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    set -eux; \
    python3 -m venv /opt/venv --system-site-packages; \
    /opt/venv/bin/pip install --no-cache-dir --prefer-binary \
        weasyprint pdf2image opencv-python-headless ocrmypdf \
        cryptography \
        "unoserver==${UNOSERVER_VERSION}"; \
    # Best-effort size trimming: bytecode caches and type stubs.
    find /opt/venv -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true; \
    find /opt/venv \( -name '*.pyc' -o -name '*.pyi' \) -delete 2>/dev/null || true; \
    # pip and setuptools are removed from the finished venv.
    rm -rf /opt/venv/lib/python*/site-packages/pip \
        /opt/venv/lib/python*/site-packages/pip-*.dist-info \
        /opt/venv/lib/python*/site-packages/setuptools \
        /opt/venv/lib/python*/site-packages/setuptools-*.dist-info; \
    # Best-effort: strip unneeded symbols from native extension modules.
    find /opt/venv -name '*.so' -exec strip --strip-unneeded {} + 2>/dev/null || true
# Final runtime image.
FROM eclipse-temurin:25-jre-noble AS runtime
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
ENV DEBIAN_FRONTEND=noninteractive \
LANG=C.UTF-8 \
LC_ALL=C.UTF-8 \
TESS_BASE_PATH=/usr/share/tesseract-ocr/5/tessdata
ARG UNOSERVER_VERSION=3.6
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
set -eux; \
apt-get update; \
# Add LibreOffice Fresh PPA for latest version (26.2.x)
apt-get install -y --no-install-recommends software-properties-common; \
add-apt-repository -y ppa:libreoffice/ppa; \
apt-get update; \
apt-get install -y --no-install-recommends \
# Core tools
ca-certificates tzdata tini bash fontconfig curl \
ffmpeg poppler-utils fontforge \
gosu unpaper \
# Fonts: full CJK coverage retained
fonts-dejavu \
fonts-liberation2 \
fonts-crosextra-caladea fonts-crosextra-carlito \
fonts-noto-core fonts-noto-mono fonts-noto-extra \
fonts-noto-cjk poppler-data \
# python3-uno required for UNO bridge (accessed by venv via --system-site-packages)
# python3-venv is NOT needed: the copied /opt/venv works without it at runtime
# python3-dev is NOT needed, venv is pre-built in python-venv-build stage
python3 python3-uno \
# Python packages are in /opt/venv (copied from python-venv-build stage below)
# OCR
tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu tesseract-ocr-fra \
tesseract-ocr-por tesseract-ocr-chi-sim \
# Tesseract OSD for orientation detection
tesseract-ocr-osd \
# Graphics / AWT headless
libcairo2 libpango-1.0-0 libpangoft2-1.0-0 libgdk-pixbuf-2.0-0 \
libfreetype6 libfontconfig1 libx11-6 libxt6 libxext6 libxrender1 \
libxtst6 libxi6 libxinerama1 libxkbcommon0 libsm6 libice6 \
# Qt/EGL for Calibre CLI
libegl1 libgl1 libopengl0 libdbus-1-3 libglib2.0-0 libnss3 \
libasound2t64 libxcomposite1 libxrandr2 \
# Virtual framebuffer (required for headless LibreOffice Impress/Draw)
xvfb x11-utils coreutils \
libreoffice-writer-nogui libreoffice-calc-nogui \
libreoffice-impress-nogui libreoffice-draw-nogui \
libreoffice-java-common \
; \
\
\
# Verify and fix LibreOffice
libreoffice --version; \
soffice --version 2>/dev/null || true; \
# Rebuild UNO bridge type database
/usr/lib/libreoffice/program/soffice.bin --headless --convert-to pdf /dev/null 2>/dev/null || true; \
# Force font cache rebuild
fc-cache -f -v 2>&1 | awk 'NR <= 20'; \
\
# Cleanup stage.
\
# Remove PPA helper, no longer needed after apt-get update
apt-get remove --purge -y software-properties-common || true; \
apt-get autoremove --purge -y || true; \
rm -rf /var/lib/apt/lists/*; \
\
# Docs / man / info / icons / themes / GUI assets (headless server)
rm -rf /usr/share/doc/* /usr/share/man/* /usr/share/info/* \
/usr/share/lintian/* /usr/share/linda/* \
/usr/share/icons/* /usr/share/themes/* \
/usr/share/javascript/* \
/usr/share/gtk-3.0/* \
/usr/share/fontforge/pixmaps \
/usr/share/fontforge/osx \
/usr/share/fontforge/cidmap \
/usr/share/fontforge/prefs \
/usr/share/liblangtag/* \
/usr/share/tcltk/* \
/usr/share/python-wheels/* \
/usr/share/glib-2.0/schemas/* \
/usr/share/mime/* \
/usr/share/xml/iso-codes \
/usr/share/GConf \
/usr/share/bash-completion \
/usr/share/zsh \
/usr/share/libmysofa \
/usr/share/alsa \
/usr/share/iso-codes \
/usr/share/perl5 \
/usr/share/libthai \
/usr/share/libexttextcat \
/usr/share/openal \
/usr/share/gcc; \
\
find /usr/share/locale -mindepth 1 -maxdepth 1 -type d \
! -name 'en*' -exec rm -rf {} + 2>/dev/null || true; \
rm -rf /usr/share/i18n/locales /usr/share/i18n/charmaps; \
\
rm -rf /usr/lib/libreoffice/share/gallery \
/usr/lib/libreoffice/share/template \
/usr/lib/libreoffice/share/wizards \
/usr/lib/libreoffice/share/autotext \
/usr/lib/libreoffice/help \
/usr/lib/libreoffice/share/config/images_*.zip \
/usr/lib/libreoffice/share/basic \
/usr/lib/libreoffice/share/Scripts \
/usr/lib/libreoffice/share/autocorr \
/usr/lib/libreoffice/share/classification \
/usr/lib/libreoffice/share/wordbook \
/usr/lib/libreoffice/share/fingerprint \
/usr/lib/libreoffice/share/xdg \
/usr/lib/libreoffice/share/numbertext \
/usr/lib/libreoffice/share/shell \
/usr/lib/libreoffice/share/palette \
/usr/lib/libreoffice/share/theme_definitions \
/usr/lib/libreoffice/share/xslt \
/usr/lib/libreoffice/share/labels \
/usr/lib/libreoffice/share/dtd \
/usr/lib/libreoffice/share/tipoftheday \
/usr/lib/libreoffice/share/toolbarmode \
/usr/lib/libreoffice/share/psprint \
/usr/lib/libreoffice/CREDITS.fodt \
/usr/lib/libreoffice/LICENSE.html; \
\
# Remove unused LO extensions (GUI-only; not needed for document conversion)
rm -rf /usr/lib/libreoffice/share/extensions/wiki-publisher \
/usr/lib/libreoffice/share/extensions/nlpsolver \
/usr/lib/libreoffice/share/extensions/dict-* 2>/dev/null || true; \
# Remove LO database components (LO Base; not needed for Writer/Calc/Impress conversion)
rm -rf /usr/lib/libreoffice/program/libdba* \
/usr/lib/libreoffice/program/libdbahsql* \
/usr/lib/libreoffice/program/libdbu* \
/usr/lib/libreoffice/program/libreport* 2>/dev/null || true; \
\
rm -rf /usr/lib/python3.12/test \
/usr/lib/python3.12/idlelib \
/usr/lib/python3.12/tkinter \
/usr/lib/python3.12/lib2to3 \
/usr/lib/python3.12/pydoc_data; \
\
rm -rf /usr/lib/python3/dist-packages/scipy \
/usr/lib/python3/dist-packages/sympy \
/usr/lib/python3/dist-packages/mpmath; \
\
rm -rf \
/usr/lib/python3/dist-packages/cffi \
/usr/lib/python3/dist-packages/cffi-*.dist-info \
/usr/lib/python3/dist-packages/_cffi_backend*.so \
/usr/lib/python3/dist-packages/_cffi_backend*.cpython*.so \
2>/dev/null || true; \
\
# Strip unneeded symbols from shared libraries under /usr/lib (JVM and LibreOffice files are excluded)
find /usr/lib -name '*.so*' -type f \
-not -path '*/jvm/*' \
-not -path '*/libreoffice/*' \
-exec strip --strip-unneeded {} + 2>/dev/null || true; \
\
# Preserving ffmpeg codec libs as they are directly linked.
\
# Remove GPU backends not needed for headless operation.
MULTIARCH_LIBDIR=$(dpkg-architecture -qDEB_HOST_MULTIARCH 2>/dev/null \
|| find /usr/lib -maxdepth 1 -type d -name '*-linux-gnu' | head -1); \
rm -f \
"${MULTIARCH_LIBDIR}"/libLLVM*.so* \
"${MULTIARCH_LIBDIR}"/libgallium*.so* \
2>/dev/null || true; \
\
find /usr/lib/python3* -type d -name __pycache__ \
-exec rm -rf {} + 2>/dev/null || true; \
find /usr/lib/python3* \( -name '*.pyc' -o -name '*.pyi' \) \
-delete 2>/dev/null || true; \
\
rm -rf /usr/share/bug /usr/share/lintian /usr/share/linda \
/var/log/dpkg.log /var/log/apt/* \
/usr/local/share/ghostscript/*/doc \
/usr/local/share/ghostscript/*/examples \
/usr/share/ImageMagick-*/doc \
/usr/share/ImageMagick-*/www; \
\
\
# Remove Tesseract tessconfigs (training configs, not needed for OCR); configs/ is kept for hocr/txt output
rm -rf /usr/share/tesseract-ocr/*/tessdata/tessconfigs; \
\
# Noto fonts ship 1800+ files in many weights (Bold, Italic, SemiBold, etc.)
# For PDF processing, Regular weight covers all scripts. Saves ~370MB.
find /usr/share/fonts/truetype/noto -type f \
! -name '*Regular*' -delete 2>/dev/null || true; \
find /usr/share/fonts/opentype -type f \
! -name '*Regular*' -delete 2>/dev/null || true; \
# DejaVu: keep Regular and Bold only (commonly referenced in PDFs)
find /usr/share/fonts/truetype -name '*DejaVu*' \
! -name '*-Regular*' ! -name '*-Bold*' ! -name '*Bold.ttf' \
! -name 'DejaVuSans.ttf' ! -name 'DejaVuSerif.ttf' ! -name 'DejaVuSansMono.ttf' \
-type f -delete 2>/dev/null || true; \
# Remove empty font directories after cleanup
find /usr/share/fonts -type d -empty -delete 2>/dev/null || true; \
\
# ── gconv: keep only essential charset conversion modules (~6MB savings) ──
# PDF processing needs UTF-8, ISO-8859-*, and a few CJK encodings.
GCONV_DIR=$(find /usr/lib -type d -name gconv 2>/dev/null | head -1); \
if [ -n "$GCONV_DIR" ] && [ -d "$GCONV_DIR" ]; then \
mkdir -p /tmp/gconv-keep; \
for mod in UTF-8.so UTF-16.so UTF-32.so UTF-7.so \
ISO8859-1.so ISO8859-15.so ISO8859-2.so ISO8859-9.so \
UNICODE.so CP1252.so CP1251.so CP1250.so \
EUC-JP.so EUC-KR.so EUC-CN.so \
SHIFT_JIS.so GB18030.so BIG5.so \
gconv-modules gconv-modules.d gconv-modules.cache; do \
[ -e "$GCONV_DIR/$mod" ] && \
cp -a "$GCONV_DIR/$mod" /tmp/gconv-keep/ 2>/dev/null || true; \
done; \
rm -rf "$GCONV_DIR"/*; \
cp -a /tmp/gconv-keep/* "$GCONV_DIR/" 2>/dev/null || true; \
rm -rf /tmp/gconv-keep; \
fi; \
\
# NOTE: flite TTS voice libs (~26MB) are kept because ffmpeg directly links them.
# Removing them breaks ffmpeg startup. To save these 26MB, ffmpeg would need
# to be rebuilt without --enable-libflite (not worth the complexity).
\
# ── dpkg metadata cleanup (~14MB) ──
# Not needed at runtime, container won't run apt-get.
rm -rf /var/lib/dpkg/info/*.list \
/var/lib/dpkg/info/*.md5sums \
/var/lib/dpkg/info/*.conffiles \
/var/lib/dpkg/info/*.postinst \
/var/lib/dpkg/info/*.preinst \
/var/lib/dpkg/info/*.prerm \
/var/lib/dpkg/info/*.postrm \
/var/lib/dpkg/info/*.triggers \
/var/lib/dpkg/info/*.shlibs \
/var/lib/dpkg/info/*.symbols \
/var/lib/dpkg/info/*.templates; \
\
# Misc caches
rm -rf /var/cache/fontconfig/* /tmp/*
# External tool layers, all use --link for independent layer caching and parallel pulls.
COPY --link --from=calibre-build /opt/calibre /opt/calibre
COPY --link --from=pdf-tools-build /usr/local/bin/qpdf /usr/bin/qpdf
# ImageMagick: 4 layers collapsed to 1 via the magick-export staging dir in pdf-tools-build
COPY --link --from=pdf-tools-build /magick-export/ /
COPY --link --from=gs-build /usr/local/bin/gs /usr/local/bin/gs
COPY --link --from=gs-build /usr/local/share/ghostscript /usr/local/share/ghostscript
# Python venv pre-built in python-venv-build (no pip install at runtime, no build tools needed)
COPY --link --from=python-venv-build /opt/venv /opt/venv
# Refresh the dynamic-linker cache for /usr/local/lib, then smoke-test the copied venv by
# importing cffi, OpenCV and ocrmypdf.
# Statements are separated with `;` under `set -e`: in the previous `a && b && find ... || true`
# form the trailing `|| true` masked every earlier failure, so a broken venv could never fail
# the build. Only the final __pycache__ cleanup is best-effort.
RUN set -eux; \
    ldconfig /usr/local/lib; \
    # Keep the import checks from writing new bytecode caches into the venv.
    export PYTHONDONTWRITEBYTECODE=1; \
    /opt/venv/bin/python -c "import cffi; print('cffi OK:', cffi.__version__)"; \
    /opt/venv/bin/python -c "import cv2; print('OpenCV', cv2.__version__)"; \
    /opt/venv/bin/python -c "import ocrmypdf; print('ocrmypdf OK')"; \
    find /opt/venv -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
# ---
# Non-root runtime account (stirlingpdfuser / stirlingpdfgroup)
# ---
# PUID/PGID are the preferred numeric ids; both default to 1000.
ARG PUID=1000
ARG PGID=1000
# Each account is created only when it does not exist yet. If creation with the requested id
# fails, it is retried without an explicit id so the system assigns one.
# su-exec is provided as a symlink to gosu.
RUN set -eux; \
    getent group stirlingpdfgroup >/dev/null 2>&1 \
        || groupadd -g "${PGID}" stirlingpdfgroup 2>/dev/null \
        || groupadd stirlingpdfgroup; \
    id -u stirlingpdfuser >/dev/null 2>&1 \
        || useradd -m -u "${PUID}" -g stirlingpdfgroup \
            -d /home/stirlingpdfuser -s /bin/bash stirlingpdfuser 2>/dev/null \
        || useradd -m -g stirlingpdfgroup \
            -d /home/stirlingpdfuser -s /bin/bash stirlingpdfuser; \
    ln -sf /usr/sbin/gosu /usr/local/bin/su-exec
# Application files.
WORKDIR /app
# Application layers
COPY --link --from=jar-extract --chown=1000:1000 /layers/dependencies/ /app/
COPY --link --from=jar-extract --chown=1000:1000 /layers/spring-boot-loader/ /app/
COPY --link --from=jar-extract --chown=1000:1000 /layers/snapshot-dependencies/ /app/
@@ -695,63 +67,33 @@ COPY --link --chown=1000:1000 scripts/ /scripts/
# Fonts go to system dir, root ownership is correct (world-readable)
COPY app/core/src/main/resources/static/fonts/*.ttf /usr/share/fonts/truetype/
# Permissions and configuration.
# Permissions and configuration
RUN set -eux; \
    # Expose the Calibre converter and the venv-provided CLI tools on the default PATH.
    ln -sf /opt/calibre/ebook-convert /usr/bin/ebook-convert; \
    for tool in unoconvert unoserver ocrmypdf weasyprint unoping; do \
        ln -sf "/opt/venv/bin/${tool}" "/usr/local/bin/${tool}"; \
    done; \
    chmod +x /scripts/*; \
    mkdir -p \
        /configs /configs/cache /configs/heap_dumps \
        /logs /customFiles \
        /pipeline/watchedFolders /pipeline/finishedFolders \
        /tmp/stirling-pdf/heap_dumps; \
    # Link the data directories into /app so the application can find them there.
    for dir in logs configs customFiles pipeline; do \
        ln -s "/${dir}" "/app/${dir}"; \
    done; \
    # Hand the writable locations to the runtime user; -h changes the symlinks themselves.
    chown -R stirlingpdfuser:stirlingpdfgroup \
        /home/stirlingpdfuser /configs /logs /customFiles /pipeline /tmp/stirling-pdf; \
    chown -h stirlingpdfuser:stirlingpdfgroup \
        /app/logs /app/configs /app/customFiles /app/pipeline; \
    chown stirlingpdfuser:stirlingpdfgroup /app; \
    chmod 750 /tmp/stirling-pdf /tmp/stirling-pdf/heap_dumps; \
    # Rebuild the font cache.
    fc-cache -f
# NOTE: Project Leyden AOT cache is generated in the background on first boot
# by init-without-ocr.sh and stored in /configs/cache/stirling.aot (persistent volume).
# The cache is picked up on subsequent boots for 15-25% faster startup.
# See: JEP 483 + 514 + 515 (JDK 25).
# Environment variables.
ARG VERSION_TAG
# Write version to a file so it is readable by scripts without env-var inheritance.
# init-without-ocr.sh reads /etc/stirling_version for the AOT cache fingerprint.
RUN echo "${VERSION_TAG:-dev}" > /etc/stirling_version
# Environment variables
ENV VERSION_TAG=$VERSION_TAG \
STIRLING_AOT_ENABLE="false" \
STIRLING_JVM_PROFILE="balanced" \
_JVM_OPTS_BALANCED="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:G1HeapRegionSize=4m -XX:G1PeriodicGCInterval=60000 -XX:+UseStringDeduplication -XX:+UseCompactObjectHeaders -XX:+ExplicitGCInvokesConcurrent -Dspring.threads.virtual.enabled=true -Djava.awt.headless=true" \
_JVM_OPTS_PERFORMANCE="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UseShenandoahGC -XX:ShenandoahGCMode=generational -XX:+UseCompactObjectHeaders -XX:+UseStringDeduplication -XX:+AlwaysPreTouch -XX:+ExplicitGCInvokesConcurrent -Dspring.threads.virtual.enabled=true -Djava.awt.headless=true" \
JAVA_CUSTOM_OPTS="" \
HOME=/home/stirlingpdfuser \
PUID=${PUID} \
PGID=${PGID} \
UMASK=022 \
PATH="/opt/venv/bin:${PATH}" \
UNO_PATH=/usr/lib/libreoffice/program \
LIBREOFFICE_BIN_PATH=/usr/lib/libreoffice/program/soffice.bin \
STIRLING_TEMPFILES_DIRECTORY=/tmp/stirling-pdf \
TMPDIR=/tmp/stirling-pdf \
TEMP=/tmp/stirling-pdf \
TMP=/tmp/stirling-pdf \
QTWEBENGINE_CHROMIUM_FLAGS="--no-sandbox --disable-gpu --disable-software-rasterizer" \
DBUS_SESSION_BUS_ADDRESS=/dev/null
SAL_TMP=/tmp/stirling-pdf/libre
# Metadata labels.
# Metadata labels
LABEL org.opencontainers.image.title="Stirling-PDF" \
org.opencontainers.image.description="Full version with Calibre, LibreOffice, Tesseract, OCRmyPDF" \
org.opencontainers.image.source="https://github.com/Stirling-Tools/Stirling-PDF" \

View File

@@ -1,192 +1,14 @@
# Stirling-PDF - Fat version (embedded frontend)
# Extra fonts for air-gapped environments
# Uses pre-built base image for fast builds
FROM ubuntu:noble AS calibre-build
ARG BASE_VERSION=1.0.0
ARG BASE_IMAGE=ghcr.io/stirling-tools/stirling-pdf-base:${BASE_VERSION}
ARG CALIBRE_VERSION=9.4.0
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
set -eux; \
apt-get update; \
apt-get install -y --no-install-recommends \
ca-certificates curl xz-utils libnss3 libfontconfig1 \
libgl1 libegl1 libdbus-1-3 libasound2t64 libxcomposite1 \
libxrandr2 libxkbcommon0 libxi6 libxtst6 libopengl0; \
rm -rf /var/lib/apt/lists/*; \
\
case "$(uname -m)" in \
x86_64) CALIBRE_ARCH="x86_64" ;; \
aarch64) CALIBRE_ARCH="arm64" ;; \
*) echo "Unsupported arch: $(uname -m)"; exit 1 ;; \
esac; \
\
curl -fsSL \
"https://download.calibre-ebook.com/${CALIBRE_VERSION}/calibre-${CALIBRE_VERSION}-${CALIBRE_ARCH}.txz" \
-o /tmp/calibre.txz; \
mkdir -p /opt/calibre; \
tar xJf /tmp/calibre.txz -C /opt/calibre; \
rm /tmp/calibre.txz; \
\
# Remove GUI-only shared libraries.
# Libs required by WebEngine PDF output are preserved.
rm -f /opt/calibre/lib/libQt6Designer* \
/opt/calibre/lib/libQt6Multimedia* \
/opt/calibre/lib/libQt6SpatialAudio.so.* \
/opt/calibre/lib/libQt6NetworkAuth.so.* \
/opt/calibre/lib/libQt6Concurrent.so.* \
/opt/calibre/lib/libQt6OpenGLWidgets.so.* \
/opt/calibre/lib/libQt6QuickWidgets.so.* \
# AV / multimedia
/opt/calibre/lib/libavcodec.so.* \
/opt/calibre/lib/libavfilter.so.* \
/opt/calibre/lib/libavformat.so.* \
/opt/calibre/lib/libavutil.so.* \
/opt/calibre/lib/libavdevice.so.* \
/opt/calibre/lib/libpostproc.so.* \
/opt/calibre/lib/libswresample.so.* \
/opt/calibre/lib/libswscale.so.* \
# Audio / speech / TTS
/opt/calibre/lib/libspeex.so.* \
/opt/calibre/lib/libFLAC.so.* \
/opt/calibre/lib/libopus.so.* \
/opt/calibre/lib/libvorbis*.so.* \
/opt/calibre/lib/libasyncns.so.* \
/opt/calibre/lib/libspeechd.so.* \
/opt/calibre/lib/libespeak-ng.so.* \
# Other unused libs
/opt/calibre/lib/libonnxruntime.so.* \
/opt/calibre/lib/libgio-2.0.so.* \
/opt/calibre/lib/libzstd.so.* \
/opt/calibre/lib/libhunspell-1.7.so.* \
/opt/calibre/lib/libbrotlienc.so.* \
/opt/calibre/lib/libbrotlicommon.so.* \
/opt/calibre/lib/libbrotlidec.so.* \
/opt/calibre/lib/libstemmer.so.* \
/opt/calibre/lib/libmtp.so.* \
/opt/calibre/lib/libncursesw.so.* \
/opt/calibre/lib/libchm.so.* \
/opt/calibre/lib/libgcrypt.so.* \
/opt/calibre/lib/libgpg-error.so.* \
/opt/calibre/lib/libicuio.so.* \
/opt/calibre/lib/libreadline.so.* \
/opt/calibre/lib/libusb-1.0.so.*; \
rm -rf /opt/calibre/lib/qt6/plugins/platformthemes \
/opt/calibre/lib/qt6/plugins/multimedia \
/opt/calibre/lib/qt6/plugins/designer \
/opt/calibre/lib/qt6/plugins/qmltooling; \
\
# Remove GUI executables but keep ebook-convert, ebook-meta, and calibre-parallel.
rm -f /opt/calibre/calibre \
/opt/calibre/calibre-server \
/opt/calibre/calibre-smtp \
/opt/calibre/calibre-debug \
/opt/calibre/calibre-customize \
/opt/calibre/calibredb \
/opt/calibre/ebook-viewer \
/opt/calibre/ebook-edit \
/opt/calibre/ebook-polish \
/opt/calibre/ebook-device \
/opt/calibre/fetch-ebook-metadata \
/opt/calibre/lrf2lrs \
/opt/calibre/lrs2lrf \
/opt/calibre/markdown-calibre \
/opt/calibre/web2disk; \
\
# Remove Python modules not needed for conversion.
rm -rf /opt/calibre/lib/calibre/gui2 \
/opt/calibre/lib/calibre/devices \
/opt/calibre/lib/calibre/library \
/opt/calibre/lib/calibre/db \
/opt/calibre/lib/calibre/srv \
/opt/calibre/lib/calibre/spell \
/opt/calibre/lib/calibre/live; \
\
# Remove resources not needed for CLI conversion.
rm -rf /opt/calibre/resources/images \
/opt/calibre/resources/icons \
/opt/calibre/resources/icons.rcc \
/opt/calibre/resources/content-server \
/opt/calibre/resources/editor* \
/opt/calibre/resources/viewer \
/opt/calibre/resources/viewer.js \
/opt/calibre/resources/viewer.html \
/opt/calibre/resources/recipes \
/opt/calibre/resources/dictionaries \
/opt/calibre/resources/hyphenation \
/opt/calibre/resources/catalog \
/opt/calibre/resources/calibre-mimetypes.xml \
/opt/calibre/resources/changelog.json \
/opt/calibre/resources/user-agent-data.json \
/opt/calibre/resources/builtin_recipes.zip \
/opt/calibre/resources/builtin_recipes.xml \
/opt/calibre/resources/builtin_recipes.xml \
/opt/calibre/resources/stylelint-bundle.min.js \
/opt/calibre/resources/stylelint.js \
/opt/calibre/resources/rapydscript \
/opt/calibre/resources/quick_start \
/opt/calibre/resources/piper-voices.json \
/opt/calibre/resources/images.qrc \
/opt/calibre/resources/mozilla-ca-certs.pem \
/opt/calibre/resources/ebook-convert-complete.calibre_msgpack \
/opt/calibre/resources/mathjax \
/opt/calibre/resources/common-english-words.txt \
/opt/calibre/resources/calibre-portable.sh \
/opt/calibre/resources/calibre-portable.bat \
/opt/calibre/resources/metadata_sqlite.sql \
/opt/calibre/resources/notes_sqlite.sql \
/opt/calibre/resources/fts_sqlite.sql \
/opt/calibre/resources/fts_triggers.sql \
/opt/calibre/resources/jacket \
/opt/calibre/resources/editor-functions.json \
/opt/calibre/resources/calibre-ebook-root-CA.crt \
/opt/calibre/resources/csscolorparser.js \
/opt/calibre/resources/lookup.js \
/opt/calibre/resources/pdf-mathjax-loader.js \
/opt/calibre/resources/scraper.js \
/opt/calibre/resources/toc.js \
/opt/calibre/resources/user-manual-translation-stats.json \
/opt/calibre/resources/pin-template.svg \
/opt/calibre/resources/scripts.calibre_msgpack \
/opt/calibre/lib/calibre/ebooks/docx/images \
/opt/calibre/share \
/opt/calibre/man; \
\
# Remove translations and localization while keeping required libraries.
rm -rf /opt/calibre/lib/qt6/translations; \
find /opt/calibre/translations -mindepth 1 -maxdepth 1 ! -name 'qtwebengine_locales' -exec rm -rf {} +; \
rm -rf /opt/calibre/resources/localization/locales.zip \
/opt/calibre/resources/localization/stats.calibre_msgpack \
/opt/calibre/resources/localization/website-languages.txt; \
find /opt/calibre/resources/localization -mindepth 1 -maxdepth 1 ! -name 'iso639.calibre_msgpack' -exec rm -rf {} +; \
\
# Strip debug symbols from calibre extension modules.
# Exclude Qt6 libs and all qt6/ subdirectory files to prevent Chromium renderer crashes.
find /opt/calibre/lib -name '*.so*' \
! -name 'libQt6*' \
! -path '*/qt6/*' \
-exec strip --strip-unneeded {} + 2>/dev/null || true; \
\
find /opt/calibre -type d -name __pycache__ \
-exec rm -rf {} + 2>/dev/null || true; \
find /opt/calibre -name '*.pyc' -delete 2>/dev/null || true; \
\
# Verify conversion functionality.
# NOTE: txt→epub is used intentionally instead of txt→pdf.
# Calibre 7+ uses WebEngine (Chromium) for PDF output, which requires kernel
# capabilities unavailable in Docker RUN steps and segfaults under QEMU.
# epub output exercises the same Python/plugin stack without touching WebEngine.
/opt/calibre/ebook-convert --version; \
echo "Hello" > /tmp/test.txt; \
/opt/calibre/ebook-convert /tmp/test.txt /tmp/test.epub; \
rm -f /tmp/test.txt /tmp/test.epub; \
echo "=== Calibre stripped successfully ==="
# Build the Java application and frontend.
# Stage 1: Build the Java application and frontend
FROM gradle:9.3.1-jdk25 AS app-build
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update \
RUN apt-get update \
&& apt-get install -y --no-install-recommends curl ca-certificates \
&& update-ca-certificates \
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
@@ -209,381 +31,71 @@ COPY app/common/build.gradle app/common/
COPY app/proprietary/build.gradle app/proprietary/
# Use system gradle instead of gradlew to avoid SSL issues downloading gradle distribution on emulated arm64
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
--mount=type=cache,target=/home/gradle/.gradle/wrapper \
gradle dependencies --no-daemon || true
RUN gradle dependencies --no-daemon || true
COPY . .
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
--mount=type=cache,target=/home/gradle/.gradle/wrapper \
DISABLE_ADDITIONAL_FEATURES=false \
RUN DISABLE_ADDITIONAL_FEATURES=false \
gradle clean build \
-PbuildWithFrontend=true \
-x spotlessApply -x spotlessCheck -x test -x sonarqube \
--no-daemon
# Python builder stage: creates the /opt/venv virtual environment and installs
# the Python tooling into it.
FROM ubuntu:noble AS python-build
# unoserver release to install; override with --build-arg UNOSERVER_VERSION=<version>.
ARG UNOSERVER_VERSION=3.6
# Keep apt/dpkg from prompting during the build.
ENV DEBIAN_FRONTEND=noninteractive
# Install the interpreter, venv support and the compiler toolchain / headers
# used when pip has to build packages from source.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update && apt-get install -y --no-install-recommends \
python3 python3-venv python3-dev \
python3-packaging \
build-essential \
# Build dependencies for ocrmypdf/weasyprint/opencv
zlib1g-dev libjpeg-dev libffi-dev libpango1.0-dev \
&& rm -rf /var/lib/apt/lists/*
# --system-site-packages lets the venv also import Python packages installed
# through apt (such as python3-packaging above).
RUN python3 -m venv /opt/venv --system-site-packages
# Put the venv first on PATH so the pip call below resolves to /opt/venv/bin/pip.
ENV PATH="/opt/venv/bin:$PATH"
# Install the Python packages into the venv; only unoserver is version-pinned,
# the others resolve to whatever is current at build time.
RUN --mount=type=cache,target=/root/.cache/pip \
pip install \
weasyprint pdf2image opencv-python-headless ocrmypdf \
"unoserver==${UNOSERVER_VERSION}"
# Stage 2: Extract Spring Boot Layers
# Copies the application jar out of the app-build stage and explodes it into
# layer directories under /layers using the jar's built-in "tools" jarmode.
FROM eclipse-temurin:25-jre-noble AS jar-extract
WORKDIR /tmp
# NOTE(review): the destination is a single file, so the *.jar glob is expected
# to match exactly one jar in build/libs — confirm against the Gradle build.
COPY --from=app-build /app/app/core/build/libs/*.jar app.jar
RUN java -Djarmode=tools -jar app.jar extract --layers --destination /layers
# Build Ghostscript from source in an isolated stage (avoids library conflicts).
# GS_VERSION selects the release (default 10.06.0); the build is installed
# under /usr/local.
FROM ubuntu:noble AS gs-build
ARG GS_VERSION=10.06.0
# TARGETPLATFORM is a BuildKit argument defined in the global scope; it is only
# visible inside a stage after being redeclared. Without this line the cache id
# below always expands to "gs-build-local", so every platform shares one cache.
ARG TARGETPLATFORM
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/tmp/gs-build,id=gs-build-${TARGETPLATFORM:-local} \
    apt-get update && apt-get install -y --no-install-recommends \
    build-essential curl ca-certificates libfontconfig1-dev && rm -rf /var/lib/apt/lists/* && \
    # Release tag is the version with the dots removed (10.06.0 -> gs10060).
    GS_TAG="gs$(printf '%s' "${GS_VERSION}" | tr -d '.')" && \
    cd /tmp/gs-build && \
    # Always start from a clean tree; as a consequence the test -d guard below
    # never finds an existing directory and the tarball is fetched on each run.
    rm -rf ghostscript-* && \
    (test -d "ghostscript-${GS_VERSION}" || curl -fsSL "https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/${GS_TAG}/ghostscript-${GS_VERSION}.tar.gz" | tar xz) && \
    cd "ghostscript-${GS_VERSION}" && \
    ./configure \
    --prefix=/usr/local \
    --without-x \
    --disable-cups \
    --disable-gtk && \
    make -j"$(nproc)" && \
    make install && \
    cd ..
# Stage 3: Final runtime image on top of pre-built base
FROM ${BASE_IMAGE}
ARG VERSION_TAG
# Build PDF Tools (QPDF and ImageMagick 7) from source; both install under /usr/local.
FROM ubuntu:noble AS pdf-tools-build
ARG QPDF_VERSION=12.3.2
ARG IM_VERSION=7.1.2-13
# TARGETPLATFORM is a BuildKit argument defined in the global scope; it is only
# visible inside a stage after being redeclared. Without this line the cache id
# below always expands to "pdf-tools-local", so every platform shares one cache.
ARG TARGETPLATFORM
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/tmp/pdf-tools-build,id=pdf-tools-${TARGETPLATFORM:-local} \
    apt-get update && apt-get install -y --no-install-recommends \
    build-essential cmake libssl-dev libjpeg-dev zlib1g-dev curl ca-certificates pkg-config \
    libpng-dev libtiff-dev libwebp-dev libxml2-dev libfreetype6-dev liblcms2-dev libzip-dev liblqr-1-0-dev \
    libltdl-dev libtool && rm -rf /var/lib/apt/lists/* && \
    cd /tmp/pdf-tools-build && \
    # Always start from clean source trees; the test -d guards below therefore
    # never find an existing directory and both tarballs are fetched on each run.
    rm -rf qpdf-* ImageMagick-* && \
    # Build QPDF (static library, OpenSSL as the crypto provider)
    (test -d "qpdf-${QPDF_VERSION}" || curl -fsSL "https://github.com/qpdf/qpdf/releases/download/v${QPDF_VERSION}/qpdf-${QPDF_VERSION}.tar.gz" | tar xz) && \
    cd "qpdf-${QPDF_VERSION}" && \
    cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DALLOW_CRYPTO_OPENSSL=ON -DDEFAULT_CRYPTO=openssl && \
    cmake --build build --parallel "$(nproc)" && \
    cmake --install build && \
    cd .. && \
    # Build ImageMagick 7 (shared libraries, no Perl or Magick++ bindings)
    (test -d "ImageMagick-${IM_VERSION}" || curl -fsSL "https://github.com/ImageMagick/ImageMagick/archive/refs/tags/${IM_VERSION}.tar.gz" | tar xz) && \
    cd "ImageMagick-${IM_VERSION}" && \
    ./configure --prefix=/usr/local --with-modules --with-perl=no --with-magick-plus-plus=no --with-quantum-depth=16 --disable-static --enable-shared && \
    make -j"$(nproc)" && \
    make install && \
    # Enable PDF/PS/EPS in policy
    sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' /usr/local/etc/ImageMagick-7/policy.xml && \
    sed -i 's/rights="none" pattern="PS"/rights="read|write" pattern="PS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
    sed -i 's/rights="none" pattern="EPS"/rights="read|write" pattern="EPS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
    cd .. && \
    # Register the freshly installed shared libraries with the dynamic linker.
    ldconfig /usr/local/lib
WORKDIR /app
# Application layers
COPY --link --from=jar-extract --chown=1000:1000 /layers/dependencies/ /app/
COPY --link --from=jar-extract --chown=1000:1000 /layers/spring-boot-loader/ /app/
COPY --link --from=jar-extract --chown=1000:1000 /layers/snapshot-dependencies/ /app/
COPY --link --from=jar-extract --chown=1000:1000 /layers/application/ /app/
# Final runtime image.
FROM eclipse-temurin:25-jre AS runtime
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
ENV DEBIAN_FRONTEND=noninteractive \
LANG=C.UTF-8 \
LC_ALL=C.UTF-8 \
TESS_BASE_PATH=/usr/share/tesseract-ocr/5/tessdata
ARG UNOSERVER_VERSION=3.6
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/root/.cache/pip \
set -eux; \
apt-get update; \
# Add LibreOffice Fresh PPA for latest version (26.2.x)
apt-get install -y --no-install-recommends software-properties-common; \
add-apt-repository -y ppa:libreoffice/ppa; \
apt-get update; \
apt-get install -y --no-install-recommends \
# Core tools
ca-certificates tzdata tini bash fontconfig curl \
ffmpeg poppler-utils fontforge \
gosu unpaper pngquant \
fonts-liberation2 \
fonts-crosextra-caladea fonts-crosextra-carlito \
fonts-noto-core fonts-noto-mono fonts-noto-extra \
fonts-noto-cjk poppler-data \
fonts-freefont-ttf fonts-terminus \
# Python runtime & UNO bridge (python3-full -> python3 optimization)
python3 python3-uno python3-packaging \
# OCR
tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu tesseract-ocr-fra \
tesseract-ocr-por tesseract-ocr-chi-sim \
# Graphics / AWT headless
libcairo2 libpango-1.0-0 libpangoft2-1.0-0 \
libfreetype6 libfontconfig1 libx11-6 libxt6 libxext6 libxrender1 \
libxtst6 libxi6 libxinerama1 libxkbcommon0 libsm6 libice6 \
# Qt/EGL for Calibre CLI
libegl1 libgl1 libopengl0 libdbus-1-3 libglib2.0-0 libnss3 \
libasound2t64 libxcomposite1 libxrandr2 \
# Virtual framebuffer (required for headless LibreOffice Impress/Draw)
xvfb x11-utils coreutils \
libreoffice-writer-nogui libreoffice-calc-nogui \
libreoffice-impress-nogui libreoffice-draw-nogui \
libreoffice-base-nogui libreoffice-java-common \
; \
\
# Fix LibreOffice UNO bridge and filter availability
libreoffice --version; \
soffice --version 2>/dev/null || true; \
# Rebuild UNO bridge type database
/usr/lib/libreoffice/program/soffice.bin --headless --convert-to pdf /dev/null 2>/dev/null || true; \
# Force font cache rebuild and verify filters are available
fc-cache -f -v 2>&1 | awk 'NR <= 20'; \
\
# Cleanup stage.
\
rm -rf /var/lib/apt/lists/*; \
\
# Docs / man / info / icons / themes / GUI assets (headless server)
rm -rf /usr/share/doc/* /usr/share/man/* /usr/share/info/* \
/usr/share/lintian/* /usr/share/linda/* \
/usr/share/icons/* /usr/share/themes/* \
/usr/share/javascript/* \
/usr/share/gtk-3.0/* \
/usr/share/fontforge/pixmaps \
/usr/share/liblangtag/* \
/usr/share/tcltk/* \
/usr/share/python-wheels/*; \
\
# Clean up system locale data (LANG=C.UTF-8 doesn't use them)
find /usr/share/locale -mindepth 1 -maxdepth 1 -type d \
! -name 'en*' -exec rm -rf {} + 2>/dev/null || true; \
rm -rf /usr/share/i18n/locales /usr/share/i18n/charmaps; \
\
rm -rf /usr/lib/libreoffice/share/gallery \
/usr/lib/libreoffice/share/template \
/usr/lib/libreoffice/share/wizards \
/usr/lib/libreoffice/share/autotext \
/usr/lib/libreoffice/help \
/usr/lib/libreoffice/share/config/images_*.zip \
/usr/lib/libreoffice/share/basic \
/usr/lib/libreoffice/share/Scripts \
/usr/lib/libreoffice/share/autocorr \
/usr/lib/libreoffice/share/classification \
/usr/lib/libreoffice/share/wordbook \
/usr/lib/libreoffice/share/fingerprint \
/usr/lib/libreoffice/share/xdg \
/usr/lib/libreoffice/share/numbertext \
/usr/lib/libreoffice/share/shell \
/usr/lib/libreoffice/share/palette \
/usr/lib/libreoffice/share/theme_definitions \
/usr/lib/libreoffice/share/xslt \
/usr/lib/libreoffice/share/labels \
/usr/lib/libreoffice/share/dtd \
/usr/lib/libreoffice/share/tipoftheday \
/usr/lib/libreoffice/share/toolbarmode \
/usr/lib/libreoffice/share/psprint; \
\
# Preserving soffice.cfg because LibreOffice needs it to load documents.
\
\
\
find /usr/lib -name '*.so*' -type f \
-not -path '*/jvm/*' \
-not -path '*/libreoffice/*' \
-exec strip --strip-unneeded {} + 2>/dev/null || true; \
\
# Preserving ffmpeg codec libs as they are directly linked.
\
# Remove Mesa/LLVM GPU backends (~179 MB, not needed for headless/offscreen)
MULTIARCH_LIBDIR=$(dpkg-architecture -qDEB_HOST_MULTIARCH 2>/dev/null \
|| find /usr/lib -maxdepth 1 -type d -name '*-linux-gnu' | head -1); \
rm -f \
"${MULTIARCH_LIBDIR}"/libLLVM*.so* \
"${MULTIARCH_LIBDIR}"/libgallium*.so* \
2>/dev/null || true; \
\
# Python stdlib: remove unused modules (~71 MB)
rm -rf /usr/lib/python3.12/test \
/usr/lib/python3.12/idlelib \
/usr/lib/python3.12/tkinter \
/usr/lib/python3.12/lib2to3 \
/usr/lib/python3.12/pydoc_data; \
\
# System Python packages not needed at runtime (~153 MB)
rm -rf /usr/lib/python3/dist-packages/scipy \
/usr/lib/python3/dist-packages/sympy \
/usr/lib/python3/dist-packages/mpmath; \
\
# Duplicate system packages (superseded by venv versions, ~55 MB)
rm -rf /usr/lib/python3/dist-packages/numpy \
/usr/lib/python3/dist-packages/fontTools \
/usr/lib/python3/dist-packages/PIL; \
\
# System-wide Python cache cleanup
find /usr/lib/python3* -type d -name __pycache__ \
-exec rm -rf {} + 2>/dev/null || true; \
find /usr/lib/python3* \( -name '*.pyc' -o -name '*.pyi' \) \
-delete 2>/dev/null || true; \
\
# Additional metadata cleanup
# FIX: Only remove ImageMagick doc/www, NOT the whole dir (preserves policy.xml/delegates.xml)
rm -rf /usr/share/bug /usr/share/lintian /usr/share/linda \
/var/lib/dpkg/info/*.md5sums \
/var/log/dpkg.log /var/log/apt/* \
/usr/local/share/ghostscript/*/doc \
/usr/local/share/ghostscript/*/examples \
/usr/share/ImageMagick-*/doc \
/usr/share/ImageMagick-*/www; \
\
\
# NEW: Tesseract training configs (not needed for OCR, but keep configs/ for hocr/txt output)
rm -rf /usr/share/tesseract-ocr/*/tessdata/tessconfigs; \
\
# Trim CJK fonts to Regular weight only (FIX: Broadened path)
find /usr/share/fonts -name '*CJK*' \
! -name '*Regular*' -type f -delete 2>/dev/null || true; \
\
# Misc caches
rm -rf /var/cache/fontconfig/* /tmp/*
# Python virtual environment.
COPY --from=python-build /opt/venv /opt/venv
RUN set -eux; \
ln -sf /opt/venv/bin/unoconvert /usr/local/bin/unoconvert; \
ln -sf /opt/venv/bin/unoserver /usr/local/bin/unoserver; \
# Verify python libs are accessible
/opt/venv/bin/python -c "import cv2; import ocrmypdf; import weasyprint; print('Python libs verified')"; \
# Cleanup venv from builder leftovers
find /opt/venv -type d -name __pycache__ \
-exec rm -rf {} + 2>/dev/null || true; \
find /opt/venv \( -name '*.pyc' -o -name '*.pyi' \) -delete 2>/dev/null || true; \
rm -rf /opt/venv/lib/python*/site-packages/pip \
/opt/venv/lib/python*/site-packages/pip-*.dist-info \
/opt/venv/lib/python*/site-packages/setuptools \
/opt/venv/lib/python*/site-packages/setuptools-*.dist-info;
# Calibre and PDF Tools.
COPY --link --from=calibre-build /opt/calibre /opt/calibre
COPY --link --from=pdf-tools-build /usr/local/bin/qpdf /usr/bin/qpdf
COPY --link --from=pdf-tools-build /usr/local/bin/magick /usr/bin/magick
COPY --link --from=pdf-tools-build /usr/local/lib/libMagick* /usr/local/lib/
COPY --link --from=pdf-tools-build /usr/local/etc/ImageMagick-7 /usr/local/etc/ImageMagick-7
COPY --link --from=gs-build /usr/local/bin/gs /usr/local/bin/gs
COPY --link --from=gs-build /usr/local/share/ghostscript /usr/local/share/ghostscript
RUN set -eux; \
ldconfig /usr/local/lib; \
# Clean pycache that may have been generated during stage-1 verify
find /opt/calibre -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true;
# Non-root user.
ARG PUID=1000
ARG PGID=1000
RUN set -eux; \
if ! getent group stirlingpdfgroup >/dev/null 2>&1; then \
groupadd -g "${PGID}" stirlingpdfgroup 2>/dev/null \
|| groupadd stirlingpdfgroup; \
fi; \
if ! id -u stirlingpdfuser >/dev/null 2>&1; then \
useradd -m -u "${PUID}" -g stirlingpdfgroup \
-d /home/stirlingpdfuser -s /bin/bash stirlingpdfuser 2>/dev/null \
|| useradd -m -g stirlingpdfgroup \
-d /home/stirlingpdfuser -s /bin/bash stirlingpdfuser; \
fi; \
ln -sf /usr/sbin/gosu /usr/local/bin/su-exec
# Application files.
COPY --link --from=app-build --chown=1000:1000 \
/app/app/core/build/libs/*.jar /app.jar
COPY --link --from=app-build --chown=1000:1000 \
/app/build/libs/restart-helper.jar /restart-helper.jar
COPY --link --chown=1000:1000 scripts/ /scripts/
# Fonts go to system dir root ownership is correct (world-readable)
COPY --link app/core/src/main/resources/static/fonts/*.ttf /usr/share/fonts/truetype/
# Fonts go to system dir, root ownership is correct (world-readable)
COPY app/core/src/main/resources/static/fonts/*.ttf /usr/share/fonts/truetype/
# Permissions and configuration.
# Permissions and configuration
RUN set -eux; \
ln -sf /opt/calibre/ebook-convert /usr/bin/ebook-convert; \
ln -sf /opt/venv/bin/unoconvert /usr/local/bin/unoconvert; \
ln -sf /opt/venv/bin/unoserver /usr/local/bin/unoserver; \
ln -sf /opt/venv/bin/ocrmypdf /usr/local/bin/ocrmypdf; \
ln -sf /opt/venv/bin/weasyprint /usr/local/bin/weasyprint; \
ln -sf /opt/venv/bin/unoping /usr/local/bin/unoping; \
chmod +x /scripts/*; \
mkdir -p /configs /logs /customFiles \
/pipeline/watchedFolders /pipeline/finishedFolders \
/tmp/stirling-pdf/heap_dumps; \
# Create symlinks to allow app to find these in /app/
mkdir -p /app; \
ln -s /logs /app/logs; \
ln -s /configs /app/configs; \
ln -s /customFiles /app/customFiles; \
ln -s /pipeline /app/pipeline; \
chown -R stirlingpdfuser:stirlingpdfgroup \
/home/stirlingpdfuser /configs /logs /customFiles /pipeline \
/tmp/stirling-pdf; \
chown -h stirlingpdfuser:stirlingpdfgroup /app/logs /app/configs /app/customFiles /app/pipeline; \
chown stirlingpdfuser:stirlingpdfgroup /app; \
chmod 1777 /tmp/stirling-pdf; \
fc-cache -f; \
# NOTE: Project Leyden AOT cache is generated in the background on first boot
# by init-without-ocr.sh. The cache is picked up on subsequent boots for
# 15-25% faster startup. See: JEP 483 + 514 + 515 (JDK 25).
\
# Clean Calibre pycache that may have been generated during stage-1 verify
find /opt/calibre -type d -name __pycache__ \
-exec rm -rf {} + 2>/dev/null || true
chmod 750 /tmp/stirling-pdf; \
chmod 750 /tmp/stirling-pdf/heap_dumps; \
fc-cache -f
# Environment variables.
ARG VERSION_TAG
# Write version to a file so it is readable by scripts without env-var inheritance.
RUN echo "${VERSION_TAG:-dev}" > /etc/stirling_version
# Environment variables
ENV VERSION_TAG=$VERSION_TAG \
STIRLING_AOT_ENABLE="false" \
STIRLING_JVM_PROFILE="balanced" \
_JVM_OPTS_BALANCED="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:G1HeapRegionSize=4m -XX:G1PeriodicGCInterval=60000 -XX:+UseStringDeduplication -XX:+UseCompactObjectHeaders -XX:+ExplicitGCInvokesConcurrent -Dspring.threads.virtual.enabled=true -Djava.awt.headless=true" \
_JVM_OPTS_PERFORMANCE="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UseShenandoahGC -XX:ShenandoahGCMode=generational -XX:+UseCompactObjectHeaders -XX:+UseStringDeduplication -XX:+AlwaysPreTouch -XX:+ExplicitGCInvokesConcurrent -Dspring.threads.virtual.enabled=true -Djava.awt.headless=true" \
JAVA_CUSTOM_OPTS="" \
HOME=/home/stirlingpdfuser \
PUID=${PUID} \
PGID=${PGID} \
UMASK=022 \
FAT_DOCKER=true \
INSTALL_BOOK_AND_ADVANCED_HTML_OPS=false \
PATH="/opt/venv/bin:${PATH}" \
UNO_PATH=/usr/lib/libreoffice/program \
LIBREOFFICE_BIN_PATH=/usr/lib/libreoffice/program/soffice.bin \
STIRLING_TEMPFILES_DIRECTORY=/tmp/stirling-pdf \
TMPDIR=/tmp/stirling-pdf \
TEMP=/tmp/stirling-pdf \
TMP=/tmp/stirling-pdf \
QTWEBENGINE_CHROMIUM_FLAGS="--no-sandbox --disable-gpu --disable-software-rasterizer" \
DBUS_SESSION_BUS_ADDRESS=/dev/null
SAL_TMP=/tmp/stirling-pdf/libre
# Metadata labels.
# Metadata labels
LABEL org.opencontainers.image.title="Stirling-PDF Fat" \
org.opencontainers.image.description="Fat version with extra fonts for air-gapped environments, includes Calibre, LibreOffice, Tesseract, OCRmyPDF" \
org.opencontainers.image.source="https://github.com/Stirling-Tools/Stirling-PDF" \
@@ -599,8 +111,8 @@ LABEL org.opencontainers.image.title="Stirling-PDF Fat" \
EXPOSE 8080/tcp
STOPSIGNAL SIGTERM
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
CMD curl -f http://localhost:8080/api/v1/info/status || exit 1
HEALTHCHECK --interval=30s --timeout=15s --start-period=120s --retries=5 \
CMD curl -fs --max-time 10 http://localhost:8080/api/v1/info/status || exit 1
ENTRYPOINT ["tini", "--", "/scripts/init.sh"]
CMD []

View File

@@ -5,8 +5,7 @@
FROM gradle:9.3.1-jdk25 AS build
# Install Node.js and npm for frontend build
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update && apt-get install -y --no-install-recommends \
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
&& apt-get install -y --no-install-recommends nodejs \
@@ -30,15 +29,13 @@ ENV JDK_JAVA_OPTIONS="--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNN
--add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED"
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
./gradlew dependencies --no-daemon || true
RUN ./gradlew dependencies --no-daemon || true
# Copy entire project
COPY . .
# Build ultra-lite JAR with embedded frontend (minimal features)
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
DISABLE_ADDITIONAL_FEATURES=true \
RUN DISABLE_ADDITIONAL_FEATURES=true \
./gradlew clean build \
-PbuildWithFrontend=true \
-x spotlessApply -x spotlessCheck -x test -x sonarqube \

View File

@@ -2,6 +2,9 @@
services:
stirling-pdf:
container_name: Stirling-PDF-Fat-Disable-Endpoints
build:
context: ../../..
dockerfile: docker/embedded/Dockerfile.fat
image: docker.stirlingpdf.com/stirlingtools/stirling-pdf:fat
build:
context: ../../../

View File

@@ -1,6 +1,9 @@
services:
stirling-pdf:
container_name: Stirling-PDF-Security-Fat
build:
context: ../../..
dockerfile: docker/embedded/Dockerfile.fat
image: docker.stirlingpdf.com/stirlingtools/stirling-pdf:fat
build:
context: ../../../

View File

@@ -1,6 +1,9 @@
services:
stirling-pdf:
container_name: Stirling-PDF-Ultra-Lite
build:
context: ../../..
dockerfile: docker/embedded/Dockerfile.ultra-lite
image: docker.stirlingpdf.com/stirlingtools/stirling-pdf:ultra-lite
build:
context: ../../../

View File

@@ -1,6 +1,9 @@
services:
stirling-pdf:
container_name: Stirling-PDF-Security-Fat-with-login
build:
context: ../../..
dockerfile: docker/embedded/Dockerfile.fat
image: docker.stirlingpdf.com/stirlingtools/stirling-pdf:fat
build:
context: ../../../

View File

@@ -1,463 +0,0 @@
# Stirling-PDF Unified Container
Single Docker container that can run as **frontend + backend**, **frontend only**, or **backend only** using the `MODE` environment variable.
## Quick Start
### MODE=BOTH (Default)
Single container with both frontend and backend on port 8080:
```bash
docker run -p 8080:8080 \
-e MODE=BOTH \
stirlingtools/stirling-pdf:unified
```
Access at: `http://localhost:8080`
### MODE=FRONTEND
Frontend only, connecting to separate backend:
```bash
docker run -p 8080:8080 \
-e MODE=FRONTEND \
-e VITE_API_BASE_URL=http://backend:8080 \
stirlingtools/stirling-pdf:unified
```
### MODE=BACKEND
Backend API only:
```bash
docker run -p 8080:8080 \
-e MODE=BACKEND \
stirlingtools/stirling-pdf:unified
```
Access API at: `http://localhost:8080/api`
Swagger UI at: `http://localhost:8080/swagger-ui/index.html`
---
## Architecture
### MODE=BOTH (Default)
```
┌─────────────────────────────────────┐
│ Port 8080 (External) │
│ ┌───────────────────────────────┐ │
│ │ Nginx │ │
│ │ • Serves frontend (/) │ │
│ │ • Proxies /api/* → backend │ │
│ └───────────┬───────────────────┘ │
│ │ │
│ ┌───────────▼───────────────────┐ │
│ │ Backend (Internal 8081) │ │
│ │ • Spring Boot │ │
│ │ • PDF Processing │ │
│ │ • UnoServer │ │
│ └───────────────────────────────┘ │
└─────────────────────────────────────┘
```
### MODE=FRONTEND
```
┌─────────────────────────────┐ ┌──────────────────┐
│ Frontend Container │ │ Backend │
│ Port 8080 │ │ (External) │
│ ┌───────────────────────┐ │ │ │
│ │ Nginx │ │──────▶ :8080/api │
│ │ • Serves frontend │ │ │ │
│ │ • Proxies to backend │ │ │ │
│ └───────────────────────┘ │ └──────────────────┘
└─────────────────────────────┘
```
### MODE=BACKEND
```
┌─────────────────────────────┐
│ Backend Container │
│ Port 8080 │
│ ┌───────────────────────┐ │
│ │ Spring Boot │ │
│ │ • API Endpoints │ │
│ │ • PDF Processing │ │
│ │ • UnoServer │ │
│ └───────────────────────┘ │
└─────────────────────────────┘
```
---
## Environment Variables
### MODE Configuration
| Variable | Values | Default | Description |
|----------|--------|---------|-------------|
| `MODE` | `BOTH`, `FRONTEND`, `BACKEND` | `BOTH` | Container operation mode |
### MODE=BOTH Specific
| Variable | Default | Description |
|----------|---------|-------------|
| `BACKEND_INTERNAL_PORT` | `8081` | Internal port for backend when MODE=BOTH |
### MODE=FRONTEND Specific
| Variable | Default | Description |
|----------|---------|-------------|
| `VITE_API_BASE_URL` | `http://backend:8080` | Backend URL for API proxying |
### Standard Configuration
All modes support standard Stirling-PDF environment variables:
- `DISABLE_ADDITIONAL_FEATURES` - Enable/disable OCR and LibreOffice features
- `DOCKER_ENABLE_SECURITY` - Enable authentication
- `PUID` / `PGID` - User/Group IDs
- `SYSTEM_MAXFILESIZE` - Max upload size (MB)
- `TESSERACT_LANGS` - Comma-separated OCR language codes
- `JAVA_CUSTOM_OPTS` - Additional JVM options
- `PROCESS_EXECUTOR_AUTO_UNO_SERVER` - Overrides `processExecutor.autoUnoServer` (true or false)
- `PROCESS_EXECUTOR_SESSION_LIMIT_LIBRE_OFFICE_SESSION_LIMIT` - Overrides `processExecutor.sessionLimit.libreOfficeSessionLimit`
- `UNO_SERVER_AUTO` - Legacy alias for `processExecutor.autoUnoServer`
- `UNO_SERVER_COUNT` - Legacy alias for `processExecutor.sessionLimit.libreOfficeSessionLimit`
- `UNO_SERVER_HEALTH_INTERVAL` - Seconds between unoserver PID checks (default: 30)
See full configuration docs at: https://docs.stirlingpdf.com
---
## Docker Compose Examples
### Example 1: All-in-One (MODE=BOTH)
**File:** `docker/compose/docker-compose-unified-both.yml`
```yaml
services:
  stirling-pdf:
    image: stirlingtools/stirling-pdf:unified
    ports:
      - "8080:8080"
    volumes:
      - ./data:/usr/share/tessdata:rw
      - ./config:/configs:rw
    environment:
      MODE: BOTH
    restart: unless-stopped
```
### Example 2: Separate Frontend & Backend
**File:** `docker/compose/docker-compose-unified-frontend.yml`
```yaml
services:
  backend:
    image: stirlingtools/stirling-pdf:unified
    ports:
      - "8081:8080"
    environment:
      MODE: BACKEND
    volumes:
      - ./data:/usr/share/tessdata:rw
      - ./config:/configs:rw
  frontend:
    image: stirlingtools/stirling-pdf:unified
    ports:
      - "8080:8080"
    environment:
      MODE: FRONTEND
      VITE_API_BASE_URL: http://backend:8080
    depends_on:
      - backend
```
### Example 3: Backend API Only
**File:** `docker/compose/docker-compose-unified-backend.yml`
```yaml
services:
  stirling-pdf-api:
    image: stirlingtools/stirling-pdf:unified
    ports:
      - "8080:8080"
    environment:
      MODE: BACKEND
    volumes:
      - ./data:/usr/share/tessdata:rw
      - ./config:/configs:rw
    restart: unless-stopped
```
---
## Building the Image
```bash
# From repository root
docker build -t stirlingtools/stirling-pdf:unified -f docker/Dockerfile.unified .
```
### Build Arguments
| Argument | Description |
|----------|-------------|
| `VERSION_TAG` | Version tag for the image |
Example:
```bash
docker build \
--build-arg VERSION_TAG=v1.0.0 \
-t stirlingtools/stirling-pdf:unified \
-f docker/Dockerfile.unified .
```
---
## Use Cases
### 1. Simple Deployment (MODE=BOTH)
- **Best for:** Personal use, small teams, simple deployments
- **Pros:** Single container, easy setup, minimal configuration
- **Cons:** Frontend and backend scale together
### 2. Scaled Frontend (MODE=FRONTEND + BACKEND)
- **Best for:** High traffic, need to scale frontend independently
- **Pros:** Scale frontend containers separately, CDN-friendly
- **Example:**
```yaml
services:
  backend:
    image: stirlingtools/stirling-pdf:unified
    environment:
      MODE: BACKEND
    deploy:
      replicas: 1
  frontend:
    image: stirlingtools/stirling-pdf:unified
    environment:
      MODE: FRONTEND
      VITE_API_BASE_URL: http://backend:8080
    deploy:
      replicas: 5  # Scale frontend independently
```
### 3. API-Only (MODE=BACKEND)
- **Best for:** Headless deployments, custom frontends, API integrations
- **Pros:** Minimal resources, no nginx overhead
- **Example:** Use with external frontend or API consumers
### 4. Multi-Backend Setup
- **Best for:** Load balancing, high availability
- **Example:**
```yaml
services:
  backend-1:
    image: stirlingtools/stirling-pdf:unified
    environment:
      MODE: BACKEND
  backend-2:
    image: stirlingtools/stirling-pdf:unified
    environment:
      MODE: BACKEND
  frontend:
    image: stirlingtools/stirling-pdf:unified
    environment:
      MODE: FRONTEND
      VITE_API_BASE_URL: http://load-balancer:8080
```
---
## Port Configuration
All modes use **port 8080** by default:
- **MODE=BOTH**: Nginx listens on 8080, proxies to backend on internal 8081
- **MODE=FRONTEND**: Nginx listens on 8080
- **MODE=BACKEND**: Spring Boot listens on 8080
**Expose port 8080** in all configurations:
```yaml
ports:
  - "8080:8080"
```
---
## Health Checks
### MODE=BOTH and MODE=BACKEND
```yaml
healthcheck:
  test: ["CMD-SHELL", "curl -f http://localhost:8080/api/v1/info/status || exit 1"]
  interval: 30s
  timeout: 10s
  retries: 3
```
### MODE=FRONTEND
```yaml
healthcheck:
  test: ["CMD-SHELL", "curl -f http://localhost:8080/ || exit 1"]
  interval: 30s
  timeout: 10s
  retries: 3
```
---
## Troubleshooting
### Check logs
```bash
docker logs stirling-pdf-container
```
Look for the startup banner:
```
===================================
Stirling-PDF Unified Container
MODE: BOTH
===================================
```
### Invalid MODE error
```
ERROR: Invalid MODE 'XYZ'. Must be BOTH, FRONTEND, or BACKEND
```
**Fix:** Set `MODE` to one of the three valid values.
### Frontend can't connect to backend (MODE=FRONTEND)
**Check:**
1. `VITE_API_BASE_URL` points to correct backend URL
2. Backend container is running and accessible
3. Network connectivity between containers
### Backend not starting (MODE=BOTH or BACKEND)
**Check:**
1. Sufficient memory allocated (4GB recommended)
2. Java heap size (`JAVA_CUSTOM_OPTS`)
3. Volume permissions for `/tmp/stirling-pdf`
---
## Migration Guide
### From Separate Containers → MODE=BOTH
**Before:**
```yaml
services:
  frontend:
    image: stirlingtools/stirling-pdf:frontend
    ports: ["80:80"]
  backend:
    image: stirlingtools/stirling-pdf:backend
    ports: ["8080:8080"]
```
**After:**
```yaml
services:
  stirling-pdf:
    image: stirlingtools/stirling-pdf:unified
    ports: ["8080:8080"]
    environment:
      MODE: BOTH
```
### From Legacy → MODE=BACKEND
```yaml
services:
  stirling-pdf:
    image: stirlingtools/stirling-pdf:latest
    ports: ["8080:8080"]
```
**Becomes:**
```yaml
services:
  stirling-pdf:
    image: stirlingtools/stirling-pdf:unified
    ports: ["8080:8080"]
    environment:
      MODE: BACKEND
```
---
## Performance Tuning
### MODE=BOTH
```yaml
environment:
  JAVA_CUSTOM_OPTS: "-Xmx4g -XX:MaxRAMPercentage=75"
  BACKEND_INTERNAL_PORT: 8081
deploy:
  resources:
    limits:
      memory: 4G
    reservations:
      memory: 2G
```
### MODE=FRONTEND (Lightweight)
```yaml
deploy:
  resources:
    limits:
      memory: 512M
    reservations:
      memory: 256M
```
### MODE=BACKEND (Heavy Processing)
```yaml
environment:
  JAVA_CUSTOM_OPTS: "-Xmx8g"
deploy:
  resources:
    limits:
      memory: 10G
    reservations:
      memory: 4G
```
---
## Security Considerations
1. **MODE=BOTH**: Backend not exposed externally (runs on internal port)
2. **MODE=BACKEND**: API exposed directly - consider API authentication
3. **MODE=FRONTEND**: Serves static files and proxies API requests to the configured backend - smaller attack surface than running the backend
Enable security features:
```yaml
environment:
  DOCKER_ENABLE_SECURITY: "true"
  SECURITY_ENABLELOGIN: "true"
```
---
## Support
- Documentation: https://docs.stirlingpdf.com
- GitHub Issues: https://github.com/Stirling-Tools/Stirling-PDF/issues
- Docker Hub: https://hub.docker.com/r/stirlingtools/stirling-pdf
---
## License
MIT License - See repository for full details

View File

@@ -1,38 +0,0 @@
#!/bin/bash
# Builds the Stirling-PDF unified container image.
# Usage: ./build.sh [version-tag]
#   version-tag  optional; defaults to "latest" and is used both as the image
#                tag suffix and as the VERSION_TAG build argument.

set -e

VERSION_TAG=${1:-latest}
IMAGE_NAME="stirlingtools/stirling-pdf:unified-${VERSION_TAG}"

# Announce what is about to be built.
printf '%s\n' \
    "===================================" \
    "Building Stirling-PDF Unified Container" \
    "Version: $VERSION_TAG" \
    "Image: $IMAGE_NAME" \
    "==================================="

# Resolve the repository root relative to this script (the script is assumed
# to live in docker/unified/) and build from there so paths resolve correctly.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
REPO_ROOT="$SCRIPT_DIR/../.."
cd "$REPO_ROOT"

# Build the image with the repository root as the build context.
docker build \
    -f docker/Dockerfile.unified \
    -t "$IMAGE_NAME" \
    --build-arg VERSION_TAG="$VERSION_TAG" \
    .

# Summary plus ready-to-paste commands for trying each MODE.
printf '%s\n' \
    "===================================" \
    "✓ Build complete!" \
    "Image: $IMAGE_NAME" \
    "" \
    "Test the image:" \
    " MODE=BOTH: docker run -p 8080:8080 -e MODE=BOTH $IMAGE_NAME" \
    " MODE=FRONTEND: docker run -p 8080:8080 -e MODE=FRONTEND $IMAGE_NAME" \
    " MODE=BACKEND: docker run -p 8080:8080 -e MODE=BACKEND $IMAGE_NAME" \
    "==================================="

View File

@@ -1,379 +0,0 @@
#!/bin/bash
set -e

# Fall back to BOTH when MODE is unset or empty.
: "${MODE:=BOTH}"

# Startup banner showing the selected mode.
printf '%s\n' \
    "===================================" \
    "Stirling-PDF Unified Container" \
    "MODE: $MODE" \
    "==================================="
# Function to setup OCR (from init.sh)
#
# Prepares the tesseract data directory, restores the bundled language files
# from the backup copy, installs any extra language packs named in the
# comma-separated TESSERACT_LANGS variable, and exports TESSDATA_PREFIX.
# Sets the globals TESSDATA_DIR and SPACE_SEPARATED_LANGS.
setup_ocr() {
    echo "Setting up OCR languages..."

    # In Alpine, tesseract uses /usr/share/tessdata
    TESSDATA_DIR="/usr/share/tessdata"

    # Create tessdata directory
    mkdir -p "$TESSDATA_DIR"

    # Restore system languages from backup (Dockerfile moved them to tessdata-original)
    if [ -d /usr/share/tessdata-original ]; then
        echo "Restoring system tessdata from backup..."
        cp -rn /usr/share/tessdata-original/* "$TESSDATA_DIR"/ 2>/dev/null || true
    fi

    # Note: If user mounted custom languages to /usr/share/tessdata, they'll be overlaid here.
    # The cp -rn above won't overwrite user files, just adds missing system files.

    # Install additional languages if specified
    if [ -n "$TESSERACT_LANGS" ]; then
        # The loop variable must not be called LANG: that name is the locale
        # environment variable, and assigning to it would change the locale
        # for the rest of this script and for every process it starts.
        local ocr_lang
        SPACE_SEPARATED_LANGS=$(printf '%s' "$TESSERACT_LANGS" | tr ',' ' ')
        for ocr_lang in $SPACE_SEPARATED_LANGS; do
            # Only accept plain language codes (xx, xxx, xxxx, optionally
            # followed by _ and a suffix of the same length); anything else
            # is silently ignored rather than passed to the package manager.
            case "$ocr_lang" in
                [a-zA-Z][a-zA-Z]|[a-zA-Z][a-zA-Z][a-zA-Z]|[a-zA-Z][a-zA-Z][a-zA-Z][a-zA-Z]|[a-zA-Z][a-zA-Z]_[a-zA-Z][a-zA-Z]|[a-zA-Z][a-zA-Z][a-zA-Z]_[a-zA-Z][a-zA-Z][a-zA-Z]|[a-zA-Z][a-zA-Z][a-zA-Z][a-zA-Z]_[a-zA-Z][a-zA-Z][a-zA-Z][a-zA-Z])
                    # Best effort: a missing or failing package does not abort startup.
                    apk add --no-cache "tesseract-ocr-data-$ocr_lang" 2>/dev/null || true
                    ;;
            esac
        done
    fi

    # Point to the consolidated location
    export TESSDATA_PREFIX="$TESSDATA_DIR"
    echo "Using TESSDATA_PREFIX=$TESSDATA_PREFIX"
}
# Function to setup user permissions (from init-without-ocr.sh)
#
# Exports JAVA_TOOL_OPTIONS, aligns the stirlingpdfuser / stirlingpdfgroup IDs
# with PUID / PGID, applies UMASK, optionally installs fonts for LANGS, and
# hands the runtime directories over to the service account.
setup_permissions() {
    echo "Setting up user permissions..."

    export JAVA_TOOL_OPTIONS="${JAVA_BASE_OPTS} ${JAVA_CUSTOM_OPTS}"

    # Re-map the account IDs only when a value was requested and it differs
    # from the current one; failures are tolerated.
    if [ -n "$PUID" ] && [ "$PUID" != "$(id -u stirlingpdfuser)" ]; then
        usermod -o -u "$PUID" stirlingpdfuser || true
    fi
    if [ -n "$PGID" ] && [ "$PGID" != "$(getent group stirlingpdfgroup | cut -d: -f3)" ]; then
        groupmod -o -g "$PGID" stirlingpdfgroup || true
    fi

    umask "$UMASK" || true

    # Install fonts if needed. LANGS is left unquoted on purpose so that each
    # word becomes a separate argument to the installer.
    if [ -n "$LANGS" ]; then
        /scripts/installFonts.sh $LANGS
    fi

    # Make sure the temp directory exists before its ownership is changed.
    mkdir -p /tmp/stirling-pdf || true

    # Ownership: best effort, with a warning when any path could not be changed.
    chown -R stirlingpdfuser:stirlingpdfgroup \
        $HOME /logs /scripts /usr/share/fonts/opentype/noto \
        /configs /customFiles /pipeline /tmp/stirling-pdf \
        /var/lib/nginx /var/log/nginx /usr/share/nginx \
        /app.jar 2>/dev/null || echo "[WARN] Some chown operations failed, may run as host user"

    # Mode bits: best effort, failures are ignored silently.
    chmod -R 755 /logs /scripts /usr/share/fonts/opentype/noto \
        /configs /customFiles /pipeline /tmp/stirling-pdf 2>/dev/null || true
}
# Substitute the backend URL into the nginx configuration.
#
# $1 - backend base URL (e.g. http://localhost:8081). Every literal
#      ${BACKEND_URL} placeholder in /etc/nginx/nginx.conf is replaced by it,
#      editing the file in place.
configure_nginx() {
  local backend_url=$1
  local escaped_url
  echo "Configuring nginx with backend URL: $backend_url"
  # In a sed replacement "&" means "the matched text", "\" starts an escape
  # and "|" is the delimiter used below. Escape all three so a URL such as
  # http://host/path?a=1&b=2 is inserted verbatim instead of being mangled.
  escaped_url=$(printf '%s' "$backend_url" | sed -e 's/[\\&|]/\\&/g')
  sed -i "s|\${BACKEND_URL}|${escaped_url}|g" /etc/nginx/nginx.conf
}
# Run a command as the service account when possible.
#
# $@ - command and arguments.
# As root the command is launched through su-exec as stirlingpdfuser.
# Otherwise the command is exec'd directly, which REPLACES the current shell
# process, so in that case nothing after the call runs in the calling shell.
run_as_user() {
  if [ "$(id -u)" != "0" ]; then
    # Already running as non-root
    exec "$@"
  fi
  # Running as root, drop privileges with su-exec
  su-exec stirlingpdfuser "$@"
}
# Run a command with a time limit when the `timeout` utility is available.
#
# $1 - limit in seconds; remaining arguments are the command to run.
# Without `timeout` on PATH the command runs with no limit at all.
# Returns the exit status of whichever command was executed.
run_with_timeout() {
  local limit=$1
  shift
  if ! command -v timeout >/dev/null 2>&1; then
    "$@"
  else
    timeout "${limit}s" "$@"
  fi
}
# Run a command as the service account with an optional time limit.
#
# $1 - limit in seconds; remaining arguments are the command to run.
# The limit is applied only when the `timeout` utility is on PATH.
# Returns the exit status of the command (or of `timeout`).
#
# run_as_user exec's the command when the script is not running as root,
# which would replace the calling shell. The call is therefore wrapped in a
# subshell so that a foreground caller (e.g. a health-check loop) keeps
# running after the command finishes.
run_as_user_with_timeout() {
  local secs=$1
  shift
  if command -v timeout >/dev/null 2>&1; then
    ( run_as_user timeout "${secs}s" "$@" )
  else
    ( run_as_user "$@" )
  fi
}
# Probe whether a TCP port accepts connections.
#
# $1 - host, $2 - port, $3 - timeout in seconds (default 5).
# Returns:
#   0 - the port accepted a connection
#   1 - the probe ran and failed (refused, timed out, ...)
#   2 - no probing method is available (neither nc nor bash /dev/tcp)
#
# Probe failures are captured with "||" so that they never trip `set -e`
# inside this function, and are normalised to 1 so they cannot be confused
# with the "no method" code 2.
tcp_port_check() {
  local host=$1
  local port=$2
  local timeout_secs=${3:-5}

  # Try nc first (most portable)
  if command -v nc >/dev/null 2>&1; then
    run_with_timeout "$timeout_secs" nc -z "$host" "$port" 2>/dev/null || return 1
    return 0
  fi

  # Fallback to /dev/tcp (bash-specific)
  if [ -n "${BASH_VERSION:-}" ] && command -v bash >/dev/null 2>&1; then
    # The descriptor is opened inside the child bash and disappears when that
    # process exits, so nothing has to be closed in this shell. (An
    # "exec ... 2>/dev/null" here would permanently silence the caller's
    # stderr.) Host and port are passed as arguments rather than being
    # interpolated into the command string.
    run_with_timeout "$timeout_secs" \
      bash -c 'exec 3<>"/dev/tcp/$1/$2"' _ "$host" "$port" 2>/dev/null || return 1
    return 0
  fi

  # No TCP check method available
  return 2
}
# Settings file consulted by read_setting_value; the CONFIG_FILE environment
# variable overrides the default path.
: "${CONFIG_FILE:=/configs/settings.yml}"

# Bookkeeping for the local unoserver pool. The three arrays are indexed
# together: entry i of each describes the same instance.
declare -a UNOSERVER_PIDS=() UNOSERVER_PORTS=() UNOSERVER_UNO_PORTS=()
# Print the value of the first "key: value" line in $CONFIG_FILE whose key
# equals $1, ignoring indentation (so nesting is NOT taken into account).
#
# $1 - key name, compared literally.
# Output: the value with any "#..." comment, surrounding whitespace and one
#         leading/trailing quote character removed. Nothing is printed when
#         the file or the key is missing.
#
# The line is split at the FIRST colon only, so values that themselves
# contain colons (URLs, host:port pairs, ...) are returned in full.
read_setting_value() {
  local key=$1
  if [ ! -f "$CONFIG_FILE" ]; then
    return 0
  fi
  awk -v key="$key" '
    {
      colon = index($0, ":")
      if (colon == 0) next
      name = substr($0, 1, colon - 1)
      gsub(/^[[:space:]]+|[[:space:]]+$/, "", name)
      if (name != key) next
      val = substr($0, colon + 1)
      sub(/#.*/, "", val)
      gsub(/^[[:space:]]+|[[:space:]]+$/, "", val)
      gsub(/^["'"'"']|["'"'"']$/, "", val)
      print val
      exit
    }
  ' "$CONFIG_FILE"
}
# Print the raw "auto unoserver" setting.
#
# Precedence: PROCESS_EXECUTOR_AUTO_UNO_SERVER, then UNO_SERVER_AUTO, then the
# autoUnoServer key of the settings file. The value is printed as found; the
# caller is responsible for normalising and interpreting it.
get_unoserver_auto() {
  local override
  for override in "${PROCESS_EXECUTOR_AUTO_UNO_SERVER:-}" "${UNO_SERVER_AUTO:-}"; do
    if [ -n "$override" ]; then
      echo "$override"
      return
    fi
  done
  read_setting_value "autoUnoServer"
}
# Print the raw requested number of unoserver instances.
#
# Precedence: PROCESS_EXECUTOR_SESSION_LIMIT_LIBRE_OFFICE_SESSION_LIMIT, then
# UNO_SERVER_COUNT, then the libreOfficeSessionLimit key of the settings file.
# No validation happens here; the caller sanitises the value.
get_unoserver_count() {
  local override
  for override in \
    "${PROCESS_EXECUTOR_SESSION_LIMIT_LIBRE_OFFICE_SESSION_LIMIT:-}" \
    "${UNO_SERVER_COUNT:-}"; do
    if [ -n "$override" ]; then
      echo "$override"
      return
    fi
  done
  read_setting_value "libreOfficeSessionLimit"
}
# Launch one unoserver in the background, bound to the loopback interface.
#
# $1 - port passed to --port
# $2 - port passed to --uno-port
# Sets LAST_UNOSERVER_PID to the PID of the background job so the caller can
# record it.
start_unoserver_instance() {
  local listen_port=$1
  local office_port=$2
  local -a launch_cmd=(
    /opt/venv/bin/unoserver
    --port "$listen_port"
    --interface 127.0.0.1
    --uno-port "$office_port"
  )
  run_as_user "${launch_cmd[@]}" &
  LAST_UNOSERVER_PID=$!
}
# Start a background loop that health-checks every pooled unoserver and
# restarts the ones that are dead or unresponsive.
#
# Reads: UNO_SERVER_HEALTH_INTERVAL - seconds between rounds (default 30; any
#        non-numeric value falls back to 30), and the UNOSERVER_PIDS /
#        UNOSERVER_PORTS / UNOSERVER_UNO_PORTS arrays.
#
# The loop runs in a background subshell, so PID updates after a restart are
# visible only inside the watchdog itself, not in the parent shell.
start_unoserver_watchdog() {
  local interval=${UNO_SERVER_HEALTH_INTERVAL:-30}
  case "$interval" in
    ''|*[!0-9]*) interval=30 ;;
  esac
  (
    # The subshell inherits `set -e` from the script. A watchdog exists to
    # observe failures, so a failing probe must never terminate it.
    set +e
    while true; do
      local i=0
      while [ "$i" -lt "${#UNOSERVER_PIDS[@]}" ]; do
        local pid=${UNOSERVER_PIDS[$i]}
        local port=${UNOSERVER_PORTS[$i]}
        local uno_port=${UNOSERVER_UNO_PORTS[$i]}
        local needs_restart=false

        # Check 1: PID exists
        if [ -z "$pid" ] || ! kill -0 "$pid" 2>/dev/null; then
          echo "unoserver PID ${pid} not found for port ${port}"
          needs_restart=true
        else
          # PID exists, now check if server is actually healthy
          local health_ok=false

          # Check 2A: Health check with unoping (best - checks actual server health)
          if command -v unoping >/dev/null 2>&1; then
            # Run in a subshell: when the script is not root the helper may
            # exec the command, which would otherwise replace this loop.
            if ( run_as_user_with_timeout 5 unoping --host 127.0.0.1 --port "$port" ) >/dev/null 2>&1; then
              health_ok=true
            else
              echo "unoserver health check failed (unoping) for port ${port}, trying TCP fallback"
            fi
          fi

          # Check 2B: Fallback to TCP port check (verifies service is listening)
          if [ "$health_ok" = false ]; then
            # Capture the status through "||" so a non-zero result is data,
            # not a fatal error.
            local tcp_rc=0
            tcp_port_check "127.0.0.1" "$port" 5 || tcp_rc=$?
            if [ "$tcp_rc" -eq 0 ]; then
              health_ok=true
            elif [ "$tcp_rc" -eq 2 ]; then
              echo "No TCP check available; falling back to PID-only for port ${port}"
              health_ok=true
            else
              echo "unoserver TCP check failed for port ${port}"
              needs_restart=true
            fi
          fi
        fi

        if [ "$needs_restart" = true ]; then
          echo "Restarting unoserver on 127.0.0.1:${port} (uno-port ${uno_port})"
          # Kill the old process if it exists: polite TERM first, then KILL
          if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
            kill -TERM "$pid" 2>/dev/null || true
            sleep 1
            kill -KILL "$pid" 2>/dev/null || true
          fi
          start_unoserver_instance "$port" "$uno_port"
          UNOSERVER_PIDS[$i]=$LAST_UNOSERVER_PID
        fi
        i=$((i + 1))
      done
      sleep "$interval"
    done
  ) &
}
# Start the local pool of unoserver instances plus its watchdog.
#
# The pool is skipped unless the auto setting (case-insensitive) is "true";
# an empty setting counts as "true". The instance count falls back to 1 when
# it is empty, non-numeric or not positive. Instance i listens on port
# 2003 + 2*i and uses uno-port 2004 + 2*i. Each started instance is appended
# to UNOSERVER_PORTS, UNOSERVER_UNO_PORTS and UNOSERVER_PIDS.
start_unoserver_pool() {
  local mode
  mode="$(get_unoserver_auto)"
  mode="${mode,,}"
  case "${mode:-true}" in
    true) ;;
    *)
      echo "Skipping local unoserver pool (autoUnoServer=$mode)"
      return
      ;;
  esac

  local instances
  instances="$(get_unoserver_count)"
  if [[ ! "$instances" =~ ^[0-9]+$ ]]; then
    instances=1
  fi
  if [ "$instances" -le 0 ]; then
    instances=1
  fi

  local slot=0
  local http_port office_port
  while [ "$slot" -lt "$instances" ]; do
    http_port=$((2003 + (slot * 2)))
    office_port=$((2004 + (slot * 2)))
    echo "Starting unoserver on 127.0.0.1:${http_port} (uno-port ${office_port})"
    UNOSERVER_PORTS+=("$http_port")
    UNOSERVER_UNO_PORTS+=("$office_port")
    start_unoserver_instance "$http_port" "$office_port"
    UNOSERVER_PIDS+=("$LAST_UNOSERVER_PID")
    slot=$((slot + 1))
  done

  start_unoserver_watchdog
}
# Main entry sequence: common setup, then start the services selected by MODE.
# Setup OCR and permissions
# Both setup steps run for every mode, and they run BEFORE MODE is validated,
# so an invalid MODE is only rejected after setup has already happened.
setup_ocr
setup_permissions
# Handle different modes
case "$MODE" in
BOTH)
# nginx listens on 8080 and proxies to the backend, which listens on
# BACKEND_INTERNAL_PORT (default 8081).
echo "Starting in BOTH mode: Frontend + Backend on port 8080"
# Configure nginx to proxy to internal backend
configure_nginx "http://localhost:${BACKEND_INTERNAL_PORT:-8081}"
# Start backend on internal port
echo "Starting backend on port ${BACKEND_INTERNAL_PORT:-8081}..."
# The port is expanded by this shell before `sh -c` receives the command string.
run_as_user sh -c "java -Dfile.encoding=UTF-8 \
-Djava.io.tmpdir=/tmp/stirling-pdf \
-Dserver.port=${BACKEND_INTERNAL_PORT:-8081} \
-jar /app.jar" &
# Recorded for reference; nothing later in this script reads BACKEND_PID.
BACKEND_PID=$!
# Start unoserver pool for document conversion
start_unoserver_pool
# Wait for backend to start
# NOTE(review): this is a fixed 3 second delay, not a readiness check.
sleep 3
# Start nginx on port 8080
echo "Starting nginx on port 8080..."
run_as_user nginx -g "daemon off;" &
# Recorded for reference; nothing later in this script reads NGINX_PID.
NGINX_PID=$!
echo "==================================="
echo "✓ Frontend available at: http://localhost:8080"
echo "✓ Backend API at: http://localhost:8080/api"
echo "✓ Backend running internally on port ${BACKEND_INTERNAL_PORT:-8081}"
echo "==================================="
;;
FRONTEND)
# Only nginx runs; API calls are proxied to an external backend.
echo "Starting in FRONTEND mode: Frontend only on port 8080"
# Configure nginx with external backend URL
# VITE_API_BASE_URL supplies the backend URL; default is http://backend:8080.
BACKEND_URL=${VITE_API_BASE_URL:-http://backend:8080}
configure_nginx "$BACKEND_URL"
# Start nginx on port 8080
echo "Starting nginx on port 8080..."
run_as_user nginx -g "daemon off;" &
NGINX_PID=$!
echo "==================================="
echo "✓ Frontend available at: http://localhost:8080"
echo "✓ Proxying API calls to: $BACKEND_URL"
echo "==================================="
;;
BACKEND)
# Only the Java backend (plus the unoserver pool) runs; nginx is not started.
echo "Starting in BACKEND mode: Backend only on port 8080"
# Start backend on port 8080
echo "Starting backend on port 8080..."
run_as_user sh -c "java -Dfile.encoding=UTF-8 \
-Djava.io.tmpdir=/tmp/stirling-pdf \
-Dserver.port=8080 \
-jar /app.jar" &
BACKEND_PID=$!
start_unoserver_pool
echo "==================================="
echo "✓ Backend API available at: http://localhost:8080/api"
echo "✓ Swagger UI at: http://localhost:8080/swagger-ui/index.html"
echo "==================================="
;;
*)
# Any other value is a configuration error: report it and exit non-zero.
echo "ERROR: Invalid MODE '$MODE'. Must be BOTH, FRONTEND, or BACKEND"
exit 1
;;
esac
# Wait for all background processes
# `wait` without arguments returns only once EVERY background child has
# exited. NOTE(review): when the unoserver pool is active its watchdog loops
# forever, so the script presumably keeps waiting even if the backend or
# nginx dies - confirm this is the intended container lifecycle.
wait

View File

@@ -1,121 +0,0 @@
# nginx configuration for the unified container.
#
# Every literal ${BACKEND_URL} below is a placeholder that is replaced with the
# real backend base URL before nginx starts; it must stay byte-for-byte intact.

# nginx is started as a non-root user, so the PID file lives in /tmp.
pid /tmp/nginx.pid;

events {
    worker_connections 1024;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # Add .mjs MIME type mapping
    types {
        text/javascript mjs;
    }

    # Gzip compression
    gzip on;
    gzip_vary on;
    gzip_min_length 1024;
    gzip_types text/plain text/css text/xml text/javascript application/javascript application/xml+rss application/json;

    server {
        listen 8080;
        server_name _;
        root /usr/share/nginx/html;
        index index.html index.htm;

        # Global settings for file uploads
        client_max_body_size 100m;

        # Security headers.
        # add_header directives are inherited from this level ONLY by locations
        # that declare no add_header of their own, so the two asset locations
        # further down repeat them explicitly.
        add_header X-Frame-Options "SAMEORIGIN" always;
        add_header X-Content-Type-Options "nosniff" always;
        add_header X-XSS-Protection "1; mode=block" always;
        add_header Referrer-Policy "strict-origin-when-cross-origin" always;

        # Handle client-side routing - support subpaths
        location / {
            try_files $uri $uri/ /index.html;
        }

        # Proxy API calls to backend
        location /api/ {
            proxy_pass ${BACKEND_URL}/api/;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Forwarded-Host $host;
            proxy_set_header X-Forwarded-Port $server_port;

            # Additional headers for proper API proxying
            proxy_set_header Connection '';
            proxy_http_version 1.1;
            proxy_buffering off;
            proxy_cache off;

            # Timeout settings for large file uploads
            proxy_connect_timeout 60s;
            proxy_send_timeout 60s;
            proxy_read_timeout 60s;

            # Request size limits for file uploads
            client_max_body_size 100m;
            proxy_request_buffering off;
        }

        # Proxy Swagger UI to backend (including versioned paths).
        # A "^~" prefix location is used instead of a regex with "$1":
        #  - proxy_pass without variables keeps the original query string and
        #    does not need a runtime resolver for the backend host name;
        #  - "^~" stops regex matching, so the static-asset location below can
        #    never capture /swagger-ui/*.js or *.css.
        location ^~ /swagger-ui {
            proxy_pass ${BACKEND_URL}/swagger-ui;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Forwarded-Host $host;
            proxy_set_header X-Forwarded-Port $server_port;
            proxy_set_header Connection '';
            proxy_http_version 1.1;
            proxy_buffering off;
            proxy_cache off;
        }

        # Proxy API docs to backend (query parameters and sub-paths preserved)
        location ^~ /v3/api-docs {
            proxy_pass ${BACKEND_URL}/v3/api-docs;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Forwarded-Host $host;
            proxy_set_header X-Forwarded-Port $server_port;
        }

        # Proxy v1 API docs to backend (query parameters and sub-paths preserved)
        location ^~ /v1/api-docs {
            proxy_pass ${BACKEND_URL}/v1/api-docs;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Forwarded-Host $host;
            proxy_set_header X-Forwarded-Port $server_port;
        }

        # Serve .mjs files with correct MIME type (must come before general static assets)
        location ~* \.mjs$ {
            try_files $uri =404;
            # NOTE(review): the types block above already maps .mjs to
            # text/javascript, so this presumably emits a second Content-Type
            # header - confirm whether it is still needed.
            add_header Content-Type "text/javascript; charset=utf-8" always;
            expires 1y;
            add_header Cache-Control "public, immutable";

            # Repeated because this location defines its own add_header
            add_header X-Frame-Options "SAMEORIGIN" always;
            add_header X-Content-Type-Options "nosniff" always;
            add_header X-XSS-Protection "1; mode=block" always;
            add_header Referrer-Policy "strict-origin-when-cross-origin" always;
        }

        # Cache static assets (but not API endpoints)
        location ~* ^(?!/api/).*\.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ {
            expires 1y;
            add_header Cache-Control "public, immutable";

            # Repeated because this location defines its own add_header
            add_header X-Frame-Options "SAMEORIGIN" always;
            add_header X-Content-Type-Options "nosniff" always;
            add_header X-XSS-Protection "1; mode=block" always;
            add_header Referrer-Policy "strict-origin-when-cross-origin" always;
        }
    }
}

View File

@@ -16,30 +16,30 @@ security:
loginResetTimeMinutes: 120 # lock account for 2 hours after x attempts
loginMethod: all # Accepts values like 'all' and 'normal'(only Login with Username/Password), 'oauth2'(only Login with OAuth2) or 'saml2'(only Login with SAML2)
initialLogin:
username: '' # initial username for the first login
password: '' # initial password for the first login
username: "" # initial username for the first login
password: "" # initial password for the first login
oauth2:
enabled: false # set to 'true' to enable login (Note: enableLogin must also be 'true' for this to work)
client:
keycloak:
issuer: '' # URL of the Keycloak realm's OpenID Connect Discovery endpoint
clientId: '' # client ID for Keycloak OAuth2
clientSecret: '' # client secret for Keycloak OAuth2
issuer: "" # URL of the Keycloak realm's OpenID Connect Discovery endpoint
clientId: "" # client ID for Keycloak OAuth2
clientSecret: "" # client secret for Keycloak OAuth2
scopes: openid, profile, email # scopes for Keycloak OAuth2
useAsUsername: preferred_username # field to use as the username for Keycloak OAuth2. Available options are: [email | name | given_name | family_name | preferred_username]
google:
clientId: '' # client ID for Google OAuth2
clientSecret: '' # client secret for Google OAuth2
clientId: "" # client ID for Google OAuth2
clientSecret: "" # client secret for Google OAuth2
scopes: email, profile # scopes for Google OAuth2
useAsUsername: email # field to use as the username for Google OAuth2. Available options are: [email | name | given_name | family_name]
github:
clientId: '' # client ID for GitHub OAuth2
clientSecret: '' # client secret for GitHub OAuth2
clientId: "" # client ID for GitHub OAuth2
clientSecret: "" # client secret for GitHub OAuth2
scopes: read:user # scope for GitHub OAuth2
useAsUsername: login # field to use as the username for GitHub OAuth2. Available options are: [email | login | name]
issuer: '' # set to any Provider that supports OpenID Connect Discovery (/.well-known/openid-configuration) endpoint
clientId: '' # client ID from your Provider
clientSecret: '' # client secret from your Provider
issuer: "" # set to any Provider that supports OpenID Connect Discovery (/.well-known/openid-configuration) endpoint
clientId: "" # client ID from your Provider
clientSecret: "" # client secret from your Provider
autoCreateUser: true # set to 'true' to allow auto-creation of non-existing users
blockRegistration: false # set to 'true' to deny login with SSO without prior registration by an admin
useAsUsername: email # default is 'email'; custom fields can be used as the username
@@ -47,21 +47,27 @@ security:
provider: google # set this to your OAuth Provider's name, e.g., 'google' or 'keycloak'
saml2:
enabled: false # Only enabled for paid enterprise clients (enterpriseEdition.enabled must be true)
provider: '' # The name of your Provider
provider: "" # The name of your Provider
autoCreateUser: true # set to 'true' to allow auto-creation of non-existing users
blockRegistration: false # set to 'true' to deny login with SSO without prior registration by an admin
registrationId: stirling # The name of your Service Provider (SP) app name. Should match the name in the path for your SSO & SLO URLs
idpMetadataUri: https://dev-XXXXXXXX.okta.com/app/externalKey/sso/saml/metadata # The uri for your Provider's metadata
idpSingleLoginUrl: https://dev-XXXXXXXX.okta.com/app/dev-XXXXXXXX_stirlingpdf_1/externalKey/sso/saml # The URL for initiating SSO. Provided by your Provider
idpSingleLogoutUrl: https://dev-XXXXXXXX.okta.com/app/dev-XXXXXXXX_stirlingpdf_1/externalKey/slo/saml # The URL for initiating SLO. Provided by your Provider
idpIssuer: '' # The ID of your Provider
idpIssuer: "" # The ID of your Provider
idpCert: classpath:okta.cert # The certificate your Provider will use to authenticate your app's SAML authentication requests. Provided by your Provider
privateKey: classpath:saml-private-key.key # Your private key. Generated from your keypair
spCert: classpath:saml-public-cert.crt # Your signing certificate. Generated from your keypair
# IMPORTANT: For SAML setup, download your SP metadata from the BACKEND URL: http://localhost:8080/saml2/service-provider-metadata/{registrationId}
# Do NOT use the frontend dev server URL (localhost:5173) as it will generate incorrect ACS URLs. Always use the backend URL (localhost:8080) for SAML configuration.
jwt: # This feature is currently under development and not yet fully supported. Do not use in production.
persistence: true # Set to 'true' to enable JWT key store
enableKeyRotation: true # Set to 'true' to enable key pair rotation
enableKeyCleanup: true # Set to 'true' to enable key pair cleanup
tokenExpiryMinutes: 1440 # JWT access token lifetime in minutes for web clients (1 day).
desktopTokenExpiryMinutes: 43200 # JWT access token lifetime in minutes for desktop clients (30 days).
allowedClockSkewSeconds: 60 # Allowed JWT validation clock skew in seconds to tolerate small client/server time drift.
refreshGraceMinutes: 15 # Allow refresh using an expired access token only within this many minutes after expiry.
validation: # PDF signature validation settings
trust:
serverAsAnchor: true # Trust server certificate as anchor for PDF signatures (if configured and self-signed or CA)
@@ -78,6 +84,7 @@ security:
revocation:
mode: none # Revocation checking mode: 'none' (disabled), 'ocsp' (OCSP only), 'crl' (CRL only), 'ocsp+crl' (OCSP with CRL fallback)
hardFail: false # Fail validation if revocation status cannot be determined (true=strict, false=soft-fail)
xFrameOptions: DENY # X-Frame-Options header value. Options: 'DENY' (default, prevents all framing), 'SAMEORIGIN' (allows framing from same domain), 'DISABLED' (no X-Frame-Options header sent). Note: automatically set to DISABLED when login is disabled
premium:
key: 00000000-0000-0000-0000-000000000000
@@ -89,11 +96,19 @@ premium:
author: username
creator: Stirling-PDF
producer: Stirling-PDF
googleDrive:
enabled: false # Enable Google Drive file picker integration
clientId: "" # Google OAuth 2.0 client ID (obtain from Google Cloud Console)
apiKey: "" # Google API key for Google Picker API (obtain from Google Cloud Console)
appId: "" # Google Drive app ID
enterpriseFeatures:
audit:
enabled: true # Enable audit logging
level: 2 # Audit logging level: 0=OFF, 1=BASIC, 2=STANDARD, 3=VERBOSE
retentionDays: 90 # Number of days to retain audit logs
enabled: true # Enable audit logging for security and compliance tracking
level: 2 # Audit logging level: 0=OFF, 1=BASIC (compress/split/merge/etc and settings), 2=STANDARD (BASIC + user actions, excludes polling), 3=VERBOSE (everything including polling).
retentionDays: 90 # Number of days to retain audit logs (0 or negative = infinite retention)
captureFileHash: false # Capture SHA-256 hash of uploaded/processed files. Warning: adds 50-200ms per file depending on size. Only enabled independently of audit level.
capturePdfAuthor: false # Capture author metadata from PDF documents. Warning: requires PDF parsing which increases processing time. Only enabled independently of audit level.
captureOperationResults: false # Capture operation return values and responses in audit log. Warning: not recommended, significantly increases log volume and disk usage. Use only for debugging.
databaseNotifications:
backups:
successful: false # set to 'true' to enable email notifications for successful database backups
@@ -107,21 +122,45 @@ mail:
enableInvites: false # set to 'true' to enable email invites for user management (requires mail.enabled and security.enableLogin)
host: smtp.example.com # SMTP server hostname
port: 587 # SMTP server port
username: '' # SMTP server username
password: '' # SMTP server password
from: '' # sender email address
username: "" # SMTP server username
password: "" # SMTP server password
from: "" # sender email address
startTlsEnable: true # enable STARTTLS (explicit TLS upgrade after connecting) when supported by the SMTP server
startTlsRequired: false # require STARTTLS; connection fails if the upgrade command is not supported
sslEnable: false # enable SSL/TLS wrapper for implicit TLS (typically used with port 465)
sslTrust: '' # optional trusted host override, e.g. "smtp.example.com" or "*"; defaults to "*" (trust all) when empty
sslTrust: "" # optional trusted host override, e.g. "smtp.example.com" or "*"; defaults to "*" (trust all) when empty
sslCheckServerIdentity: false # enable hostname verification when using SSL/TLS
telegram:
enabled: false # set to 'true' to enable Telegram bot integration
botToken: "" # Telegram bot token obtained from BotFather
botUsername: "" # Telegram bot username (without @)
pipelineInboxFolder: telegram # Name of the pipeline inbox folder for Telegram uploads
customFolderSuffix: true # set to 'true' to allow users to specify custom target folders via UserID
enableAllowUserIDs: true # set to 'true' to restrict access to specific Telegram user IDs
allowUserIDs: [] # List of allowed Telegram user IDs (e.g. [123456789, 987654321]). Leave empty to allow all users.
enableAllowChannelIDs: true # set to 'true' to restrict access to specific Telegram channel IDs
allowChannelIDs: [] # List of allowed Telegram channel IDs (e.g. [-1001234567890, -1009876543210]). Leave empty to allow all channels.
processingTimeoutSeconds: 180 # Maximum time in seconds to wait for processing a Telegram request
pollingIntervalMillis: 2000 # Interval in milliseconds between polling for new messages
feedback:
channel:
noValidDocument: true # set to 'false' to hide/suppress feedback messages in channels (to avoid spam)
errorProcessing: true # set to 'false' to hide/suppress feedback messages in channels (to avoid spam)
errorMessage: true # set to 'false' to hide/suppress error messages in channels (to avoid spam)
processing: true # set to 'false' to hide/suppress processing messages in channels (to avoid spam)
user:
noValidDocument: true # set to 'false' to hide/suppress feedback messages to users (to avoid spam)
errorProcessing: true # set to 'false' to hide/suppress feedback messages to users (to avoid spam)
errorMessage: true # set to 'false' to hide/suppress error messages to users (to avoid spam)
processing: true # set to 'false' to hide/suppress processing messages to users (to avoid spam)
legal:
termsAndConditions: https://www.stirlingpdf.com/terms # URL to the terms and conditions of your application (e.g. https://example.com/terms). Empty string to disable or filename to load from local file in static folder
privacyPolicy: https://www.stirlingpdf.com/privacy-policy # URL to the privacy policy of your application (e.g. https://example.com/privacy). Empty string to disable or filename to load from local file in static folder
accessibilityStatement: '' # URL to the accessibility statement of your application (e.g. https://example.com/accessibility). Empty string to disable or filename to load from local file in static folder
cookiePolicy: '' # URL to the cookie policy of your application (e.g. https://example.com/cookie). Empty string to disable or filename to load from local file in static folder
impressum: '' # URL to the impressum of your application (e.g. https://example.com/impressum). Empty string to disable or filename to load from local file in static folder
accessibilityStatement: "" # URL to the accessibility statement of your application (e.g. https://example.com/accessibility). Empty string to disable or filename to load from local file in static folder
cookiePolicy: "" # URL to the cookie policy of your application (e.g. https://example.com/cookie). Empty string to disable or filename to load from local file in static folder
impressum: "" # URL to the impressum of your application (e.g. https://example.com/impressum). Empty string to disable or filename to load from local file in static folder
system:
defaultLocale: en-US # set the default language (e.g. 'de-DE', 'fr-FR', etc)
@@ -129,16 +168,25 @@ system:
enableAlphaFunctionality: false # set to enable functionality which might need more testing before it fully goes live (this feature might make no changes)
showUpdate: false # see when a new update is available
showUpdateOnlyAdmin: false # only admins can see when a new update is available, depending on showUpdate it must be set to 'true'
showSettingsWhenNoLogin: true # set to 'false' to hide settings button when login is disabled (enableLogin: false). Only applies when login is disabled.
customHTMLFiles: false # enable to have files placed in /customFiles/templates override the existing template HTML files
tessdataDir: /usr/share/tessdata # path to the directory containing the Tessdata files. This setting is relevant for Windows systems. For Windows users, this path should be adjusted to point to the appropriate directory where the Tessdata files are stored.
enableAnalytics: true # Master toggle for analytics: set to 'true' to enable all analytics, 'false' to disable all analytics, or leave as 'null' to prompt admin on first launch
enableDesktopInstallSlide: true # Set to 'false' to hide the desktop app installation slide in the onboarding flow
enablePosthog: null # Enable PostHog analytics (open-source product analytics): set to 'true' to enable, 'false' to disable, or 'null' to enable by default when analytics is enabled
enableScarf: null # Enable Scarf tracking pixel: set to 'true' to enable, 'false' to disable, or 'null' to enable by default when analytics is enabled
enableUrlToPDF: false # Set to 'true' to enable URL to PDF, INTERNAL ONLY, known security issues, should not be used externally
disableSanitize: false # set to true to disable Sanitize HTML; (can lead to injections in HTML)
maxDPI: 500 # Maximum allowed DPI for PDF to image conversion
corsAllowedOrigins: [] # List of allowed origins for CORS (e.g. ['http://localhost:5173', 'https://app.example.com']). Leave empty to disable CORS.
frontendUrl: '' # Base URL for frontend (e.g. 'https://pdf.example.com'). Used for generating invite links in emails. If empty, falls back to backend URL.
corsAllowedOrigins: [] # List of allowed origins for CORS (e.g. ['http://localhost:5173', 'https://app.example.com']). Leave empty to disable CORS. For local development with frontend on port 5173, add 'http://localhost:5173'
backendUrl: "" # Backend base URL for SAML/OAuth/API callbacks (e.g. 'http://localhost:8080' for dev, 'https://api.example.com' for production). REQUIRED for SSO authentication to work correctly. This is where your IdP will send SAML responses and OAuth callbacks. Leave empty to default to 'http://localhost:8080' in development.
frontendUrl: "" # Frontend URL for invite email links (e.g. 'https://app.example.com'). Optional - if not set, will use backendUrl. This is the URL users click in invite emails.
enableMobileScanner: true # Enable mobile phone QR code upload feature. Requires frontendUrl to be configured.
mobileScannerSettings:
convertToPdf: true # Automatically convert uploaded images to PDF format. If false, images are kept as-is.
imageResolution: full # Image resolution for mobile uploads: 'full' (original size) or 'reduced' (max 1200px on longest side). Only applies when convertToPdf is true.
pageFormat: A4 # Page format for converted PDFs: 'keep' (original image dimensions), 'A4' (A4 page size), or 'letter' (US Letter page size). Only applies when convertToPdf is true.
stretchToFit: false # Whether to stretch images to fill the entire page (may distort aspect ratio). If false, images are centered with preserved aspect ratio. Only applies when convertToPdf is true.
serverCertificate:
enabled: true # Enable server-side certificate for "Sign with Stirling-PDF" option
organizationName: Stirling-PDF # Organization name for generated certificates
@@ -150,14 +198,14 @@ system:
level: MEDIUM # Security level: MAX (whitelist only), MEDIUM (block internal networks), OFF (no restrictions)
allowedDomains: [] # Whitelist of allowed domains (e.g. ['cdn.example.com', 'images.google.com'])
blockedDomains: [] # Additional domains to block (e.g. ['evil.com', 'malicious.org'])
internalTlds: [.local, .internal, .corp, .home] # Block domains with these TLD patterns
internalTlds: [".local", ".internal", ".corp", ".home"] # Block domains with these TLD patterns
blockPrivateNetworks: true # Block RFC 1918 private networks (10.x.x.x, 192.168.x.x, 172.16-31.x.x)
blockLocalhost: true # Block localhost and loopback addresses (127.x.x.x, ::1)
blockLinkLocal: true # Block link-local addresses (169.254.x.x, fe80::/10)
blockCloudMetadata: true # Block cloud provider metadata endpoints (169.254.169.254)
datasource:
enableCustomDatabase: false # Enterprise users ONLY, set this property to 'true' if you would like to use your own custom database configuration
customDatabaseUrl: '' # eg jdbc:postgresql://localhost:5432/postgres, set the url for your own custom database connection. If provided, the type, hostName, port and name are not necessary and will not be used
customDatabaseUrl: "" # eg jdbc:postgresql://localhost:5432/postgres, set the url for your own custom database connection. If provided, the type, hostName, port and name are not necessary and will not be used
username: postgres # set the database username
password: postgres # set the database password
type: postgresql # the type of the database to set (e.g. 'h2', 'postgresql')
@@ -166,52 +214,42 @@ system:
name: postgres # set the name of your database. Should match the name of the database you create
customPaths:
pipeline:
watchedFoldersDir: '' # Defaults to /pipeline/watchedFolders
finishedFoldersDir: '' # Defaults to /pipeline/finishedFolders
pipelineDir: "" # Defaults to /pipeline
watchedFoldersDir: "" # Defaults to /pipeline/watchedFolders
watchedFoldersDirs: [] # List of watched folder directories. Defaults to watchedFoldersDir or /pipeline/watchedFolders.
finishedFoldersDir: "" # Defaults to /pipeline/finishedFolders
operations:
weasyprint: '' # Defaults to /opt/venv/bin/weasyprint
unoconvert: '' # Defaults to /opt/venv/bin/unoconvert
calibre: '' # Defaults to /usr/bin/ebook-convert
ocrmypdf: '' # Defaults to /usr/bin/ocrmypdf
soffice: '' # Defaults to /usr/bin/soffice
fileUploadLimit: '' # Defaults to "". No limit when string is empty. Set a number, between 0 and 999, followed by one of the following strings to set a limit. "KB", "MB", "GB".
weasyprint: "" # Defaults to /opt/venv/bin/weasyprint
unoconvert: "" # Defaults to /opt/venv/bin/unoconvert
calibre: "" # Defaults to /usr/bin/ebook-convert
ocrmypdf: "" # Defaults to /usr/bin/ocrmypdf
soffice: "" # Defaults to /usr/bin/soffice
fileUploadLimit: "" # Defaults to "". No limit when string is empty. Set a number, between 0 and 999, followed by one of the following strings to set a limit. "KB", "MB", "GB".
tempFileManagement:
baseTmpDir: '' # Defaults to java.io.tmpdir/stirling-pdf
libreofficeDir: '' # Defaults to tempFileManagement.baseTmpDir/libreoffice
systemTempDir: '' # Only used if cleanupSystemTemp is true
baseTmpDir: "" # Defaults to java.io.tmpdir/stirling-pdf
libreofficeDir: "" # Defaults to tempFileManagement.baseTmpDir/libreoffice
systemTempDir: "" # Only used if cleanupSystemTemp is true
prefix: stirling-pdf- # Prefix for temp file names
maxAgeHours: 24 # Maximum age in hours before temp files are cleaned up
cleanupIntervalMinutes: 30 # How often to run cleanup (in minutes)
startupCleanup: true # Clean up old temp files on startup
cleanupSystemTemp: false # Whether to clean broader system temp directory
databaseBackup:
cron: 0 0 0 * * ? # Cron expression for automatic database backups "0 0 0 * * ?" daily at midnight
stirling:
pdf:
fallback-font: classpath:/static/fonts/NotoSans-Regular.ttf # Override to point at a custom fallback font
json:
font-normalization:
enabled: false # IMPORTANT: Disable to preserve ToUnicode CMaps for correct font rendering. Ghostscript strips Unicode mappings from CID fonts.
cff-converter:
enabled: true # Wrap CFF/Type1C fonts as OpenType-CFF for browser compatibility
method: python # Converter method: 'python' (fontTools, recommended - wraps as OTF), 'fontforge' (legacy - converts to TTF, may hang on CID fonts)
python-command: /opt/venv/bin/python3 # Python interpreter path
python-script: /scripts/convert_cff_to_ttf.py # Path to font wrapping script
fontforge-command: fontforge # Override if FontForge is installed under a different name/path
type3:
library:
enabled: true # Match common Type3 fonts against the built-in library of converted programs
index: classpath:/type3/library/index.json # Override to point at a custom index.json (supports http:, file:, classpath:)
cron: "0 0 0 * * ?" # Cron expression for automatic database backups "0 0 0 * * ?" daily at midnight
ui:
appNameNavbar: '' # name displayed on the navigation bar
appNameNavbar: "" # name displayed on the navigation bar
logoStyle: classic # Options: 'classic' (default - classic S icon) or 'modern' (minimalist logo)
languages: [] # If empty, all languages are enabled. To display only German and Polish ["de_DE", "pl_PL"]. British English is always enabled.
languages: [] # If empty, all languages are enabled. To restrict to specific languages, use a whitelist like ["de_DE", "pl_PL", "sv_SE"]. Empty list or not restricting any languages will enable all available languages.
defaultHideUnavailableTools: false # Default user preference: hide disabled tools instead of greying them out
defaultHideUnavailableConversions: false # Default user preference: hide disabled conversion options instead of greying them out
hideDisabledTools:
googleDrive: false # Hide Google Drive button when not enabled
mobileQRScanner: false # Hide mobile QR scanner button when not enabled
endpoints:
toRemove: [ebook-to-pdf, crop, merge-pdfs, multi-page-layout, overlay-pdfs, pdf-to-single-page, rearrange-pages, remove-image-pdf, remove-pages, rotate-pdf, scale-pages, split-by-size-or-count, split-pages, split-pdf-by-chapters, split-pdf-by-sections, add-password, add-watermark, auto-redact, cert-sign, get-info-on-pdf, redact, remove-cert-sign, remove-password, sanitize-pdf, validate-signature, file-to-pdf, html-to-pdf, img-to-pdf, markdown-to-pdf, pdf-to-csv, pdf-to-html, pdf-to-img, pdf-to-markdown, pdf-to-pdfa, pdf-to-presentation, pdf-to-text, pdf-to-word, pdf-to-xml, url-to-pdf, add-image, add-page-numbers, add-stamp, auto-rename, auto-split-pdf, compress-pdf, decompress-pdf, extract-image-scans, extract-images, flatten, ocr-pdf, remove-blanks, repair, replace-invert-pdf, show-javascript, update-metadata, filter-contains-image, filter-contains-text, filter-file-size, filter-page-count, filter-page-rotation, filter-page-size, add-attachments] # list endpoints to disable (e.g. ['img-to-pdf', 'remove-pages'])
groupsToRemove: [] # list groups to disable (e.g. ['LibreOffice'])
groupsToRemove: [] # list groups to disable (e.g. ['LibreOffice', 'DeveloperTools', 'DeveloperDocs', 'Automation'])
metrics:
enabled: true # 'true' to enable Info APIs (`/api/*`) endpoints, 'false' to disable
@@ -220,11 +258,23 @@ metrics:
AutomaticallyGenerated:
key: cbb81c0f-50b1-450c-a2b5-89ae527776eb
UUID: 10dd4fba-01fa-4717-9b78-3dc4f54e398a
appVersion: 2.1.2
appVersion: 2.7.2
processExecutor:
autoUnoServer: true # true: use local pool based on libreOfficeSessionLimit; false: use unoServerEndpoints
unoServerEndpoints: [] # Used when autoUnoServer is false
# Example manual endpoints (uncomment to use):
# unoServerEndpoints:
# - host: "127.0.0.1"
# port: 2003
# hostLocation: "auto" # auto|local|remote (use "remote" for port-forwarded servers)
# protocol: "http" # http|https
# - host: "remote-server.local"
# port: 8080
# hostLocation: "remote"
# protocol: "https"
sessionLimit: # Process executor instances limits
libreOfficeSessionLimit: 1
libreOfficeSessionLimit: 1 # Each additional uno server adds ~50MB idle RAM
pdfToHtmlSessionLimit: 1
qpdfSessionLimit: 4
tesseractSessionLimit: 1
@@ -232,6 +282,7 @@ processExecutor:
weasyPrintSessionLimit: 16
installAppSessionLimit: 1
calibreSessionLimit: 1
imageMagickSessionLimit: 4
ghostscriptSessionLimit: 8
ocrMyPdfSessionLimit: 2
timeoutMinutes: # Process executor timeout in minutes
@@ -241,7 +292,26 @@ processExecutor:
weasyPrinttimeoutMinutes: 30
installApptimeoutMinutes: 60
calibretimeoutMinutes: 30
imageMagickTimeoutMinutes: 30
tesseractTimeoutMinutes: 30
qpdfTimeoutMinutes: 30
ghostscriptTimeoutMinutes: 30
ocrMyPdfTimeoutMinutes: 30
pdfEditor:
fallback-font: classpath:/static/fonts/NotoSans-Regular.ttf # Override to point at a custom fallback font
cache:
max-bytes: -1 # Max in-memory cache size in bytes; -1 disables byte cap
max-percent: 20 # Max in-memory cache as % of JVM max; used when max-bytes <= 0
font-normalization:
enabled: false # IMPORTANT: Keep disabled to preserve ToUnicode CMaps for correct font rendering; Ghostscript strips Unicode mappings from CID fonts.
cff-converter:
enabled: true # Wrap CFF/Type1CFF fonts as OpenType-CFF for browser compatibility
method: python # Converter method: 'python' (fontTools, recommended - wraps as OTF), 'fontforge' (legacy - converts to TTF, may hang on CID fonts)
python-command: /opt/venv/bin/python3 # Python interpreter path
python-script: /scripts/convert_cff_to_ttf.py # Path to font wrapping script
fontforge-command: fontforge # Override if FontForge is installed under a different name/path
type3:
library:
enabled: true # Match common Type3 fonts against the built-in library of converted programs
index: classpath:/type3/library/index.json # Override to point at a custom index.json (supports http:, file:, classpath:)

View File

@@ -31,6 +31,47 @@ find_root() {
PROJECT_ROOT=$(find_root)
# Resolve the base image tag used by the tests.
# An explicit BASE_VERSION from the environment always wins (production
# callers are expected to pass one). When it is empty, fall back to a
# test-only tag:
#   - CI/automation: derived from GITHUB_RUN_ID
#   - local runs:    derived from the current epoch seconds
if [ -z "$BASE_VERSION" ]; then
    case "${GITHUB_RUN_ID}" in
        "") BASE_VERSION="test-local-$(date +%s)" ;;
        *)  BASE_VERSION="test-${GITHUB_RUN_ID}" ;;
    esac
fi

# Fully qualified reference of the base image for the resolved version.
BASE_IMAGE="ghcr.io/stirling-tools/stirling-pdf-base:${BASE_VERSION}"
# Make sure "$BASE_IMAGE" is available to the local Docker daemon.
# Sources are tried in order, stopping at the first one that works:
#   1. the local image store   (docker image inspect)
#   2. the registry            (docker pull)
#   3. a build from "$PROJECT_ROOT/docker/base/Dockerfile"
# Reads the globals BASE_IMAGE, BASE_VERSION and PROJECT_ROOT.
# Returns 0 once the image is available, 1 if the build step fails.
ensure_base_image() {
    echo "Checking for base image: $BASE_IMAGE"

    if docker image inspect "$BASE_IMAGE" >/dev/null 2>&1; then
        echo "✓ Base image found locally: $BASE_IMAGE"
        return 0
    fi

    echo "Base image not found. Attempting to pull from registry..."
    # Pull errors are silenced: a missing tag is expected and handled below.
    if docker pull "$BASE_IMAGE" 2>/dev/null; then
        echo "✓ Pulled base image from registry: $BASE_IMAGE"
        return 0
    fi

    echo "Base image not available in registry. Building from source..."
    # The base directory serves as both Dockerfile location and build context.
    local base_dir="$PROJECT_ROOT/docker/base"
    local -a build_opts=(
        -f "$base_dir/Dockerfile"
        -t "$BASE_IMAGE"
        --build-arg BASE_VERSION="$BASE_VERSION"
    )
    if ! docker build "${build_opts[@]}" "$base_dir"; then
        echo "ERROR: Failed to build base image"
        return 1
    fi

    echo "✓ Built base image: $BASE_IMAGE"
    return 0
}
# Function to check application readiness via HTTP instead of Docker's health status
check_health() {
local container_name=$1 # real container name
@@ -101,14 +142,16 @@ capture_file_list() {
-not -path '/configs/*' \
-not -path '/logs/*' \
-not -path '*/home/stirlingpdfuser/.config/libreoffice/*' \
-not -path '*/home/stirlingpdfuser/.config/calibre/*' \
-not -path '*/home/stirlingpdfuser/.java/fonts/*' \
-not -path '*/home/stirlingpdfuser/.pdfbox.cache' \
-not -path '*/tmp/stirling-pdf/PDFBox*' \
-not -path '*/tmp/stirling-pdf/hsperfdata_stirlingpdfuser/*' \
-not -path '*/tmp/hsperfdata_stirlingpdfuser/*' \
-not -path '*/tmp/hsperfdata_root/*' \
-not -path '*/tmp/stirling-pdf/jetty-*/*' \
-not -path '*/tmp/stirling-pdf/lu*' \
-not -path '*/tmp/stirling-pdf/tmp*' \
-not -path '/tmp/lu*' \
-not -path '*/tmp/*/user/registrymodifications.xcu' \
-not -path '/app/stirling.aot' \
-not -path '*/tmp/stirling.aotconf' \
-not -path '*/tmp/aot-*.log' \
@@ -128,14 +171,16 @@ capture_file_list() {
-not -path '/configs/*' \
-not -path '/logs/*' \
-not -path '*/home/stirlingpdfuser/.config/libreoffice/*' \
-not -path '*/home/stirlingpdfuser/.config/calibre/*' \
-not -path '*/home/stirlingpdfuser/.java/fonts/*' \
-not -path '*/home/stirlingpdfuser/.pdfbox.cache' \
-not -path '*/tmp/PDFBox*' \
-not -path '*/tmp/hsperfdata_stirlingpdfuser/*' \
-not -path '*/tmp/hsperfdata_root/*' \
-not -path '*/tmp/stirling-pdf/hsperfdata_stirlingpdfuser/*' \
-not -path '*/tmp/stirling-pdf/jetty-*/*' \
-not -path '*/tmp/lu*' \
-not -path '*/tmp/tmp*' \
-not -path '/tmp/lu*' \
-not -path '/tmp/tmp*' \
-not -path '/app/stirling.aot' \
-not -path '*/tmp/stirling.aotconf' \
-not -path '*/tmp/aot-*.log' \
@@ -374,6 +419,13 @@ main() {
SECONDS=0
cd "$PROJECT_ROOT"
# Ensure base image exists before running tests
echo "=========================================="
echo "Preparing Docker base image..."
echo "=========================================="
ensure_base_image || exit 1
echo ""
# Parse command line arguments
RERUN_MODE=false
declare -a RERUN_TESTS