mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2026-04-06 03:19:39 +02:00
Base docker image (#5958)
Co-authored-by: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,9 @@ build/
|
||||
**/build/
|
||||
out/
|
||||
target/
|
||||
**/target/
|
||||
bin/
|
||||
version_builds/
|
||||
|
||||
# Gradle caches (local, not what's in the container)
|
||||
.gradle/
|
||||
@@ -16,9 +19,15 @@ target/
|
||||
# Node / frontend
|
||||
node_modules/
|
||||
**/node_modules/
|
||||
frontend/node_modules/
|
||||
frontend/dist/
|
||||
.npm/
|
||||
.yarn/
|
||||
|
||||
# Tauri/desktop builds
|
||||
src-tauri/target/
|
||||
src-tauri/dist/
|
||||
|
||||
# IDE and editor
|
||||
.idea/
|
||||
.vscode/
|
||||
@@ -46,7 +55,21 @@ Dockerfile*
|
||||
**/test-results/
|
||||
**/jacoco/
|
||||
|
||||
# Testing and documentation (not needed in build)
|
||||
testing/
|
||||
docs/
|
||||
*.md
|
||||
README*
|
||||
|
||||
# Local env
|
||||
.env
|
||||
.env.*
|
||||
!.env.example
|
||||
|
||||
# Misc
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
.DS_Store
|
||||
.cache/
|
||||
.pytest_cache/
|
||||
|
||||
13
.github/config/.files.yaml
vendored
13
.github/config/.files.yaml
vendored
@@ -6,14 +6,20 @@ openapi: &openapi
|
||||
- *build
|
||||
- app/(common|core|proprietary)/src/main/java/**
|
||||
|
||||
docker-base: &docker-base
|
||||
- docker/base/Dockerfile
|
||||
- ".github/workflows/push-docker-base.yml"
|
||||
|
||||
docker: &docker
|
||||
- Dockerfile
|
||||
- Dockerfile.fat
|
||||
- Dockerfile.ultra-lite
|
||||
- docker/embedded/Dockerfile
|
||||
- docker/embedded/Dockerfile.fat
|
||||
- docker/embedded/Dockerfile.ultra-lite
|
||||
- ".github/workflows/build.yml"
|
||||
- ".github/workflows/push-docker.yml"
|
||||
- scripts/init.sh
|
||||
- scripts/init-without-ocr.sh
|
||||
- exampleYmlFiles/**
|
||||
- *docker-base
|
||||
|
||||
project: &project
|
||||
- app/(common|core|proprietary)/src/(main|test)/java/**
|
||||
@@ -24,6 +30,7 @@ project: &project
|
||||
- libs/**
|
||||
- "testing/**/!(requirements*.txt|requirements*.in)*"
|
||||
- *docker
|
||||
- *docker-base
|
||||
- gradle.properties
|
||||
- gradlew
|
||||
- gradlew.bat
|
||||
|
||||
32
.github/workflows/build.yml
vendored
32
.github/workflows/build.yml
vendored
@@ -30,6 +30,7 @@ jobs:
|
||||
project: ${{ steps.changes.outputs.project }}
|
||||
openapi: ${{ steps.changes.outputs.openapi }}
|
||||
frontend: ${{ steps.changes.outputs.frontend }}
|
||||
docker-base: ${{ steps.changes.outputs.docker-base }}
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@58077d3c7e43986b6b15fba718e8ea69e387dfcc # v2.15.1
|
||||
@@ -402,6 +403,17 @@ jobs:
|
||||
- name: Checkout Repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3.7.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ github.token }}
|
||||
|
||||
# Exposes the repository owner in lowercase as `steps.repoowner.outputs.lowercase`
# (used below to build the ghcr.io image reference).
# The owner is handed to the shell through `env:` rather than spliced into the script text,
# so the value is never parsed as shell code.
- name: Convert repository owner to lowercase
  id: repoowner
  env:
    REPO_OWNER: ${{ github.repository_owner }}
  run: |
    echo "lowercase=$(printf '%s' "$REPO_OWNER" | awk '{print tolower($0)}')" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Free disk space on runner
|
||||
run: |
|
||||
echo "Disk space before cleanup:" && df -h
|
||||
@@ -446,6 +458,22 @@ jobs:
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0
|
||||
|
||||
# Builds docker/base/Dockerfile (context: docker/base) into the local tag
# `stirling-pdf-base:pr-test`. Runs only on pull_request events for which the
# files-changed job reported docker-base == 'true'.
# NOTE(review): `docker build` stores the image in the runner's Docker engine; confirm the
# buildx builder used by the later build step can resolve that local tag
# (a docker-container builder cannot see engine-local images) — TODO confirm.
- name: Build base image locally (PR base change only)
  if: github.event_name == 'pull_request' && needs.files-changed.outputs.docker-base == 'true'
  run: >-
    docker build
    -t stirling-pdf-base:pr-test
    -f docker/base/Dockerfile
    docker/base
|
||||
|
||||
# Chooses the BASE_IMAGE build argument and the target platforms for the image build.
#   - pull request that changed the docker-base file group:
#       locally built `stirling-pdf-base:pr-test`, amd64 only
#   - every other case:
#       published `ghcr.io/<owner>/stirling-pdf-base:latest`, amd64 + arm64/v8
# Outputs: `base_image`, `platforms`.
# Context values are passed through `env:` so they are compared as data, never expanded
# into the script text; `=` is used because `==` is not POSIX inside `[ ]`.
- name: Set base image and platform for this build
  id: build-params
  env:
    EVENT_NAME: ${{ github.event_name }}
    BASE_CHANGED: ${{ needs.files-changed.outputs.docker-base }}
    REPO_OWNER_LC: ${{ steps.repoowner.outputs.lowercase }}
  run: |
    if [ "$EVENT_NAME" = "pull_request" ] && [ "$BASE_CHANGED" = "true" ]; then
      {
        echo "base_image=stirling-pdf-base:pr-test"
        echo "platforms=linux/amd64"
      } >> "$GITHUB_OUTPUT"
    else
      {
        echo "base_image=ghcr.io/${REPO_OWNER_LC}/stirling-pdf-base:latest"
        echo "platforms=linux/amd64,linux/arm64/v8"
      } >> "$GITHUB_OUTPUT"
    fi
|
||||
|
||||
- name: Build ${{ matrix.docker-rev }}
|
||||
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
|
||||
with:
|
||||
@@ -455,7 +483,9 @@ jobs:
|
||||
push: false
|
||||
cache-from: type=gha,scope=${{ matrix.cache-scope }}
|
||||
cache-to: type=gha,mode=max,scope=${{ matrix.cache-scope }}
|
||||
platforms: linux/amd64,linux/arm64/v8
|
||||
platforms: ${{ steps.build-params.outputs.platforms }}
|
||||
build-args: |
|
||||
BASE_IMAGE=${{ steps.build-params.outputs.base_image }}
|
||||
provenance: true
|
||||
sbom: true
|
||||
|
||||
|
||||
119
.github/workflows/push-docker-base.yml
vendored
Normal file
119
.github/workflows/push-docker-base.yml
vendored
Normal file
@@ -0,0 +1,119 @@
|
||||
# Builds the shared Stirling-PDF base image from docker/base/Dockerfile, pushes it to
# Docker Hub and GHCR for linux/amd64 and linux/arm64/v8, and signs the pushed tags with
# cosign when a signing key is configured.
#
# Triggers:
#   - push to the `baseDockerImage` branch (publishes version 1.0.0)
#   - manual dispatch with an explicit `version` input
name: Push Docker Base Image

on:
  push:
    branches:
      - baseDockerImage
  workflow_dispatch:
    inputs:
      version:
        description: 'Base image version (e.g., 1.0.0, 1.0.1)'
        required: true
        type: string

permissions:
  contents: read

jobs:
  push-base:
    if: ${{ vars.CI_PROFILE != 'lite' && github.actor == 'Frooodle' }}
    runs-on: ubuntu-24.04-8core
    # A job-level permissions block replaces the workflow-level one: every scope not listed
    # here becomes "none". `contents: read` is therefore repeated so checkout keeps working.
    permissions:
      contents: read
      packages: write
      id-token: write
    steps:
      # Runs first so that every later step is covered by the egress audit.
      - name: Harden Runner
        uses: step-security/harden-runner@58077d3c7e43986b6b15fba718e8ea69e387dfcc # v2.15.1
        with:
          egress-policy: audit

      # Second check on top of the job-level `if:`. The actor is passed through `env:` so
      # it is compared as data instead of being expanded into the script text.
      - name: Verify authorized user
        env:
          ACTOR: ${{ github.actor }}
        run: |
          if [ "$ACTOR" != "Frooodle" ]; then
            echo "Error: Only Frooodle is authorized to run this workflow"
            exit 1
          fi

      # Resolves the image version: the dispatch input when run manually, 1.0.0 on push.
      # Output: `version`.
      - name: Set version
        id: version
        env:
          EVENT_NAME: ${{ github.event_name }}
          REQUESTED_VERSION: ${{ github.event.inputs.version }}
        run: |
          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
            VERSION="$REQUESTED_VERSION"
          else
            VERSION="1.0.0"
          fi
          # The version becomes an image tag and a line in $GITHUB_OUTPUT, so reject anything
          # that is not a valid tag (this also rules out embedded newlines).
          tag_re='^[A-Za-z0-9_][A-Za-z0-9_.-]{0,127}$'
          if [[ ! "$VERSION" =~ $tag_re ]]; then
            echo "Error: '${VERSION}' is not a valid image tag"
            exit 1
          fi
          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Login to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3.7.0
        with:
          username: ${{ secrets.DOCKER_HUB_USERNAME }}
          password: ${{ secrets.DOCKER_HUB_API }}

      - name: Login to GitHub Container Registry
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3.7.0
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ github.token }}

      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0

      # Emulation for the non-native platform of the multi-arch build below.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3.7.0

      # Output: `lowercase`, the repository owner in lowercase for the ghcr.io image name.
      - name: Convert repository owner to lowercase
        id: repoowner
        env:
          REPO_OWNER: ${{ github.repository_owner }}
        run: |
          echo "lowercase=$(printf '%s' "$REPO_OWNER" | awk '{print tolower($0)}')" >> "$GITHUB_OUTPUT"

      # NOTE(review): only the version tag is published here, while build.yml resolves the
      # base image as `stirling-pdf-base:latest`. Confirm `latest` is pushed elsewhere, or
      # add `type=raw,value=latest` to the tag list.
      - name: Generate tags for base image
        id: meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # v6.0.0
        with:
          images: |
            ${{ secrets.DOCKER_HUB_ORG_USERNAME }}/stirling-pdf-base
            ghcr.io/${{ steps.repoowner.outputs.lowercase }}/stirling-pdf-base
          tags: |
            type=raw,value=${{ steps.version.outputs.version }}

      - name: Build and push base image
        id: build-push-base
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
        with:
          builder: ${{ steps.buildx.outputs.name }}
          context: docker/base
          file: ./docker/base/Dockerfile
          push: true
          cache-from: type=gha,scope=stirling-pdf-base
          cache-to: type=gha,mode=max,scope=stirling-pdf-base
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64/v8
          provenance: true
          sbom: true

      - name: Install cosign
        uses: sigstore/cosign-installer@faadad0cce49287aee09b3a48701e75088a2c6ad # v4.0.0
        with:
          cosign-release: "v2.4.1"

      # Signs every pushed tag, pinned to the digest reported by the build step.
      # Signing is skipped with a warning when COSIGN_PRIVATE_KEY is not configured.
      - name: Sign base images
        env:
          DIGEST: ${{ steps.build-push-base.outputs.digest }}
          TAGS: ${{ steps.meta.outputs.tags }}
          COSIGN_PRIVATE_KEY: ${{ secrets.COSIGN_PRIVATE_KEY }}
          COSIGN_PASSWORD: ${{ secrets.COSIGN_PASSWORD }}
        run: |
          if [ -n "$COSIGN_PRIVATE_KEY" ]; then
            echo "$TAGS" | tr ',' '\n' | while IFS= read -r tag; do
              # Skip blank entries so an empty line never turns into a bare "@digest" reference.
              [ -n "$tag" ] || continue
              cosign sign --yes \
                --key env://COSIGN_PRIVATE_KEY \
                "${tag}@${DIGEST}"
            done
          else
            echo "Warning: COSIGN_PRIVATE_KEY not set, skipping image signing"
          fi
|
||||
4
.github/workflows/push-docker.yml
vendored
4
.github/workflows/push-docker.yml
vendored
@@ -130,7 +130,9 @@ jobs:
|
||||
cache-to: type=gha,mode=max,scope=stirling-pdf-latest
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
build-args: VERSION_TAG=${{ steps.versionNumber.outputs.versionNumber }}
|
||||
build-args: |
|
||||
VERSION_TAG=${{ steps.versionNumber.outputs.versionNumber }}
|
||||
BASE_VERSION=1.0.0
|
||||
platforms: linux/amd64,linux/arm64/v8
|
||||
provenance: true
|
||||
sbom: true
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -29,6 +29,9 @@ clientWebUI/
|
||||
exampleYmlFiles/stirling/
|
||||
/stirling/
|
||||
/testing/file_snapshots
|
||||
/testing/cucumber/junit/
|
||||
/testing/cucumber/report.html
|
||||
/testing/.failed_tests
|
||||
SwaggerDoc.json
|
||||
|
||||
# Runtime storage for uploaded files and user data (not Java source code)
|
||||
|
||||
@@ -1,157 +0,0 @@
|
||||
# Unified Dockerfile - Frontend + Backend in single container
|
||||
# Supports MODE parameter: BOTH (default), FRONTEND, BACKEND
|
||||
|
||||
# Stage 1: Build Frontend
|
||||
FROM node:20-alpine@sha256:658d0f63e501824d6c23e06d4bb95c71e7d704537c9d9272f488ac03a370d448 AS frontend-build
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY frontend/package.json frontend/package-lock.json ./
|
||||
RUN npm ci
|
||||
|
||||
COPY frontend .
|
||||
# Override VITE_API_BASE_URL to use relative paths for production
|
||||
# This ensures frontend works with nginx proxy setup
|
||||
RUN DISABLE_ADDITIONAL_FEATURES=false VITE_API_BASE_URL=/ npm run build
|
||||
|
||||
# Stage 2: Build Backend (server-only JAR - no UI)
|
||||
FROM gradle:8.14-jdk21@sha256:051d9a116793bdc5175a3f97a545718b750489eee85a7da20913c8a53f722a72 AS backend-build
|
||||
|
||||
COPY build.gradle .
|
||||
COPY settings.gradle .
|
||||
COPY gradlew .
|
||||
COPY gradle gradle/
|
||||
COPY app/core/build.gradle core/.
|
||||
COPY app/common/build.gradle common/.
|
||||
COPY app/proprietary/build.gradle proprietary/.
|
||||
|
||||
ENV JAVA_TOOL_OPTIONS="--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
|
||||
--add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \
|
||||
--add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \
|
||||
--add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
|
||||
--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED"
|
||||
|
||||
RUN ./gradlew build -x spotlessApply -x spotlessCheck -x test -x sonarqube || return 0
|
||||
|
||||
WORKDIR /app
|
||||
COPY . .
|
||||
|
||||
# Build server-only JAR (no frontend, includes security features controlled by DOCKER_ENABLE_SECURITY at runtime)
|
||||
RUN DISABLE_ADDITIONAL_FEATURES=false \
|
||||
./gradlew clean build -x spotlessApply -x spotlessCheck -x test -x sonarqube
|
||||
|
||||
# Stage 3: Final unified image
|
||||
FROM alpine:3.22.1@sha256:4bcff63911fcb4448bd4fdacec207030997caf25e9bea4045fa6c8c44de311d1
|
||||
|
||||
ARG VERSION_TAG
|
||||
|
||||
# Labels
|
||||
LABEL org.opencontainers.image.title="Stirling-PDF Unified"
|
||||
LABEL org.opencontainers.image.description="Unified container for Stirling-PDF - Frontend + Backend with MODE parameter"
|
||||
LABEL org.opencontainers.image.source="https://github.com/Stirling-Tools/Stirling-PDF"
|
||||
LABEL org.opencontainers.image.licenses="MIT"
|
||||
LABEL org.opencontainers.image.vendor="Stirling-Tools"
|
||||
LABEL org.opencontainers.image.url="https://www.stirlingpdf.com"
|
||||
LABEL org.opencontainers.image.documentation="https://docs.stirlingpdf.com"
|
||||
LABEL maintainer="Stirling-Tools"
|
||||
LABEL org.opencontainers.image.authors="Stirling-Tools"
|
||||
LABEL org.opencontainers.image.version="${VERSION_TAG}"
|
||||
LABEL org.opencontainers.image.keywords="PDF, manipulation, unified, API, Spring Boot, React"
|
||||
|
||||
# Copy backend files
|
||||
COPY scripts /scripts
|
||||
COPY app/core/src/main/resources/static/fonts/*.ttf /usr/share/fonts/opentype/noto/
|
||||
|
||||
# Copy built JAR
|
||||
# Use numeric UID:GID (1000:1000) since the named user doesn't exist yet at COPY time
|
||||
COPY --from=backend-build --chown=1000:1000 \
|
||||
/app/app/core/build/libs/*.jar app.jar
|
||||
|
||||
# Copy frontend files
|
||||
COPY --from=frontend-build /app/dist /usr/share/nginx/html
|
||||
|
||||
# Copy nginx configuration
|
||||
COPY docker/unified/nginx.conf /etc/nginx/nginx.conf
|
||||
COPY docker/unified/entrypoint.sh /entrypoint.sh
|
||||
|
||||
# Environment Variables
|
||||
ENV VERSION_TAG=$VERSION_TAG \
|
||||
JAVA_BASE_OPTS="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UnlockExperimentalVMOptions -XX:MaxRAMPercentage=75 -XX:InitiatingHeapOccupancyPercent=20 -XX:+G1PeriodicGCInvokesConcurrent -XX:G1PeriodicGCInterval=10000 -XX:+UseStringDeduplication -XX:G1PeriodicGCSystemLoadThreshold=70" \
|
||||
JAVA_CUSTOM_OPTS="" \
|
||||
HOME=/home/stirlingpdfuser \
|
||||
PUID=1000 \
|
||||
PGID=1000 \
|
||||
UMASK=022 \
|
||||
PYTHONPATH=/usr/lib/libreoffice/program:/opt/venv/lib/python3.12/site-packages \
|
||||
UNO_PATH=/usr/lib/libreoffice/program \
|
||||
URE_BOOTSTRAP=file:///usr/lib/libreoffice/program/fundamentalrc \
|
||||
PATH=$PATH:/opt/venv/bin \
|
||||
STIRLING_TEMPFILES_DIRECTORY=/tmp/stirling-pdf \
|
||||
TMPDIR=/tmp/stirling-pdf \
|
||||
TEMP=/tmp/stirling-pdf \
|
||||
TMP=/tmp/stirling-pdf \
|
||||
MODE=BOTH \
|
||||
BACKEND_INTERNAL_PORT=8081 \
|
||||
VITE_API_BASE_URL=http://localhost:8080
|
||||
|
||||
# Install all dependencies
|
||||
# Removed wasteful pip upgrade; chown moved to COPY above
|
||||
RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
|
||||
echo "@community https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
|
||||
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \
|
||||
apk upgrade --no-cache -a && \
|
||||
apk add --no-cache \
|
||||
ca-certificates \
|
||||
tzdata \
|
||||
tini \
|
||||
bash \
|
||||
curl \
|
||||
shadow \
|
||||
su-exec \
|
||||
openssl \
|
||||
openssl-dev \
|
||||
openjdk21-jre \
|
||||
nginx \
|
||||
# Doc conversion
|
||||
gcompat \
|
||||
libc6-compat \
|
||||
libreoffice \
|
||||
imagemagick \
|
||||
# pdftohtml
|
||||
poppler-utils \
|
||||
# OCR MY PDF
|
||||
unpaper \
|
||||
tesseract-ocr-data-eng \
|
||||
tesseract-ocr-data-chi_sim \
|
||||
tesseract-ocr-data-deu \
|
||||
tesseract-ocr-data-fra \
|
||||
tesseract-ocr-data-por \
|
||||
ocrmypdf \
|
||||
# CV
|
||||
py3-opencv \
|
||||
python3 \
|
||||
py3-pip \
|
||||
py3-pillow@testing \
|
||||
py3-pdf2image@testing && \
|
||||
python3 -m venv /opt/venv && \
|
||||
/opt/venv/bin/pip install --no-cache-dir unoserver weasyprint && \
|
||||
ln -s /usr/lib/libreoffice/program/uno.py /opt/venv/lib/python3.12/site-packages/ && \
|
||||
ln -s /usr/lib/libreoffice/program/unohelper.py /opt/venv/lib/python3.12/site-packages/ && \
|
||||
ln -s /usr/lib/libreoffice/program /opt/venv/lib/python3.12/site-packages/LibreOffice && \
|
||||
# Clean up pip + setuptools from venv
|
||||
find /opt/venv -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true; \
|
||||
rm -rf /opt/venv/lib/python*/site-packages/pip \
|
||||
/opt/venv/lib/python*/site-packages/setuptools && \
|
||||
mv /usr/share/tessdata /usr/share/tessdata-original && \
|
||||
mkdir -p $HOME /configs /configs/heap_dumps /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders /tmp/stirling-pdf /pipeline/watchedFolders /pipeline/finishedFolders && \
|
||||
mkdir -p /var/lib/nginx/tmp /var/log/nginx && \
|
||||
fc-cache -f -v && \
|
||||
chmod +x /scripts/* && \
|
||||
chmod +x /entrypoint.sh && \
|
||||
# User permissions
|
||||
addgroup -S stirlingpdfgroup && adduser -S stirlingpdfuser -G stirlingpdfgroup && \
|
||||
chown -R stirlingpdfuser:stirlingpdfgroup $HOME /scripts /pipeline /usr/share/fonts/opentype/noto /configs /customFiles /pipeline /tmp/stirling-pdf /var/lib/nginx /var/log/nginx /usr/share/nginx
|
||||
|
||||
EXPOSE 8080/tcp
|
||||
|
||||
ENTRYPOINT ["tini", "--", "/entrypoint.sh"]
|
||||
@@ -1,119 +0,0 @@
|
||||
# Unified Ultra-Lite Dockerfile - Frontend + Backend in single container with minimal dependencies
|
||||
# Supports MODE parameter: BOTH (default), FRONTEND, BACKEND
|
||||
|
||||
# Stage 1: Build Frontend
|
||||
FROM node:20-alpine@sha256:658d0f63e501824d6c23e06d4bb95c71e7d704537c9d9272f488ac03a370d448 AS frontend-build
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY frontend/package.json frontend/package-lock.json ./
|
||||
RUN npm ci
|
||||
|
||||
COPY frontend .
|
||||
# Override VITE_API_BASE_URL to use relative paths for production
|
||||
# This ensures frontend works with nginx proxy setup
|
||||
RUN DISABLE_ADDITIONAL_FEATURES=true VITE_API_BASE_URL=/ npm run build
|
||||
|
||||
# Stage 2: Build Backend
|
||||
FROM gradle:8.14-jdk21@sha256:051d9a116793bdc5175a3f97a545718b750489eee85a7da20913c8a53f722a72 AS backend-build
|
||||
|
||||
COPY build.gradle .
|
||||
COPY settings.gradle .
|
||||
COPY gradlew .
|
||||
COPY gradle gradle/
|
||||
COPY app/core/build.gradle core/.
|
||||
COPY app/common/build.gradle common/.
|
||||
COPY app/proprietary/build.gradle proprietary/.
|
||||
|
||||
ENV JAVA_TOOL_OPTIONS="--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
|
||||
--add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \
|
||||
--add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \
|
||||
--add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
|
||||
--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED"
|
||||
|
||||
RUN ./gradlew build -x spotlessApply -x spotlessCheck -x test -x sonarqube || return 0
|
||||
|
||||
WORKDIR /app
|
||||
COPY . .
|
||||
|
||||
RUN DISABLE_ADDITIONAL_FEATURES=true \
|
||||
./gradlew clean build -x spotlessApply -x spotlessCheck -x test -x sonarqube
|
||||
|
||||
# Stage 3: Final unified ultra-lite image
|
||||
FROM alpine:3.22.1@sha256:4bcff63911fcb4448bd4fdacec207030997caf25e9bea4045fa6c8c44de311d1
|
||||
|
||||
ARG VERSION_TAG
|
||||
|
||||
# Labels
|
||||
LABEL org.opencontainers.image.title="Stirling-PDF Unified Ultra-Lite"
|
||||
LABEL org.opencontainers.image.description="Unified ultra-lite container for Stirling-PDF - Frontend + Backend with minimal dependencies"
|
||||
LABEL org.opencontainers.image.source="https://github.com/Stirling-Tools/Stirling-PDF"
|
||||
LABEL org.opencontainers.image.licenses="MIT"
|
||||
LABEL org.opencontainers.image.vendor="Stirling-Tools"
|
||||
LABEL org.opencontainers.image.url="https://www.stirlingpdf.com"
|
||||
LABEL org.opencontainers.image.documentation="https://docs.stirlingpdf.com"
|
||||
LABEL maintainer="Stirling-Tools"
|
||||
LABEL org.opencontainers.image.authors="Stirling-Tools"
|
||||
LABEL org.opencontainers.image.version="${VERSION_TAG}"
|
||||
LABEL org.opencontainers.image.keywords="PDF, manipulation, unified, ultra-lite, API, Spring Boot, React"
|
||||
|
||||
# Copy backend files
|
||||
COPY scripts/init-without-ocr.sh /scripts/init-without-ocr.sh
|
||||
COPY scripts/installFonts.sh /scripts/installFonts.sh
|
||||
|
||||
# Copy built JAR
|
||||
# Use numeric UID:GID (1000:1000) since the named user doesn't exist yet at COPY time
|
||||
COPY --from=backend-build --chown=1000:1000 \
|
||||
/app/app/core/build/libs/*.jar app.jar
|
||||
|
||||
# Copy frontend files
|
||||
COPY --from=frontend-build /app/dist /usr/share/nginx/html
|
||||
|
||||
# Copy nginx configuration
|
||||
COPY docker/unified/nginx.conf /etc/nginx/nginx.conf
|
||||
COPY docker/unified/entrypoint.sh /entrypoint.sh
|
||||
|
||||
# Environment Variables
|
||||
ENV DISABLE_ADDITIONAL_FEATURES=false \
|
||||
VERSION_TAG=$VERSION_TAG \
|
||||
JAVA_BASE_OPTS="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UnlockExperimentalVMOptions -XX:MaxRAMPercentage=75 -XX:InitiatingHeapOccupancyPercent=20 -XX:+G1PeriodicGCInvokesConcurrent -XX:G1PeriodicGCInterval=10000 -XX:+UseStringDeduplication -XX:G1PeriodicGCSystemLoadThreshold=70" \
|
||||
JAVA_CUSTOM_OPTS="" \
|
||||
HOME=/home/stirlingpdfuser \
|
||||
PUID=1000 \
|
||||
PGID=1000 \
|
||||
UMASK=022 \
|
||||
STIRLING_TEMPFILES_DIRECTORY=/tmp/stirling-pdf \
|
||||
TMPDIR=/tmp/stirling-pdf \
|
||||
TEMP=/tmp/stirling-pdf \
|
||||
TMP=/tmp/stirling-pdf \
|
||||
MODE=BOTH \
|
||||
BACKEND_INTERNAL_PORT=8081 \
|
||||
ENDPOINTS_GROUPS_TO_REMOVE=CLI
|
||||
|
||||
# Install minimal dependencies
|
||||
# /app.jar chown moved to COPY above
|
||||
RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
|
||||
echo "@community https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
|
||||
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \
|
||||
apk upgrade --no-cache -a && \
|
||||
apk add --no-cache \
|
||||
ca-certificates \
|
||||
tzdata \
|
||||
tini \
|
||||
bash \
|
||||
curl \
|
||||
shadow \
|
||||
su-exec \
|
||||
openjdk21-jre \
|
||||
nginx && \
|
||||
mkdir -p $HOME /configs /configs/heap_dumps /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders /tmp/stirling-pdf /pipeline/watchedFolders /pipeline/finishedFolders && \
|
||||
mkdir -p /usr/share/fonts/opentype/noto /var/lib/nginx/tmp /var/log/nginx && \
|
||||
chmod +x /scripts/*.sh && \
|
||||
chmod +x /entrypoint.sh && \
|
||||
# User permissions
|
||||
addgroup -S stirlingpdfgroup && adduser -S stirlingpdfuser -G stirlingpdfgroup && \
|
||||
chown -R stirlingpdfuser:stirlingpdfgroup $HOME /scripts /pipeline /configs /customFiles /pipeline /tmp/stirling-pdf /var/lib/nginx /var/log/nginx /usr/share/nginx
|
||||
|
||||
EXPOSE 8080/tcp
|
||||
|
||||
ENTRYPOINT ["tini", "--", "/entrypoint.sh"]
|
||||
658
docker/base/Dockerfile
Normal file
658
docker/base/Dockerfile
Normal file
@@ -0,0 +1,658 @@
|
||||
# Stirling-PDF Base Image
|
||||
# Pre-built tools: Calibre, Ghostscript, QPDF, ImageMagick, Python venv with AI/OCR packages
|
||||
# This image is reused by Dockerfile and Dockerfile.fat to avoid rebuilding tools on every app release
|
||||
|
||||
ARG TARGETPLATFORM
|
||||
|
||||
# Stage 1: Build and strip Calibre
|
||||
FROM ubuntu:noble AS calibre-build
|
||||
ARG TARGETPLATFORM
|
||||
ARG CALIBRE_VERSION=9.4.0
|
||||
ARG CALIBRE_STRIP_WEBENGINE=false
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
set -eux; \
|
||||
apt-get update; \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates curl xz-utils libnss3 libfontconfig1 \
|
||||
libgl1 libegl1 libdbus-1-3 libasound2t64 libxcomposite1 \
|
||||
libxrandr2 libxkbcommon0 libxi6 libxtst6 libopengl0 \
|
||||
poppler-utils; \
|
||||
rm -rf /var/lib/apt/lists/*; \
|
||||
\
|
||||
case "$(uname -m)" in \
|
||||
x86_64) CALIBRE_ARCH="x86_64" ;; \
|
||||
aarch64) CALIBRE_ARCH="arm64" ;; \
|
||||
*) echo "Unsupported arch: $(uname -m)"; exit 1 ;; \
|
||||
esac; \
|
||||
\
|
||||
for attempt in 1 2 3; do \
|
||||
echo "Downloading Calibre (attempt $attempt/3)..."; \
|
||||
if curl -fsSL --max-time 300 --retry 3 --retry-delay 5 --retry-max-time 900 \
|
||||
"https://download.calibre-ebook.com/${CALIBRE_VERSION}/calibre-${CALIBRE_VERSION}-${CALIBRE_ARCH}.txz" \
|
||||
-o /tmp/calibre.txz; then \
|
||||
break; \
|
||||
fi; \
|
||||
if [ $attempt -lt 3 ]; then sleep 10; fi; \
|
||||
done; \
|
||||
mkdir -p /opt/calibre; \
|
||||
tar xJf /tmp/calibre.txz -C /opt/calibre; \
|
||||
rm /tmp/calibre.txz; \
|
||||
\
|
||||
# We only need Qt6 WebEngine (Chromium) for ebook->PDF output.
|
||||
# PDF INPUT now uses the pdftohtml engine (poppler), not Qt.
|
||||
rm -f /opt/calibre/lib/libQt6Designer* \
|
||||
/opt/calibre/lib/libQt6Multimedia* \
|
||||
/opt/calibre/lib/libQt6SpatialAudio.so.* \
|
||||
/opt/calibre/lib/libQt6NetworkAuth.so.* \
|
||||
/opt/calibre/lib/libQt6Concurrent.so.* \
|
||||
/opt/calibre/lib/libQt6OpenGLWidgets.so.* \
|
||||
/opt/calibre/lib/libQt6QuickWidgets.so.* \
|
||||
/opt/calibre/lib/libQt6Svg.so.* \
|
||||
/opt/calibre/lib/libQt6SvgWidgets.so.* \
|
||||
/opt/calibre/lib/libQt6Pdf*.so.* \
|
||||
/opt/calibre/lib/libQt6ShaderTools.so.* \
|
||||
/opt/calibre/lib/libQt6SerialPort.so.* \
|
||||
/opt/calibre/lib/libQt6Sensors.so.* \
|
||||
/opt/calibre/lib/libQt6Test.so.* \
|
||||
/opt/calibre/lib/libQt6Sql.so.* \
|
||||
/opt/calibre/lib/libQt6RemoteObjects.so.* \
|
||||
/opt/calibre/lib/libQt6Help.so.* \
|
||||
/opt/calibre/lib/libQt6VirtualKeyboard.so.* \
|
||||
/opt/calibre/lib/libQt6WaylandClient.so.* \
|
||||
/opt/calibre/lib/libQt6WaylandCompositor.so.* \
|
||||
/opt/calibre/lib/libQt6Bluetooth.so.* \
|
||||
/opt/calibre/lib/libQt6Nfc.so.* \
|
||||
/opt/calibre/lib/libQt6Charts.so.* \
|
||||
/opt/calibre/lib/libQt6DataVisualization.so.* \
|
||||
/opt/calibre/lib/libQt6Scxml.so.* \
|
||||
/opt/calibre/lib/libQt6StateMachine.so.* \
|
||||
/opt/calibre/lib/libQt6TextToSpeech.so.* \
|
||||
/opt/calibre/lib/libQt63D*.so.* \
|
||||
/opt/calibre/lib/libavcodec.so.* \
|
||||
/opt/calibre/lib/libavfilter.so.* \
|
||||
/opt/calibre/lib/libavformat.so.* \
|
||||
/opt/calibre/lib/libavutil.so.* \
|
||||
/opt/calibre/lib/libavdevice.so.* \
|
||||
/opt/calibre/lib/libpostproc.so.* \
|
||||
/opt/calibre/lib/libswresample.so.* \
|
||||
/opt/calibre/lib/libswscale.so.* \
|
||||
/opt/calibre/lib/libspeex.so.* \
|
||||
/opt/calibre/lib/libFLAC.so.* \
|
||||
/opt/calibre/lib/libopus.so.* \
|
||||
/opt/calibre/lib/libvorbis*.so.* \
|
||||
/opt/calibre/lib/libasyncns.so.* \
|
||||
/opt/calibre/lib/libspeechd.so.* \
|
||||
/opt/calibre/lib/libespeak-ng.so.* \
|
||||
/opt/calibre/lib/libonnxruntime.so.* \
|
||||
/opt/calibre/lib/libgio-2.0.so.* \
|
||||
/opt/calibre/lib/libzstd.so.* \
|
||||
/opt/calibre/lib/libhunspell-1.7.so.* \
|
||||
/opt/calibre/lib/libbrotlienc.so.* \
|
||||
/opt/calibre/lib/libbrotlicommon.so.* \
|
||||
/opt/calibre/lib/libbrotlidec.so.* \
|
||||
/opt/calibre/lib/libstemmer.so.* \
|
||||
/opt/calibre/lib/libmtp.so.* \
|
||||
/opt/calibre/lib/libncursesw.so.* \
|
||||
/opt/calibre/lib/libchm.so.* \
|
||||
/opt/calibre/lib/libgcrypt.so.* \
|
||||
/opt/calibre/lib/libgpg-error.so.* \
|
||||
/opt/calibre/lib/libicuio.so.* \
|
||||
/opt/calibre/lib/libreadline.so.* \
|
||||
/opt/calibre/lib/libusb-1.0.so.* \
|
||||
/opt/calibre/lib/libpulse*.so.* \
|
||||
/opt/calibre/lib/libsndfile.so.* \
|
||||
/opt/calibre/lib/libmpv.so.* \
|
||||
/opt/calibre/lib/libass.so.* \
|
||||
/opt/calibre/lib/librubberband.so.* \
|
||||
/opt/calibre/lib/libsamplerate.so.*; \
|
||||
rm -rf /opt/calibre/lib/qt6/plugins/platformthemes \
|
||||
/opt/calibre/lib/qt6/plugins/multimedia \
|
||||
/opt/calibre/lib/qt6/plugins/designer \
|
||||
/opt/calibre/lib/qt6/plugins/virtualkeyboard \
|
||||
/opt/calibre/lib/qt6/plugins/wayland* \
|
||||
/opt/calibre/lib/qt6/plugins/texttospeech \
|
||||
/opt/calibre/lib/qt6/plugins/position \
|
||||
/opt/calibre/lib/qt6/plugins/sensors \
|
||||
/opt/calibre/lib/qt6/plugins/sqldrivers \
|
||||
/opt/calibre/lib/qt6/plugins/canbus \
|
||||
/opt/calibre/lib/qt6/plugins/sceneparsers \
|
||||
/opt/calibre/lib/qt6/plugins/renderers \
|
||||
/opt/calibre/lib/qt6/plugins/geometryloaders \
|
||||
/opt/calibre/lib/qt6/plugins/generic \
|
||||
/opt/calibre/lib/qt6/plugins/qmltooling \
|
||||
/opt/calibre/lib/qt6/libexec/QtWebEngineProcess.bak; \
|
||||
rm -rf /opt/calibre/plugins/sqldrivers \
|
||||
/opt/calibre/plugins/multimedia \
|
||||
/opt/calibre/plugins/wayland-shell-integration \
|
||||
/opt/calibre/plugins/wayland-graphics-integration-client \
|
||||
/opt/calibre/plugins/wayland-decoration-client \
|
||||
/opt/calibre/plugins/texttospeech \
|
||||
/opt/calibre/plugins/platformthemes \
|
||||
/opt/calibre/plugins/platforminputcontexts \
|
||||
/opt/calibre/plugins/egldeviceintegrations \
|
||||
/opt/calibre/plugins/iconengines; \
|
||||
\
|
||||
# Remove GUI executables but keep ebook-convert, ebook-meta, and calibre-parallel.
|
||||
rm -f /opt/calibre/calibre \
|
||||
/opt/calibre/calibre-server \
|
||||
/opt/calibre/calibre-smtp \
|
||||
/opt/calibre/calibre-debug \
|
||||
/opt/calibre/calibre-customize \
|
||||
/opt/calibre/calibredb \
|
||||
/opt/calibre/ebook-viewer \
|
||||
/opt/calibre/ebook-edit \
|
||||
/opt/calibre/ebook-polish \
|
||||
/opt/calibre/ebook-device \
|
||||
/opt/calibre/fetch-ebook-metadata \
|
||||
/opt/calibre/lrf2lrs \
|
||||
/opt/calibre/lrs2lrf \
|
||||
/opt/calibre/markdown-calibre \
|
||||
/opt/calibre/web2disk; \
|
||||
\
|
||||
# Remove Python modules not needed for conversion.
|
||||
rm -rf /opt/calibre/lib/calibre/gui2 \
|
||||
/opt/calibre/lib/calibre/devices \
|
||||
/opt/calibre/lib/calibre/library \
|
||||
/opt/calibre/lib/calibre/db \
|
||||
/opt/calibre/lib/calibre/srv \
|
||||
/opt/calibre/lib/calibre/spell \
|
||||
/opt/calibre/lib/calibre/live \
|
||||
/opt/calibre/lib/calibre/utils/piper \
|
||||
/opt/calibre/lib/calibre/utils/certgen.so \
|
||||
/opt/calibre/lib/calibre/utils/https \
|
||||
/opt/calibre/lib/calibre/utils/mdns; \
|
||||
\
|
||||
# Remove resources not needed for CLI conversion.
|
||||
rm -rf /opt/calibre/resources/images \
|
||||
/opt/calibre/resources/icons \
|
||||
/opt/calibre/resources/icons.rcc \
|
||||
/opt/calibre/resources/content-server \
|
||||
/opt/calibre/resources/editor* \
|
||||
/opt/calibre/resources/viewer \
|
||||
/opt/calibre/resources/viewer.js \
|
||||
/opt/calibre/resources/viewer.html \
|
||||
/opt/calibre/resources/recipes \
|
||||
/opt/calibre/resources/dictionaries \
|
||||
/opt/calibre/resources/hyphenation \
|
||||
/opt/calibre/resources/catalog \
|
||||
/opt/calibre/resources/calibre-mimetypes.xml \
|
||||
/opt/calibre/resources/changelog.json \
|
||||
/opt/calibre/resources/user-agent-data.json \
|
||||
/opt/calibre/resources/builtin_recipes.zip \
|
||||
/opt/calibre/resources/builtin_recipes.xml \
|
||||
/opt/calibre/resources/stylelint-bundle.min.js \
|
||||
/opt/calibre/resources/stylelint.js \
|
||||
/opt/calibre/resources/rapydscript \
|
||||
/opt/calibre/resources/quick_start \
|
||||
/opt/calibre/resources/piper-voices.json \
|
||||
/opt/calibre/resources/images.qrc \
|
||||
/opt/calibre/resources/mozilla-ca-certs.pem \
|
||||
/opt/calibre/resources/ebook-convert-complete.calibre_msgpack \
|
||||
/opt/calibre/resources/mathjax \
|
||||
/opt/calibre/resources/common-english-words.txt \
|
||||
/opt/calibre/resources/calibre-portable.sh \
|
||||
/opt/calibre/resources/calibre-portable.bat \
|
||||
/opt/calibre/resources/metadata_sqlite.sql \
|
||||
/opt/calibre/resources/notes_sqlite.sql \
|
||||
/opt/calibre/resources/fts_sqlite.sql \
|
||||
/opt/calibre/resources/fts_triggers.sql \
|
||||
/opt/calibre/resources/jacket \
|
||||
/opt/calibre/resources/editor-functions.json \
|
||||
/opt/calibre/resources/calibre-ebook-root-CA.crt \
|
||||
/opt/calibre/resources/csscolorparser.js \
|
||||
/opt/calibre/resources/lookup.js \
|
||||
/opt/calibre/resources/pdf-mathjax-loader.js \
|
||||
/opt/calibre/resources/scraper.js \
|
||||
/opt/calibre/resources/toc.js \
|
||||
/opt/calibre/resources/user-manual-translation-stats.json \
|
||||
/opt/calibre/resources/pin-template.svg \
|
||||
/opt/calibre/resources/scripts.calibre_msgpack \
|
||||
/opt/calibre/resources/fonts \
|
||||
/opt/calibre/resources/qtwebengine_devtools_resources.pak \
|
||||
/opt/calibre/lib/calibre/ebooks/docx/images \
|
||||
/opt/calibre/share \
|
||||
/opt/calibre/man; \
|
||||
\
|
||||
# Remove translations and localization while keeping required libraries.
|
||||
# Keep iso639.calibre_msgpack (required)
|
||||
# Keep only en-US.pak from qtwebengine_locales (required for WebEngine)
|
||||
rm -rf /opt/calibre/lib/qt6/translations; \
|
||||
find /opt/calibre/translations -mindepth 1 -maxdepth 1 ! -name 'qtwebengine_locales' -exec rm -rf {} +; \
|
||||
find /opt/calibre/translations/qtwebengine_locales -type f ! -name 'en-US.pak' -delete 2>/dev/null || true; \
|
||||
if [ -d /opt/calibre/resources/localization ]; then \
|
||||
rm -rf /opt/calibre/resources/localization/locales.zip \
|
||||
/opt/calibre/resources/localization/stats.calibre_msgpack \
|
||||
/opt/calibre/resources/localization/website-languages.txt; \
|
||||
find /opt/calibre/resources/localization -mindepth 1 -maxdepth 1 ! -name 'iso639.calibre_msgpack' -exec rm -rf {} +; \
|
||||
fi; \
|
||||
\
|
||||
# Strip debug symbols from calibre extension modules.
|
||||
# Exclude Qt6 libs: libQt6WebEngineCore and friends embed Chromium V8 JIT code
|
||||
# and internal resource blobs that strip corrupts, causing segfaults at render time.
|
||||
find /opt/calibre/lib -name '*.so*' \
|
||||
! -name 'libQt6*' \
|
||||
-exec strip --strip-unneeded {} + 2>/dev/null || true; \
|
||||
\
|
||||
# Remove Python bytecode caches.
|
||||
find /opt/calibre -type d -name __pycache__ \
|
||||
-exec rm -rf {} + 2>/dev/null || true; \
|
||||
find /opt/calibre -name '*.pyc' -delete 2>/dev/null || true; \
|
||||
\
|
||||
# Verify conversion still works
|
||||
# NOTE: txt->epub used intentionally NOT txt->pdf.
|
||||
# Calibre 7+ uses WebEngine (Chromium) for PDF output, which requires kernel
|
||||
# capabilities unavailable in Docker RUN steps and segfaults under QEMU.
|
||||
# epub output exercises the same Python/plugin stack without touching WebEngine.
|
||||
/opt/calibre/ebook-convert --version; \
|
||||
echo "Hello" > /tmp/test.txt; \
|
||||
/opt/calibre/ebook-convert /tmp/test.txt /tmp/test.epub; \
|
||||
rm -f /tmp/test.txt /tmp/test.epub; \
|
||||
\
|
||||
# Verify pdftohtml (poppler) is available for the pdftohtml PDF engine.
|
||||
pdftohtml -v >/dev/null 2>&1 && echo "pdftohtml OK" || { echo "ERROR: pdftohtml not found"; exit 1; }; \
|
||||
echo "=== Calibre stripped successfully ==="
|
||||
|
||||
# Optional: strip Chromium/WebEngine (~80 MB savings) when PDF output via Calibre is not needed.
|
||||
# Build with --build-arg CALIBRE_STRIP_WEBENGINE=true to enable.
|
||||
RUN if [ "${CALIBRE_STRIP_WEBENGINE}" = "true" ]; then \
|
||||
echo "Stripping Calibre WebEngine (Chromium), PDF output via Calibre will be disabled"; \
|
||||
rm -rf /opt/calibre/lib/qt6/libexec/QtWebEngineProcess \
|
||||
/opt/calibre/lib/qt6/resources \
|
||||
/opt/calibre/lib/libQt6WebEngine*.so.* \
|
||||
/opt/calibre/lib/libQt6Quick*.so.* \
|
||||
/opt/calibre/lib/libQt6Qml*.so.* \
|
||||
/opt/calibre/translations/qtwebengine_locales 2>/dev/null || true; \
|
||||
echo "WebEngine stripped, Calibre PDF output disabled"; \
|
||||
else \
|
||||
echo "CALIBRE_STRIP_WEBENGINE=false, keeping WebEngine for PDF output"; \
|
||||
fi
|
||||
|
||||
|
||||
# Stage 2: Build Ghostscript from source
|
||||
FROM ubuntu:noble AS gs-build
|
||||
ARG TARGETPLATFORM
|
||||
ARG GS_VERSION=10.06.0
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
--mount=type=cache,target=/tmp/gs-build,id=gs-build-${TARGETPLATFORM:-local} \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential curl ca-certificates libfontconfig1-dev && rm -rf /var/lib/apt/lists/* && \
|
||||
GS_TAG="gs$(printf '%s' "${GS_VERSION}" | tr -d '.')" && \
|
||||
cd /tmp/gs-build && \
|
||||
rm -rf ghostscript-* && \
|
||||
(test -d "ghostscript-${GS_VERSION}" || curl -fsSL --max-time 300 --retry 3 --retry-delay 5 "https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/${GS_TAG}/ghostscript-${GS_VERSION}.tar.gz" | tar xz) && \
|
||||
cd "ghostscript-${GS_VERSION}" && \
|
||||
./configure \
|
||||
--prefix=/usr/local \
|
||||
--without-x \
|
||||
--disable-cups \
|
||||
--disable-gtk && \
|
||||
make -j"$(nproc)" && \
|
||||
make install && \
|
||||
cd ..
|
||||
|
||||
|
||||
# Stage 3: Build PDF Tools (QPDF and ImageMagick 7)
|
||||
FROM ubuntu:noble AS pdf-tools-build
|
||||
ARG TARGETPLATFORM
|
||||
ARG QPDF_VERSION=12.3.2
|
||||
ARG IM_VERSION=7.1.2-13
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
--mount=type=cache,target=/tmp/pdf-tools-build,id=pdf-tools-${TARGETPLATFORM:-local} \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential cmake libssl-dev libjpeg-dev zlib1g-dev curl ca-certificates pkg-config \
|
||||
libpng-dev libtiff-dev libwebp-dev libxml2-dev libfreetype6-dev liblcms2-dev libzip-dev liblqr-1-0-dev \
|
||||
libltdl-dev libtool && rm -rf /var/lib/apt/lists/* && \
|
||||
cd /tmp/pdf-tools-build && \
|
||||
rm -rf qpdf-* ImageMagick-* && \
|
||||
# Build QPDF
|
||||
(test -d "qpdf-${QPDF_VERSION}" || curl -fsSL --max-time 300 --retry 3 --retry-delay 5 "https://github.com/qpdf/qpdf/releases/download/v${QPDF_VERSION}/qpdf-${QPDF_VERSION}.tar.gz" | tar xz) && \
|
||||
cd "qpdf-${QPDF_VERSION}" && \
|
||||
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DALLOW_CRYPTO_OPENSSL=ON -DDEFAULT_CRYPTO=openssl && \
|
||||
cmake --build build --parallel "$(nproc)" && \
|
||||
cmake --install build --strip && \
|
||||
cd .. && \
|
||||
# Build ImageMagick 7
|
||||
(test -d "ImageMagick-${IM_VERSION}" || curl -fsSL --max-time 300 --retry 3 --retry-delay 5 "https://github.com/ImageMagick/ImageMagick/archive/refs/tags/${IM_VERSION}.tar.gz" | tar xz) && \
|
||||
cd "ImageMagick-${IM_VERSION}" && \
|
||||
./configure --prefix=/usr/local --with-modules --with-perl=no --with-magick-plus-plus=no --with-quantum-depth=16 --disable-static --enable-shared && \
|
||||
make -j"$(nproc)" && \
|
||||
make install-strip && \
|
||||
# Enable PDF/PS/EPS in policy
|
||||
sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' /usr/local/etc/ImageMagick-7/policy.xml && \
|
||||
sed -i 's/rights="none" pattern="PS"/rights="read|write" pattern="PS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
|
||||
sed -i 's/rights="none" pattern="EPS"/rights="read|write" pattern="EPS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
|
||||
cd .. && \
|
||||
ldconfig /usr/local/lib
|
||||
|
||||
# Stage ImageMagick outputs into a single directory so runtime can import them with one COPY
|
||||
# (reduces 4 separate COPY layers to 1 independent --link layer).
|
||||
RUN mkdir -p /magick-export/usr/bin \
|
||||
/magick-export/usr/local/lib \
|
||||
/magick-export/usr/local/etc && \
|
||||
cp /usr/local/bin/magick /magick-export/usr/bin/ && \
|
||||
cp -a /usr/local/lib/libMagick*.so* /magick-export/usr/local/lib/ && \
|
||||
cp -a /usr/local/lib/ImageMagick-7* /magick-export/usr/local/lib/ && \
|
||||
cp -a /usr/local/etc/ImageMagick-7 /magick-export/usr/local/etc/
|
||||
|
||||
|
||||
# Stage 4: Build Python venv
|
||||
FROM ubuntu:noble AS python-venv-build
|
||||
ARG TARGETPLATFORM
|
||||
ARG UNOSERVER_VERSION=3.6
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3 python3-venv ca-certificates binutils && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Build the Python tool venv (/opt/venv) that the runtime stage copies in.
# Commands are separated with ';' under 'set -e' so that a failed venv creation
# or pip install aborts the build. Only the best-effort cleanup steps may fail;
# each of those carries its own '|| true'.
# --no-cache-dir is deliberately not passed: the pip cache lives in the BuildKit
# cache mount below, so it speeds up rebuilds without ending up in the image.
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    set -eux; \
    python3 -m venv /opt/venv --system-site-packages; \
    /opt/venv/bin/pip install --prefer-binary \
        weasyprint pdf2image opencv-python-headless ocrmypdf \
        cryptography \
        "unoserver==${UNOSERVER_VERSION}"; \
    # Drop bytecode caches, compiled bytecode and .pyi stub files.
    find /opt/venv -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true; \
    find /opt/venv \( -name '*.pyc' -o -name '*.pyi' \) -delete 2>/dev/null || true; \
    # Remove pip and setuptools from the finished venv.
    rm -rf /opt/venv/lib/python*/site-packages/pip \
        /opt/venv/lib/python*/site-packages/pip-*.dist-info \
        /opt/venv/lib/python*/site-packages/setuptools \
        /opt/venv/lib/python*/site-packages/setuptools-*.dist-info; \
    # Strip unneeded symbols from native extension modules (best effort).
    find /opt/venv -name '*.so' -exec strip --strip-unneeded {} + 2>/dev/null || true
|
||||
|
||||
|
||||
# Final runtime image - the actual base image
|
||||
FROM eclipse-temurin:25-jre-noble AS runtime
|
||||
|
||||
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive \
|
||||
LANG=C.UTF-8 \
|
||||
LC_ALL=C.UTF-8 \
|
||||
TESS_BASE_PATH=/usr/share/tesseract-ocr/5/tessdata
|
||||
|
||||
ARG UNOSERVER_VERSION=3.6
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
set -eux; \
|
||||
apt-get update; \
|
||||
# Add LibreOffice Fresh PPA for latest version (26.2.x)
|
||||
apt-get install -y --no-install-recommends software-properties-common; \
|
||||
add-apt-repository -y ppa:libreoffice/ppa; \
|
||||
apt-get update; \
|
||||
apt-get install -y --no-install-recommends \
|
||||
# Core tools
|
||||
ca-certificates tzdata tini bash fontconfig curl \
|
||||
ffmpeg poppler-utils fontforge \
|
||||
gosu unpaper pngquant \
|
||||
# Fonts: full coverage for standard + fat variants
|
||||
fonts-dejavu \
|
||||
fonts-liberation2 \
|
||||
fonts-crosextra-caladea fonts-crosextra-carlito \
|
||||
fonts-noto-core fonts-noto-mono fonts-noto-extra \
|
||||
fonts-noto-cjk poppler-data \
|
||||
fonts-freefont-ttf fonts-terminus \
|
||||
# Python runtime & UNO bridge
|
||||
python3 python3-uno python3-packaging \
|
||||
# OCR
|
||||
tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu tesseract-ocr-fra \
|
||||
tesseract-ocr-por tesseract-ocr-chi-sim \
|
||||
# Tesseract OSD for orientation detection
|
||||
tesseract-ocr-osd \
|
||||
# Graphics / AWT headless
|
||||
libcairo2 libpango-1.0-0 libpangoft2-1.0-0 libgdk-pixbuf-2.0-0 \
|
||||
libfreetype6 libfontconfig1 libx11-6 libxt6 libxext6 libxrender1 \
|
||||
libxtst6 libxi6 libxinerama1 libxkbcommon0 libsm6 libice6 \
|
||||
# Qt/EGL for Calibre CLI
|
||||
libegl1 libgl1 libopengl0 libdbus-1-3 libglib2.0-0 libnss3 \
|
||||
libasound2t64 libxcomposite1 libxrandr2 \
|
||||
# Virtual framebuffer (required for headless LibreOffice Impress/Draw)
|
||||
xvfb x11-utils coreutils \
|
||||
libreoffice-writer-nogui libreoffice-calc-nogui \
|
||||
libreoffice-impress-nogui libreoffice-draw-nogui \
|
||||
libreoffice-base-nogui libreoffice-java-common \
|
||||
; \
|
||||
\
|
||||
# Verify and fix LibreOffice
|
||||
libreoffice --version; \
|
||||
soffice --version 2>/dev/null || true; \
|
||||
# Rebuild UNO bridge type database
|
||||
/usr/lib/libreoffice/program/soffice.bin --headless --convert-to pdf /dev/null 2>/dev/null || true; \
|
||||
# Force font cache rebuild
|
||||
fc-cache -f -v 2>&1 | awk 'NR <= 20'; \
|
||||
\
|
||||
# Cleanup stage.
|
||||
\
|
||||
# Remove the PPA helper (software-properties-common); it was only needed for add-apt-repository above
|
||||
apt-get remove --purge -y software-properties-common || true; \
|
||||
apt-get autoremove --purge -y || true; \
|
||||
rm -rf /var/lib/apt/lists/*; \
|
||||
\
|
||||
# Docs / man / info / icons / themes / GUI assets (headless server)
|
||||
rm -rf /usr/share/doc/* /usr/share/man/* /usr/share/info/* \
|
||||
/usr/share/lintian/* /usr/share/linda/* \
|
||||
/usr/share/icons/* /usr/share/themes/* \
|
||||
/usr/share/javascript/* \
|
||||
/usr/share/gtk-3.0/* \
|
||||
/usr/share/fontforge/pixmaps \
|
||||
/usr/share/fontforge/osx \
|
||||
/usr/share/fontforge/cidmap \
|
||||
/usr/share/fontforge/prefs \
|
||||
/usr/share/liblangtag/* \
|
||||
/usr/share/tcltk/* \
|
||||
/usr/share/python-wheels/* \
|
||||
/usr/share/glib-2.0/schemas/* \
|
||||
/usr/share/mime/* \
|
||||
/usr/share/xml/iso-codes \
|
||||
/usr/share/GConf \
|
||||
/usr/share/bash-completion \
|
||||
/usr/share/zsh \
|
||||
/usr/share/libmysofa \
|
||||
/usr/share/alsa \
|
||||
/usr/share/iso-codes \
|
||||
/usr/share/perl5 \
|
||||
/usr/share/libthai \
|
||||
/usr/share/libexttextcat \
|
||||
/usr/share/openal \
|
||||
/usr/share/gcc; \
|
||||
\
|
||||
find /usr/share/locale -mindepth 1 -maxdepth 1 -type d \
|
||||
! -name 'en*' -exec rm -rf {} + 2>/dev/null || true; \
|
||||
rm -rf /usr/share/i18n/locales /usr/share/i18n/charmaps; \
|
||||
\
|
||||
rm -rf /usr/lib/libreoffice/share/gallery \
|
||||
/usr/lib/libreoffice/share/template \
|
||||
/usr/lib/libreoffice/share/wizards \
|
||||
/usr/lib/libreoffice/share/autotext \
|
||||
/usr/lib/libreoffice/help \
|
||||
/usr/lib/libreoffice/share/config/images_*.zip \
|
||||
/usr/lib/libreoffice/share/basic \
|
||||
/usr/lib/libreoffice/share/Scripts \
|
||||
/usr/lib/libreoffice/share/autocorr \
|
||||
/usr/lib/libreoffice/share/classification \
|
||||
/usr/lib/libreoffice/share/wordbook \
|
||||
/usr/lib/libreoffice/share/fingerprint \
|
||||
/usr/lib/libreoffice/share/xdg \
|
||||
/usr/lib/libreoffice/share/numbertext \
|
||||
/usr/lib/libreoffice/share/shell \
|
||||
/usr/lib/libreoffice/share/palette \
|
||||
/usr/lib/libreoffice/share/theme_definitions \
|
||||
/usr/lib/libreoffice/share/xslt \
|
||||
/usr/lib/libreoffice/share/labels \
|
||||
/usr/lib/libreoffice/share/dtd \
|
||||
/usr/lib/libreoffice/share/tipoftheday \
|
||||
/usr/lib/libreoffice/share/toolbarmode \
|
||||
/usr/lib/libreoffice/share/psprint \
|
||||
/usr/lib/libreoffice/CREDITS.fodt \
|
||||
/usr/lib/libreoffice/LICENSE.html; \
|
||||
\
|
||||
# Remove unused LO extensions (GUI-only; not needed for document conversion)
|
||||
rm -rf /usr/lib/libreoffice/share/extensions/wiki-publisher \
|
||||
/usr/lib/libreoffice/share/extensions/nlpsolver \
|
||||
/usr/lib/libreoffice/share/extensions/dict-* 2>/dev/null || true; \
|
||||
# Remove LO database components (LO Base; not needed for Writer/Calc/Impress conversion)
|
||||
rm -rf /usr/lib/libreoffice/program/libdba* \
|
||||
/usr/lib/libreoffice/program/libdbahsql* \
|
||||
/usr/lib/libreoffice/program/libdbu* \
|
||||
/usr/lib/libreoffice/program/libreport* 2>/dev/null || true; \
|
||||
\
|
||||
rm -rf /usr/lib/python3.12/test \
|
||||
/usr/lib/python3.12/idlelib \
|
||||
/usr/lib/python3.12/tkinter \
|
||||
/usr/lib/python3.12/lib2to3 \
|
||||
/usr/lib/python3.12/pydoc_data; \
|
||||
\
|
||||
rm -rf /usr/lib/python3/dist-packages/scipy \
|
||||
/usr/lib/python3/dist-packages/sympy \
|
||||
/usr/lib/python3/dist-packages/mpmath; \
|
||||
\
|
||||
rm -rf \
|
||||
/usr/lib/python3/dist-packages/cffi \
|
||||
/usr/lib/python3/dist-packages/cffi-*.dist-info \
|
||||
/usr/lib/python3/dist-packages/_cffi_backend*.so \
|
||||
/usr/lib/python3/dist-packages/_cffi_backend*.cpython*.so \
|
||||
2>/dev/null || true; \
|
||||
\
|
||||
# Strip debug symbols from shared libraries under /usr/lib (JVM and LibreOffice paths are excluded)
|
||||
find /usr/lib -name '*.so*' -type f \
|
||||
-not -path '*/jvm/*' \
|
||||
-not -path '*/libreoffice/*' \
|
||||
-exec strip --strip-unneeded {} + 2>/dev/null || true; \
|
||||
\
|
||||
# Remove GPU backends not needed for headless operation.
|
||||
MULTIARCH_LIBDIR=$(dpkg-architecture -qDEB_HOST_MULTIARCH 2>/dev/null \
|
||||
|| find /usr/lib -maxdepth 1 -type d -name '*-linux-gnu' | head -1); \
|
||||
rm -f \
|
||||
"${MULTIARCH_LIBDIR}"/libLLVM*.so* \
|
||||
"${MULTIARCH_LIBDIR}"/libgallium*.so* \
|
||||
2>/dev/null || true; \
|
||||
\
|
||||
find /usr/lib/python3* -type d -name __pycache__ \
|
||||
-exec rm -rf {} + 2>/dev/null || true; \
|
||||
find /usr/lib/python3* \( -name '*.pyc' -o -name '*.pyi' \) \
|
||||
-delete 2>/dev/null || true; \
|
||||
\
|
||||
rm -rf /usr/share/bug /usr/share/lintian /usr/share/linda \
|
||||
/var/lib/dpkg/info/*.list \
|
||||
/var/lib/dpkg/info/*.md5sums \
|
||||
/var/lib/dpkg/info/*.conffiles \
|
||||
/var/lib/dpkg/info/*.postinst \
|
||||
/var/lib/dpkg/info/*.preinst \
|
||||
/var/lib/dpkg/info/*.prerm \
|
||||
/var/lib/dpkg/info/*.postrm \
|
||||
/var/lib/dpkg/info/*.triggers \
|
||||
/var/lib/dpkg/info/*.shlibs \
|
||||
/var/lib/dpkg/info/*.symbols \
|
||||
/var/lib/dpkg/info/*.templates \
|
||||
/var/log/dpkg.log /var/log/apt/* \
|
||||
/usr/local/share/ghostscript/*/doc \
|
||||
/usr/local/share/ghostscript/*/examples \
|
||||
/usr/share/ImageMagick-*/doc \
|
||||
/usr/share/ImageMagick-*/www; \
|
||||
\
|
||||
# Tesseract training configs (not needed for OCR, but keep configs/ for hocr/txt output)
|
||||
rm -rf /usr/share/tesseract-ocr/*/tessdata/tessconfigs; \
|
||||
\
|
||||
# Noto fonts (truetype/noto) and all fonts under fonts/opentype: keep Regular weight only (~370MB savings)
|
||||
find /usr/share/fonts/truetype/noto -type f \
|
||||
! -name '*Regular*' -delete 2>/dev/null || true; \
|
||||
find /usr/share/fonts/opentype -type f \
|
||||
! -name '*Regular*' -delete 2>/dev/null || true; \
|
||||
# DejaVu: keep Regular and Bold only
|
||||
find /usr/share/fonts/truetype -name '*DejaVu*' \
|
||||
! -name '*-Regular*' ! -name '*-Bold*' ! -name '*Bold.ttf' \
|
||||
! -name 'DejaVuSans.ttf' ! -name 'DejaVuSerif.ttf' ! -name 'DejaVuSansMono.ttf' \
|
||||
-type f -delete 2>/dev/null || true; \
|
||||
# Remove empty font directories after cleanup
|
||||
find /usr/share/fonts -type d -empty -delete 2>/dev/null || true; \
|
||||
\
|
||||
# gconv: keep only essential charset conversion modules
|
||||
GCONV_DIR=$(find /usr/lib -type d -name gconv 2>/dev/null | head -1); \
|
||||
if [ -n "$GCONV_DIR" ] && [ -d "$GCONV_DIR" ]; then \
|
||||
mkdir -p /tmp/gconv-keep; \
|
||||
for mod in UTF-8.so UTF-16.so UTF-32.so UTF-7.so \
|
||||
ISO8859-1.so ISO8859-15.so ISO8859-2.so ISO8859-9.so \
|
||||
UNICODE.so CP1252.so CP1251.so CP1250.so \
|
||||
EUC-JP.so EUC-KR.so EUC-CN.so \
|
||||
SHIFT_JIS.so GB18030.so BIG5.so \
|
||||
gconv-modules gconv-modules.d gconv-modules.cache; do \
|
||||
[ -e "$GCONV_DIR/$mod" ] && \
|
||||
cp -a "$GCONV_DIR/$mod" /tmp/gconv-keep/ 2>/dev/null || true; \
|
||||
done; \
|
||||
rm -rf "$GCONV_DIR"/*; \
|
||||
cp -a /tmp/gconv-keep/* "$GCONV_DIR/" 2>/dev/null || true; \
|
||||
rm -rf /tmp/gconv-keep; \
|
||||
fi; \
|
||||
\
|
||||
# Misc caches
|
||||
rm -rf /var/cache/fontconfig/* /tmp/*
|
||||
|
||||
# External tool layers, all use --link for independent layer caching and parallel pulls.
|
||||
COPY --link --from=calibre-build /opt/calibre /opt/calibre
|
||||
COPY --link --from=pdf-tools-build /usr/local/bin/qpdf /usr/bin/qpdf
|
||||
# ImageMagick: 4 layers collapsed to 1 via the magick-export staging dir in pdf-tools-build
|
||||
COPY --link --from=pdf-tools-build /magick-export/ /
|
||||
COPY --link --from=gs-build /usr/local/bin/gs /usr/local/bin/gs
|
||||
COPY --link --from=gs-build /usr/local/share/ghostscript /usr/local/share/ghostscript
|
||||
# Python venv pre-built (no pip install at runtime, no build tools needed)
|
||||
COPY --link --from=python-venv-build /opt/venv /opt/venv
|
||||
|
||||
# Refresh the linker cache for the libraries copied into /usr/local/lib, then
# sanity-check that the pre-built venv can import its key native packages.
# Commands run under 'set -e' and are separated with ';' so that a failed import
# aborts the build; only the final best-effort __pycache__ cleanup may fail.
RUN set -eux; \
    ldconfig /usr/local/lib; \
    # Keep the checks from writing fresh __pycache__ directories into the venv.
    export PYTHONDONTWRITEBYTECODE=1; \
    /opt/venv/bin/python -c "import cffi; print('cffi OK:', cffi.__version__)"; \
    /opt/venv/bin/python -c "import cv2; print('OpenCV', cv2.__version__)"; \
    /opt/venv/bin/python -c "import ocrmypdf; print('ocrmypdf OK')"; \
    find /opt/venv -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
|
||||
|
||||
# Non-root user
|
||||
ARG PUID=1000
|
||||
ARG PGID=1000
|
||||
|
||||
RUN set -eux; \
|
||||
if ! getent group stirlingpdfgroup >/dev/null 2>&1; then \
|
||||
groupadd -g "${PGID}" stirlingpdfgroup 2>/dev/null \
|
||||
|| groupadd stirlingpdfgroup; \
|
||||
fi; \
|
||||
if ! id -u stirlingpdfuser >/dev/null 2>&1; then \
|
||||
useradd -m -u "${PUID}" -g stirlingpdfgroup \
|
||||
-d /home/stirlingpdfuser -s /bin/bash stirlingpdfuser 2>/dev/null \
|
||||
|| useradd -m -g stirlingpdfgroup \
|
||||
-d /home/stirlingpdfuser -s /bin/bash stirlingpdfuser; \
|
||||
fi; \
|
||||
ln -sf /usr/sbin/gosu /usr/local/bin/su-exec
|
||||
|
||||
# Application directories
|
||||
RUN set -eux; \
|
||||
mkdir -p /configs /configs/cache /configs/heap_dumps /logs /customFiles \
|
||||
/pipeline/watchedFolders /pipeline/finishedFolders \
|
||||
/tmp/stirling-pdf/heap_dumps; \
|
||||
chown -R stirlingpdfuser:stirlingpdfgroup \
|
||||
/home/stirlingpdfuser /configs /logs /customFiles /pipeline \
|
||||
/tmp/stirling-pdf; \
|
||||
chmod 750 /tmp/stirling-pdf; \
|
||||
chmod 750 /tmp/stirling-pdf/heap_dumps
|
||||
|
||||
# Tool symlinks
|
||||
RUN set -eux; \
|
||||
ln -sf /opt/calibre/ebook-convert /usr/bin/ebook-convert; \
|
||||
ln -sf /opt/venv/bin/unoconvert /usr/local/bin/unoconvert; \
|
||||
ln -sf /opt/venv/bin/unoserver /usr/local/bin/unoserver; \
|
||||
ln -sf /opt/venv/bin/ocrmypdf /usr/local/bin/ocrmypdf; \
|
||||
ln -sf /opt/venv/bin/weasyprint /usr/local/bin/weasyprint; \
|
||||
ln -sf /opt/venv/bin/unoping /usr/local/bin/unoping; \
|
||||
fc-cache -f
|
||||
|
||||
# Metadata labels - base image
|
||||
LABEL org.opencontainers.image.title="Stirling-PDF Base" \
|
||||
org.opencontainers.image.description="Pre-built base image with Calibre, Ghostscript, QPDF, ImageMagick, LibreOffice, OCRmyPDF and dependencies" \
|
||||
org.opencontainers.image.source="https://github.com/Stirling-Tools/Stirling-PDF" \
|
||||
org.opencontainers.image.licenses="MIT" \
|
||||
org.opencontainers.image.vendor="Stirling-Tools" \
|
||||
org.opencontainers.image.url="https://www.stirlingpdf.com" \
|
||||
org.opencontainers.image.documentation="https://docs.stirlingpdf.com" \
|
||||
maintainer="Stirling-Tools" \
|
||||
org.opencontainers.image.authors="Stirling-Tools"
|
||||
@@ -1,269 +1,13 @@
|
||||
# Stirling-PDF - Full version (embedded frontend)
|
||||
# Uses pre-built base image for fast builds
|
||||
|
||||
FROM ubuntu:noble AS calibre-build
|
||||
ARG TARGETPLATFORM
|
||||
ARG CALIBRE_VERSION=9.4.0
|
||||
ARG CALIBRE_STRIP_WEBENGINE=false
|
||||
ARG BASE_VERSION=1.0.0
|
||||
ARG BASE_IMAGE=ghcr.io/stirling-tools/stirling-pdf-base:${BASE_VERSION}
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
set -eux; \
|
||||
apt-get update; \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates curl xz-utils libnss3 libfontconfig1 \
|
||||
libgl1 libegl1 libdbus-1-3 libasound2t64 libxcomposite1 \
|
||||
libxrandr2 libxkbcommon0 libxi6 libxtst6 libopengl0 \
|
||||
poppler-utils; \
|
||||
rm -rf /var/lib/apt/lists/*; \
|
||||
\
|
||||
case "$(uname -m)" in \
|
||||
x86_64) CALIBRE_ARCH="x86_64" ;; \
|
||||
aarch64) CALIBRE_ARCH="arm64" ;; \
|
||||
*) echo "Unsupported arch: $(uname -m)"; exit 1 ;; \
|
||||
esac; \
|
||||
\
|
||||
curl -fsSL \
|
||||
"https://download.calibre-ebook.com/${CALIBRE_VERSION}/calibre-${CALIBRE_VERSION}-${CALIBRE_ARCH}.txz" \
|
||||
-o /tmp/calibre.txz; \
|
||||
mkdir -p /opt/calibre; \
|
||||
tar xJf /tmp/calibre.txz -C /opt/calibre; \
|
||||
rm /tmp/calibre.txz; \
|
||||
\
|
||||
# We only need Qt6 WebEngine (Chromium) for ebook->PDF output.
|
||||
# PDF INPUT now uses the pdftohtml engine (poppler), not Qt.
|
||||
rm -f /opt/calibre/lib/libQt6Designer* \
|
||||
/opt/calibre/lib/libQt6Multimedia* \
|
||||
/opt/calibre/lib/libQt6SpatialAudio.so.* \
|
||||
/opt/calibre/lib/libQt6NetworkAuth.so.* \
|
||||
/opt/calibre/lib/libQt6Concurrent.so.* \
|
||||
/opt/calibre/lib/libQt6OpenGLWidgets.so.* \
|
||||
/opt/calibre/lib/libQt6QuickWidgets.so.* \
|
||||
/opt/calibre/lib/libQt6Svg.so.* \
|
||||
/opt/calibre/lib/libQt6SvgWidgets.so.* \
|
||||
/opt/calibre/lib/libQt6Pdf*.so.* \
|
||||
/opt/calibre/lib/libQt6ShaderTools.so.* \
|
||||
/opt/calibre/lib/libQt6SerialPort.so.* \
|
||||
/opt/calibre/lib/libQt6Sensors.so.* \
|
||||
/opt/calibre/lib/libQt6Test.so.* \
|
||||
/opt/calibre/lib/libQt6Sql.so.* \
|
||||
/opt/calibre/lib/libQt6RemoteObjects.so.* \
|
||||
/opt/calibre/lib/libQt6Help.so.* \
|
||||
/opt/calibre/lib/libQt6VirtualKeyboard.so.* \
|
||||
/opt/calibre/lib/libQt6WaylandClient.so.* \
|
||||
/opt/calibre/lib/libQt6WaylandCompositor.so.* \
|
||||
/opt/calibre/lib/libQt6Bluetooth.so.* \
|
||||
/opt/calibre/lib/libQt6Nfc.so.* \
|
||||
/opt/calibre/lib/libQt6Charts.so.* \
|
||||
/opt/calibre/lib/libQt6DataVisualization.so.* \
|
||||
/opt/calibre/lib/libQt6Scxml.so.* \
|
||||
/opt/calibre/lib/libQt6StateMachine.so.* \
|
||||
/opt/calibre/lib/libQt6TextToSpeech.so.* \
|
||||
/opt/calibre/lib/libQt63D*.so.* \
|
||||
/opt/calibre/lib/libavcodec.so.* \
|
||||
/opt/calibre/lib/libavfilter.so.* \
|
||||
/opt/calibre/lib/libavformat.so.* \
|
||||
/opt/calibre/lib/libavutil.so.* \
|
||||
/opt/calibre/lib/libavdevice.so.* \
|
||||
/opt/calibre/lib/libpostproc.so.* \
|
||||
/opt/calibre/lib/libswresample.so.* \
|
||||
/opt/calibre/lib/libswscale.so.* \
|
||||
/opt/calibre/lib/libspeex.so.* \
|
||||
/opt/calibre/lib/libFLAC.so.* \
|
||||
/opt/calibre/lib/libopus.so.* \
|
||||
/opt/calibre/lib/libvorbis*.so.* \
|
||||
/opt/calibre/lib/libasyncns.so.* \
|
||||
/opt/calibre/lib/libspeechd.so.* \
|
||||
/opt/calibre/lib/libespeak-ng.so.* \
|
||||
/opt/calibre/lib/libonnxruntime.so.* \
|
||||
/opt/calibre/lib/libgio-2.0.so.* \
|
||||
/opt/calibre/lib/libzstd.so.* \
|
||||
/opt/calibre/lib/libhunspell-1.7.so.* \
|
||||
/opt/calibre/lib/libbrotlienc.so.* \
|
||||
/opt/calibre/lib/libbrotlicommon.so.* \
|
||||
/opt/calibre/lib/libbrotlidec.so.* \
|
||||
/opt/calibre/lib/libstemmer.so.* \
|
||||
/opt/calibre/lib/libmtp.so.* \
|
||||
/opt/calibre/lib/libncursesw.so.* \
|
||||
/opt/calibre/lib/libchm.so.* \
|
||||
/opt/calibre/lib/libgcrypt.so.* \
|
||||
/opt/calibre/lib/libgpg-error.so.* \
|
||||
/opt/calibre/lib/libicuio.so.* \
|
||||
/opt/calibre/lib/libreadline.so.* \
|
||||
/opt/calibre/lib/libusb-1.0.so.* \
|
||||
/opt/calibre/lib/libpulse*.so.* \
|
||||
/opt/calibre/lib/libsndfile.so.* \
|
||||
/opt/calibre/lib/libmpv.so.* \
|
||||
/opt/calibre/lib/libass.so.* \
|
||||
/opt/calibre/lib/librubberband.so.* \
|
||||
/opt/calibre/lib/libsamplerate.so.*; \
|
||||
rm -rf /opt/calibre/lib/qt6/plugins/platformthemes \
|
||||
/opt/calibre/lib/qt6/plugins/multimedia \
|
||||
/opt/calibre/lib/qt6/plugins/designer \
|
||||
/opt/calibre/lib/qt6/plugins/virtualkeyboard \
|
||||
/opt/calibre/lib/qt6/plugins/wayland* \
|
||||
/opt/calibre/lib/qt6/plugins/texttospeech \
|
||||
/opt/calibre/lib/qt6/plugins/position \
|
||||
/opt/calibre/lib/qt6/plugins/sensors \
|
||||
/opt/calibre/lib/qt6/plugins/sqldrivers \
|
||||
/opt/calibre/lib/qt6/plugins/canbus \
|
||||
/opt/calibre/lib/qt6/plugins/sceneparsers \
|
||||
/opt/calibre/lib/qt6/plugins/renderers \
|
||||
/opt/calibre/lib/qt6/plugins/geometryloaders \
|
||||
/opt/calibre/lib/qt6/plugins/generic \
|
||||
/opt/calibre/lib/qt6/plugins/qmltooling \
|
||||
/opt/calibre/lib/qt6/libexec/QtWebEngineProcess.bak; \
|
||||
rm -rf /opt/calibre/plugins/sqldrivers \
|
||||
/opt/calibre/plugins/multimedia \
|
||||
/opt/calibre/plugins/wayland-shell-integration \
|
||||
/opt/calibre/plugins/wayland-graphics-integration-client \
|
||||
/opt/calibre/plugins/wayland-decoration-client \
|
||||
/opt/calibre/plugins/texttospeech \
|
||||
/opt/calibre/plugins/platformthemes \
|
||||
/opt/calibre/plugins/platforminputcontexts \
|
||||
/opt/calibre/plugins/egldeviceintegrations \
|
||||
/opt/calibre/plugins/iconengines; \
|
||||
\
|
||||
# Remove GUI executables but keep ebook-convert, ebook-meta, and calibre-parallel.
|
||||
rm -f /opt/calibre/calibre \
|
||||
/opt/calibre/calibre-server \
|
||||
/opt/calibre/calibre-smtp \
|
||||
/opt/calibre/calibre-debug \
|
||||
/opt/calibre/calibre-customize \
|
||||
/opt/calibre/calibredb \
|
||||
/opt/calibre/ebook-viewer \
|
||||
/opt/calibre/ebook-edit \
|
||||
/opt/calibre/ebook-polish \
|
||||
/opt/calibre/ebook-device \
|
||||
/opt/calibre/fetch-ebook-metadata \
|
||||
/opt/calibre/lrf2lrs \
|
||||
/opt/calibre/lrs2lrf \
|
||||
/opt/calibre/markdown-calibre \
|
||||
/opt/calibre/web2disk; \
|
||||
\
|
||||
# Remove Python modules not needed for conversion.
|
||||
rm -rf /opt/calibre/lib/calibre/gui2 \
|
||||
/opt/calibre/lib/calibre/devices \
|
||||
/opt/calibre/lib/calibre/library \
|
||||
/opt/calibre/lib/calibre/db \
|
||||
/opt/calibre/lib/calibre/srv \
|
||||
/opt/calibre/lib/calibre/spell \
|
||||
/opt/calibre/lib/calibre/live \
|
||||
/opt/calibre/lib/calibre/utils/piper \
|
||||
/opt/calibre/lib/calibre/utils/certgen.so \
|
||||
/opt/calibre/lib/calibre/utils/https \
|
||||
/opt/calibre/lib/calibre/utils/mdns; \
|
||||
\
|
||||
# Remove resources not needed for CLI conversion.
|
||||
rm -rf /opt/calibre/resources/images \
|
||||
/opt/calibre/resources/icons \
|
||||
/opt/calibre/resources/icons.rcc \
|
||||
/opt/calibre/resources/content-server \
|
||||
/opt/calibre/resources/editor* \
|
||||
/opt/calibre/resources/viewer \
|
||||
/opt/calibre/resources/viewer.js \
|
||||
/opt/calibre/resources/viewer.html \
|
||||
/opt/calibre/resources/recipes \
|
||||
/opt/calibre/resources/dictionaries \
|
||||
/opt/calibre/resources/hyphenation \
|
||||
/opt/calibre/resources/catalog \
|
||||
/opt/calibre/resources/calibre-mimetypes.xml \
|
||||
/opt/calibre/resources/changelog.json \
|
||||
/opt/calibre/resources/user-agent-data.json \
|
||||
/opt/calibre/resources/builtin_recipes.zip \
|
||||
/opt/calibre/resources/builtin_recipes.xml \
|
||||
/opt/calibre/resources/builtin_recipes.xml \
|
||||
/opt/calibre/resources/stylelint-bundle.min.js \
|
||||
/opt/calibre/resources/stylelint.js \
|
||||
/opt/calibre/resources/rapydscript \
|
||||
/opt/calibre/resources/quick_start \
|
||||
/opt/calibre/resources/piper-voices.json \
|
||||
/opt/calibre/resources/images.qrc \
|
||||
/opt/calibre/resources/mozilla-ca-certs.pem \
|
||||
/opt/calibre/resources/ebook-convert-complete.calibre_msgpack \
|
||||
/opt/calibre/resources/mathjax \
|
||||
/opt/calibre/resources/common-english-words.txt \
|
||||
/opt/calibre/resources/calibre-portable.sh \
|
||||
/opt/calibre/resources/calibre-portable.bat \
|
||||
/opt/calibre/resources/metadata_sqlite.sql \
|
||||
/opt/calibre/resources/notes_sqlite.sql \
|
||||
/opt/calibre/resources/fts_sqlite.sql \
|
||||
/opt/calibre/resources/fts_triggers.sql \
|
||||
/opt/calibre/resources/jacket \
|
||||
/opt/calibre/resources/editor-functions.json \
|
||||
/opt/calibre/resources/calibre-ebook-root-CA.crt \
|
||||
/opt/calibre/resources/csscolorparser.js \
|
||||
/opt/calibre/resources/lookup.js \
|
||||
/opt/calibre/resources/pdf-mathjax-loader.js \
|
||||
/opt/calibre/resources/scraper.js \
|
||||
/opt/calibre/resources/toc.js \
|
||||
/opt/calibre/resources/user-manual-translation-stats.json \
|
||||
/opt/calibre/resources/pin-template.svg \
|
||||
/opt/calibre/resources/scripts.calibre_msgpack \
|
||||
/opt/calibre/resources/fonts \
|
||||
/opt/calibre/resources/qtwebengine_devtools_resources.pak \
|
||||
/opt/calibre/lib/calibre/ebooks/docx/images \
|
||||
/opt/calibre/share \
|
||||
/opt/calibre/man; \
|
||||
\
|
||||
# Remove translations and localization while keeping required libraries.
|
||||
# Keep iso639.calibre_msgpack (required)
|
||||
# Keep only en-US.pak from qtwebengine_locales (required for WebEngine)
|
||||
rm -rf /opt/calibre/lib/qt6/translations; \
|
||||
find /opt/calibre/translations -mindepth 1 -maxdepth 1 ! -name 'qtwebengine_locales' -exec rm -rf {} +; \
|
||||
find /opt/calibre/translations/qtwebengine_locales -type f ! -name 'en-US.pak' -delete 2>/dev/null || true; \
|
||||
if [ -d /opt/calibre/resources/localization ]; then \
|
||||
rm -rf /opt/calibre/resources/localization/locales.zip \
|
||||
/opt/calibre/resources/localization/stats.calibre_msgpack \
|
||||
/opt/calibre/resources/localization/website-languages.txt; \
|
||||
find /opt/calibre/resources/localization -mindepth 1 -maxdepth 1 ! -name 'iso639.calibre_msgpack' -exec rm -rf {} +; \
|
||||
fi; \
|
||||
\
|
||||
# Strip debug symbols from calibre extension modules.
|
||||
# Exclude Qt6 libs: libQt6WebEngineCore and friends embed Chromium V8 JIT code
|
||||
# and internal resource blobs that strip corrupts, causing segfaults at render time.
|
||||
find /opt/calibre/lib -name '*.so*' \
|
||||
! -name 'libQt6*' \
|
||||
-exec strip --strip-unneeded {} + 2>/dev/null || true; \
|
||||
\
|
||||
# Remove Python bytecode caches.
|
||||
find /opt/calibre -type d -name __pycache__ \
|
||||
-exec rm -rf {} + 2>/dev/null || true; \
|
||||
find /opt/calibre -name '*.pyc' -delete 2>/dev/null || true; \
|
||||
\
|
||||
# ── Verify conversion still works ──
|
||||
# NOTE: txt->epub used intentionally NOT txt->pdf.
|
||||
# Calibre 7+ uses WebEngine (Chromium) for PDF output, which requires kernel
|
||||
# capabilities unavailable in Docker RUN steps and segfaults under QEMU.
|
||||
# epub output exercises the same Python/plugin stack without touching WebEngine.
|
||||
/opt/calibre/ebook-convert --version; \
|
||||
echo "Hello" > /tmp/test.txt; \
|
||||
/opt/calibre/ebook-convert /tmp/test.txt /tmp/test.epub; \
|
||||
rm -f /tmp/test.txt /tmp/test.epub; \
|
||||
\
|
||||
# Verify pdftohtml (poppler) is available for the pdftohtml PDF engine.
|
||||
pdftohtml -v >/dev/null 2>&1 && echo "pdftohtml OK" || { echo "ERROR: pdftohtml not found"; exit 1; }; \
|
||||
echo "=== Calibre stripped successfully ==="
|
||||
|
||||
# Optional: strip Chromium/WebEngine (~80 MB savings) when PDF output via Calibre is not needed.
|
||||
# Build with --build-arg CALIBRE_STRIP_WEBENGINE=true to enable.
|
||||
RUN if [ "${CALIBRE_STRIP_WEBENGINE}" = "true" ]; then \
|
||||
echo "Stripping Calibre WebEngine (Chromium), PDF output via Calibre will be disabled"; \
|
||||
rm -rf /opt/calibre/lib/qt6/libexec/QtWebEngineProcess \
|
||||
/opt/calibre/lib/qt6/resources \
|
||||
/opt/calibre/lib/libQt6WebEngine*.so.* \
|
||||
/opt/calibre/lib/libQt6Quick*.so.* \
|
||||
/opt/calibre/lib/libQt6Qml*.so.* \
|
||||
/opt/calibre/translations/qtwebengine_locales 2>/dev/null || true; \
|
||||
echo "WebEngine stripped, Calibre PDF output disabled"; \
|
||||
else \
|
||||
echo "CALIBRE_STRIP_WEBENGINE=false, keeping WebEngine for PDF output"; \
|
||||
fi
|
||||
|
||||
|
||||
# Build the Java application and frontend.
|
||||
# Stage 1: Build the Java application and frontend
|
||||
FROM gradle:9.3.1-jdk25 AS app-build
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
apt-get update \
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends curl ca-certificates \
|
||||
&& update-ca-certificates \
|
||||
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
|
||||
@@ -286,403 +30,31 @@ COPY app/common/build.gradle app/common/
|
||||
COPY app/proprietary/build.gradle app/proprietary/
|
||||
|
||||
# Use system gradle instead of gradlew to avoid SSL issues downloading gradle distribution on emulated arm64
|
||||
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
|
||||
--mount=type=cache,target=/home/gradle/.gradle/wrapper \
|
||||
gradle dependencies --no-daemon || true
|
||||
RUN gradle dependencies --no-daemon || true
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
|
||||
--mount=type=cache,target=/home/gradle/.gradle/wrapper \
|
||||
--mount=type=cache,target=/root/.npm,sharing=locked \
|
||||
DISABLE_ADDITIONAL_FEATURES=false \
|
||||
RUN DISABLE_ADDITIONAL_FEATURES=false \
|
||||
gradle clean build \
|
||||
-PbuildWithFrontend=true \
|
||||
-x spotlessApply -x spotlessCheck -x test -x sonarqube \
|
||||
--no-daemon
|
||||
|
||||
# Extract Spring Boot Layers.
|
||||
# Stage 2: Extract Spring Boot Layers
|
||||
FROM eclipse-temurin:25-jre-noble AS jar-extract
|
||||
WORKDIR /tmp
|
||||
COPY --from=app-build /app/app/core/build/libs/*.jar app.jar
|
||||
RUN java -Djarmode=tools -jar app.jar extract --layers --destination /layers
|
||||
|
||||
|
||||
# Build Ghostscript 10.06.0 from source in an isolated stage (avoids library conflicts).
|
||||
FROM ubuntu:noble AS gs-build
|
||||
ARG TARGETPLATFORM
|
||||
ARG GS_VERSION=10.06.0
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
--mount=type=cache,target=/tmp/gs-build,id=gs-build-${TARGETPLATFORM:-local} \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential curl ca-certificates libfontconfig1-dev && rm -rf /var/lib/apt/lists/* && \
|
||||
GS_TAG="gs$(printf '%s' "${GS_VERSION}" | tr -d '.')" && \
|
||||
cd /tmp/gs-build && \
|
||||
rm -rf ghostscript-* && \
|
||||
(test -d "ghostscript-${GS_VERSION}" || curl -fsSL "https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/${GS_TAG}/ghostscript-${GS_VERSION}.tar.gz" | tar xz) && \
|
||||
cd "ghostscript-${GS_VERSION}" && \
|
||||
./configure \
|
||||
--prefix=/usr/local \
|
||||
--without-x \
|
||||
--disable-cups \
|
||||
--disable-gtk && \
|
||||
make -j"$(nproc)" && \
|
||||
make install && \
|
||||
cd ..
|
||||
# Stage 3: Final runtime image on top of pre-built base
|
||||
FROM ${BASE_IMAGE}
|
||||
|
||||
ARG VERSION_TAG
|
||||
|
||||
# Build PDF Tools (QPDF and ImageMagick 7).
|
||||
FROM ubuntu:noble AS pdf-tools-build
|
||||
ARG TARGETPLATFORM
|
||||
ARG QPDF_VERSION=12.3.2
|
||||
ARG IM_VERSION=7.1.2-13
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
--mount=type=cache,target=/tmp/pdf-tools-build,id=pdf-tools-${TARGETPLATFORM:-local} \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential cmake libssl-dev libjpeg-dev zlib1g-dev curl ca-certificates pkg-config \
|
||||
libpng-dev libtiff-dev libwebp-dev libxml2-dev libfreetype6-dev liblcms2-dev libzip-dev liblqr-1-0-dev \
|
||||
libltdl-dev libtool && rm -rf /var/lib/apt/lists/* && \
|
||||
cd /tmp/pdf-tools-build && \
|
||||
rm -rf qpdf-* ImageMagick-* && \
|
||||
# Build QPDF
|
||||
(test -d "qpdf-${QPDF_VERSION}" || curl -fsSL "https://github.com/qpdf/qpdf/releases/download/v${QPDF_VERSION}/qpdf-${QPDF_VERSION}.tar.gz" | tar xz) && \
|
||||
cd "qpdf-${QPDF_VERSION}" && \
|
||||
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DALLOW_CRYPTO_OPENSSL=ON -DDEFAULT_CRYPTO=openssl && \
|
||||
cmake --build build --parallel "$(nproc)" && \
|
||||
cmake --install build --strip && \
|
||||
cd .. && \
|
||||
# Build ImageMagick 7
|
||||
(test -d "ImageMagick-${IM_VERSION}" || curl -fsSL "https://github.com/ImageMagick/ImageMagick/archive/refs/tags/${IM_VERSION}.tar.gz" | tar xz) && \
|
||||
cd "ImageMagick-${IM_VERSION}" && \
|
||||
./configure --prefix=/usr/local --with-modules --with-perl=no --with-magick-plus-plus=no --with-quantum-depth=16 --disable-static --enable-shared && \
|
||||
make -j"$(nproc)" && \
|
||||
make install-strip && \
|
||||
# Enable PDF/PS/EPS in policy
|
||||
sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' /usr/local/etc/ImageMagick-7/policy.xml && \
|
||||
sed -i 's/rights="none" pattern="PS"/rights="read|write" pattern="PS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
|
||||
sed -i 's/rights="none" pattern="EPS"/rights="read|write" pattern="EPS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
|
||||
cd .. && \
|
||||
ldconfig /usr/local/lib
|
||||
|
||||
# Stage ImageMagick outputs into a single directory so runtime can import them with one COPY
|
||||
# (reduces 4 separate COPY layers to 1 independent --link layer).
|
||||
RUN mkdir -p /magick-export/usr/bin \
|
||||
/magick-export/usr/local/lib \
|
||||
/magick-export/usr/local/etc && \
|
||||
cp /usr/local/bin/magick /magick-export/usr/bin/ && \
|
||||
cp -a /usr/local/lib/libMagick*.so* /magick-export/usr/local/lib/ && \
|
||||
cp -a /usr/local/lib/ImageMagick-7* /magick-export/usr/local/lib/ && \
|
||||
cp -a /usr/local/etc/ImageMagick-7 /magick-export/usr/local/etc/
|
||||
|
||||
|
||||
# Build Python venv in an isolated stage so runtime image never needs build tools.
|
||||
# Packages with native extensions (opencv, cryptography) use pre-built wheels (--prefer-binary).
|
||||
# python3-uno is intentionally NOT installed here, it is a system package in the runtime stage
|
||||
# and accessed via --system-site-packages at runtime.
|
||||
FROM ubuntu:noble AS python-venv-build
|
||||
ARG TARGETPLATFORM
|
||||
ARG UNOSERVER_VERSION=3.6
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3 python3-venv ca-certificates binutils && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
|
||||
python3 -m venv /opt/venv --system-site-packages && \
|
||||
/opt/venv/bin/pip install --no-cache-dir --prefer-binary \
|
||||
weasyprint pdf2image opencv-python-headless ocrmypdf \
|
||||
cryptography \
|
||||
"unoserver==${UNOSERVER_VERSION}" && \
|
||||
find /opt/venv -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true && \
|
||||
find /opt/venv \( -name '*.pyc' -o -name '*.pyi' \) -delete 2>/dev/null || true && \
|
||||
rm -rf /opt/venv/lib/python*/site-packages/pip \
|
||||
/opt/venv/lib/python*/site-packages/pip-*.dist-info \
|
||||
/opt/venv/lib/python*/site-packages/setuptools \
|
||||
/opt/venv/lib/python*/site-packages/setuptools-*.dist-info && \
|
||||
find /opt/venv -name '*.so' -exec strip --strip-unneeded {} + 2>/dev/null || true
|
||||
|
||||
|
||||
# Final runtime image.
|
||||
FROM eclipse-temurin:25-jre-noble AS runtime
|
||||
|
||||
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive \
|
||||
LANG=C.UTF-8 \
|
||||
LC_ALL=C.UTF-8 \
|
||||
TESS_BASE_PATH=/usr/share/tesseract-ocr/5/tessdata
|
||||
|
||||
ARG UNOSERVER_VERSION=3.6
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
set -eux; \
|
||||
apt-get update; \
|
||||
# Add LibreOffice Fresh PPA for latest version (26.2.x)
|
||||
apt-get install -y --no-install-recommends software-properties-common; \
|
||||
add-apt-repository -y ppa:libreoffice/ppa; \
|
||||
apt-get update; \
|
||||
apt-get install -y --no-install-recommends \
|
||||
# Core tools
|
||||
ca-certificates tzdata tini bash fontconfig curl \
|
||||
ffmpeg poppler-utils fontforge \
|
||||
gosu unpaper \
|
||||
# Fonts: full CJK coverage retained
|
||||
fonts-dejavu \
|
||||
fonts-liberation2 \
|
||||
fonts-crosextra-caladea fonts-crosextra-carlito \
|
||||
fonts-noto-core fonts-noto-mono fonts-noto-extra \
|
||||
fonts-noto-cjk poppler-data \
|
||||
# python3-uno required for UNO bridge (accessed by venv via --system-site-packages)
|
||||
# python3-venv is NOT needed: the copied /opt/venv works without it at runtime
|
||||
# python3-dev is NOT needed, venv is pre-built in python-venv-build stage
|
||||
python3 python3-uno \
|
||||
# Python packages are in /opt/venv (copied from python-venv-build stage below)
|
||||
# OCR
|
||||
tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu tesseract-ocr-fra \
|
||||
tesseract-ocr-por tesseract-ocr-chi-sim \
|
||||
# Tesseract OSD for orientation detection
|
||||
tesseract-ocr-osd \
|
||||
# Graphics / AWT headless
|
||||
libcairo2 libpango-1.0-0 libpangoft2-1.0-0 libgdk-pixbuf-2.0-0 \
|
||||
libfreetype6 libfontconfig1 libx11-6 libxt6 libxext6 libxrender1 \
|
||||
libxtst6 libxi6 libxinerama1 libxkbcommon0 libsm6 libice6 \
|
||||
# Qt/EGL for Calibre CLI
|
||||
libegl1 libgl1 libopengl0 libdbus-1-3 libglib2.0-0 libnss3 \
|
||||
libasound2t64 libxcomposite1 libxrandr2 \
|
||||
# Virtual framebuffer (required for headless LibreOffice Impress/Draw)
|
||||
xvfb x11-utils coreutils \
|
||||
libreoffice-writer-nogui libreoffice-calc-nogui \
|
||||
libreoffice-impress-nogui libreoffice-draw-nogui \
|
||||
libreoffice-java-common \
|
||||
; \
|
||||
\
|
||||
\
|
||||
# Verify and fix LibreOffice
|
||||
libreoffice --version; \
|
||||
soffice --version 2>/dev/null || true; \
|
||||
# Rebuild UNO bridge type database
|
||||
/usr/lib/libreoffice/program/soffice.bin --headless --convert-to pdf /dev/null 2>/dev/null || true; \
|
||||
# Force font cache rebuild
|
||||
fc-cache -f -v 2>&1 | awk 'NR <= 20'; \
|
||||
\
|
||||
# Cleanup stage.
|
||||
\
|
||||
# Remove the PPA helper (software-properties-common); it was only needed to run add-apt-repository
|
||||
apt-get remove --purge -y software-properties-common || true; \
|
||||
apt-get autoremove --purge -y || true; \
|
||||
rm -rf /var/lib/apt/lists/*; \
|
||||
\
|
||||
# Docs / man / info / icons / themes / GUI assets (headless server)
|
||||
rm -rf /usr/share/doc/* /usr/share/man/* /usr/share/info/* \
|
||||
/usr/share/lintian/* /usr/share/linda/* \
|
||||
/usr/share/icons/* /usr/share/themes/* \
|
||||
/usr/share/javascript/* \
|
||||
/usr/share/gtk-3.0/* \
|
||||
/usr/share/fontforge/pixmaps \
|
||||
/usr/share/fontforge/osx \
|
||||
/usr/share/fontforge/cidmap \
|
||||
/usr/share/fontforge/prefs \
|
||||
/usr/share/liblangtag/* \
|
||||
/usr/share/tcltk/* \
|
||||
/usr/share/python-wheels/* \
|
||||
/usr/share/glib-2.0/schemas/* \
|
||||
/usr/share/mime/* \
|
||||
/usr/share/xml/iso-codes \
|
||||
/usr/share/GConf \
|
||||
/usr/share/bash-completion \
|
||||
/usr/share/zsh \
|
||||
/usr/share/libmysofa \
|
||||
/usr/share/alsa \
|
||||
/usr/share/iso-codes \
|
||||
/usr/share/perl5 \
|
||||
/usr/share/libthai \
|
||||
/usr/share/libexttextcat \
|
||||
/usr/share/openal \
|
||||
/usr/share/gcc; \
|
||||
\
|
||||
find /usr/share/locale -mindepth 1 -maxdepth 1 -type d \
|
||||
! -name 'en*' -exec rm -rf {} + 2>/dev/null || true; \
|
||||
rm -rf /usr/share/i18n/locales /usr/share/i18n/charmaps; \
|
||||
\
|
||||
rm -rf /usr/lib/libreoffice/share/gallery \
|
||||
/usr/lib/libreoffice/share/template \
|
||||
/usr/lib/libreoffice/share/wizards \
|
||||
/usr/lib/libreoffice/share/autotext \
|
||||
/usr/lib/libreoffice/help \
|
||||
/usr/lib/libreoffice/share/config/images_*.zip \
|
||||
/usr/lib/libreoffice/share/basic \
|
||||
/usr/lib/libreoffice/share/Scripts \
|
||||
/usr/lib/libreoffice/share/autocorr \
|
||||
/usr/lib/libreoffice/share/classification \
|
||||
/usr/lib/libreoffice/share/wordbook \
|
||||
/usr/lib/libreoffice/share/fingerprint \
|
||||
/usr/lib/libreoffice/share/xdg \
|
||||
/usr/lib/libreoffice/share/numbertext \
|
||||
/usr/lib/libreoffice/share/shell \
|
||||
/usr/lib/libreoffice/share/palette \
|
||||
/usr/lib/libreoffice/share/theme_definitions \
|
||||
/usr/lib/libreoffice/share/xslt \
|
||||
/usr/lib/libreoffice/share/labels \
|
||||
/usr/lib/libreoffice/share/dtd \
|
||||
/usr/lib/libreoffice/share/tipoftheday \
|
||||
/usr/lib/libreoffice/share/toolbarmode \
|
||||
/usr/lib/libreoffice/share/psprint \
|
||||
/usr/lib/libreoffice/CREDITS.fodt \
|
||||
/usr/lib/libreoffice/LICENSE.html; \
|
||||
\
|
||||
# Remove unused LO extensions (GUI-only; not needed for document conversion)
|
||||
rm -rf /usr/lib/libreoffice/share/extensions/wiki-publisher \
|
||||
/usr/lib/libreoffice/share/extensions/nlpsolver \
|
||||
/usr/lib/libreoffice/share/extensions/dict-* 2>/dev/null || true; \
|
||||
# Remove LO database components (LO Base; not needed for Writer/Calc/Impress conversion)
|
||||
rm -rf /usr/lib/libreoffice/program/libdba* \
|
||||
/usr/lib/libreoffice/program/libdbahsql* \
|
||||
/usr/lib/libreoffice/program/libdbu* \
|
||||
/usr/lib/libreoffice/program/libreport* 2>/dev/null || true; \
|
||||
\
|
||||
rm -rf /usr/lib/python3.12/test \
|
||||
/usr/lib/python3.12/idlelib \
|
||||
/usr/lib/python3.12/tkinter \
|
||||
/usr/lib/python3.12/lib2to3 \
|
||||
/usr/lib/python3.12/pydoc_data; \
|
||||
\
|
||||
rm -rf /usr/lib/python3/dist-packages/scipy \
|
||||
/usr/lib/python3/dist-packages/sympy \
|
||||
/usr/lib/python3/dist-packages/mpmath; \
|
||||
\
|
||||
rm -rf \
|
||||
/usr/lib/python3/dist-packages/cffi \
|
||||
/usr/lib/python3/dist-packages/cffi-*.dist-info \
|
||||
/usr/lib/python3/dist-packages/_cffi_backend*.so \
|
||||
/usr/lib/python3/dist-packages/_cffi_backend*.cpython*.so \
|
||||
2>/dev/null || true; \
|
||||
\
|
||||
# Strip unneeded symbols from shared libraries under /usr/lib (JVM and LibreOffice paths are excluded)
|
||||
find /usr/lib -name '*.so*' -type f \
|
||||
-not -path '*/jvm/*' \
|
||||
-not -path '*/libreoffice/*' \
|
||||
-exec strip --strip-unneeded {} + 2>/dev/null || true; \
|
||||
\
|
||||
# Preserving ffmpeg codec libs as they are directly linked.
|
||||
\
|
||||
# Remove GPU backends not needed for headless operation.
|
||||
MULTIARCH_LIBDIR=$(dpkg-architecture -qDEB_HOST_MULTIARCH 2>/dev/null \
|
||||
|| find /usr/lib -maxdepth 1 -type d -name '*-linux-gnu' | head -1); \
|
||||
rm -f \
|
||||
"${MULTIARCH_LIBDIR}"/libLLVM*.so* \
|
||||
"${MULTIARCH_LIBDIR}"/libgallium*.so* \
|
||||
2>/dev/null || true; \
|
||||
\
|
||||
find /usr/lib/python3* -type d -name __pycache__ \
|
||||
-exec rm -rf {} + 2>/dev/null || true; \
|
||||
find /usr/lib/python3* \( -name '*.pyc' -o -name '*.pyi' \) \
|
||||
-delete 2>/dev/null || true; \
|
||||
\
|
||||
rm -rf /usr/share/bug /usr/share/lintian /usr/share/linda \
|
||||
/var/log/dpkg.log /var/log/apt/* \
|
||||
/usr/local/share/ghostscript/*/doc \
|
||||
/usr/local/share/ghostscript/*/examples \
|
||||
/usr/share/ImageMagick-*/doc \
|
||||
/usr/share/ImageMagick-*/www; \
|
||||
\
|
||||
\
|
||||
# Remove Tesseract training configs (tessconfigs/, not needed for OCR); keep configs/ for hocr/txt output
|
||||
rm -rf /usr/share/tesseract-ocr/*/tessdata/tessconfigs; \
|
||||
\
|
||||
# Noto fonts ship 1800+ files in many weights (Bold, Italic, SemiBold, etc.)
|
||||
# For PDF processing, Regular weight covers all scripts. Saves ~370MB.
|
||||
find /usr/share/fonts/truetype/noto -type f \
|
||||
! -name '*Regular*' -delete 2>/dev/null || true; \
|
||||
find /usr/share/fonts/opentype -type f \
|
||||
! -name '*Regular*' -delete 2>/dev/null || true; \
|
||||
# DejaVu: keep Regular and Bold only (commonly referenced in PDFs)
|
||||
find /usr/share/fonts/truetype -name '*DejaVu*' \
|
||||
! -name '*-Regular*' ! -name '*-Bold*' ! -name '*Bold.ttf' \
|
||||
! -name 'DejaVuSans.ttf' ! -name 'DejaVuSerif.ttf' ! -name 'DejaVuSansMono.ttf' \
|
||||
-type f -delete 2>/dev/null || true; \
|
||||
# Remove empty font directories after cleanup
|
||||
find /usr/share/fonts -type d -empty -delete 2>/dev/null || true; \
|
||||
\
|
||||
# ── gconv: keep only essential charset conversion modules (~6MB savings) ──
|
||||
# PDF processing needs UTF-8, ISO-8859-*, and a few CJK encodings.
|
||||
GCONV_DIR=$(find /usr/lib -type d -name gconv 2>/dev/null | head -1); \
|
||||
if [ -n "$GCONV_DIR" ] && [ -d "$GCONV_DIR" ]; then \
|
||||
mkdir -p /tmp/gconv-keep; \
|
||||
for mod in UTF-8.so UTF-16.so UTF-32.so UTF-7.so \
|
||||
ISO8859-1.so ISO8859-15.so ISO8859-2.so ISO8859-9.so \
|
||||
UNICODE.so CP1252.so CP1251.so CP1250.so \
|
||||
EUC-JP.so EUC-KR.so EUC-CN.so \
|
||||
SHIFT_JIS.so GB18030.so BIG5.so \
|
||||
gconv-modules gconv-modules.d gconv-modules.cache; do \
|
||||
[ -e "$GCONV_DIR/$mod" ] && \
|
||||
cp -a "$GCONV_DIR/$mod" /tmp/gconv-keep/ 2>/dev/null || true; \
|
||||
done; \
|
||||
rm -rf "$GCONV_DIR"/*; \
|
||||
cp -a /tmp/gconv-keep/* "$GCONV_DIR/" 2>/dev/null || true; \
|
||||
rm -rf /tmp/gconv-keep; \
|
||||
fi; \
|
||||
\
|
||||
# NOTE: flite TTS voice libs (~26MB) are kept because ffmpeg directly links them.
|
||||
# Removing them breaks ffmpeg startup. To save these 26MB, ffmpeg would need
|
||||
# to be rebuilt without --enable-libflite (not worth the complexity).
|
||||
\
|
||||
# ── dpkg metadata cleanup (~14MB) ──
|
||||
# Not needed at runtime, container won't run apt-get.
|
||||
rm -rf /var/lib/dpkg/info/*.list \
|
||||
/var/lib/dpkg/info/*.md5sums \
|
||||
/var/lib/dpkg/info/*.conffiles \
|
||||
/var/lib/dpkg/info/*.postinst \
|
||||
/var/lib/dpkg/info/*.preinst \
|
||||
/var/lib/dpkg/info/*.prerm \
|
||||
/var/lib/dpkg/info/*.postrm \
|
||||
/var/lib/dpkg/info/*.triggers \
|
||||
/var/lib/dpkg/info/*.shlibs \
|
||||
/var/lib/dpkg/info/*.symbols \
|
||||
/var/lib/dpkg/info/*.templates; \
|
||||
\
|
||||
# Misc caches
|
||||
rm -rf /var/cache/fontconfig/* /tmp/*
|
||||
|
||||
# External tool layers, all use --link for independent layer caching and parallel pulls.
|
||||
COPY --link --from=calibre-build /opt/calibre /opt/calibre
|
||||
COPY --link --from=pdf-tools-build /usr/local/bin/qpdf /usr/bin/qpdf
|
||||
# ImageMagick: 4 layers collapsed to 1 via the magick-export staging dir in pdf-tools-build
|
||||
COPY --link --from=pdf-tools-build /magick-export/ /
|
||||
COPY --link --from=gs-build /usr/local/bin/gs /usr/local/bin/gs
|
||||
COPY --link --from=gs-build /usr/local/share/ghostscript /usr/local/share/ghostscript
|
||||
# Python venv pre-built in python-venv-build (no pip install at runtime, no build tools needed)
|
||||
COPY --link --from=python-venv-build /opt/venv /opt/venv
|
||||
RUN ldconfig /usr/local/lib && \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
/opt/venv/bin/python -c "import cffi; print('cffi OK:', cffi.__version__)" && \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
/opt/venv/bin/python -c "import cv2; print('OpenCV', cv2.__version__)" && \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
/opt/venv/bin/python -c "import ocrmypdf; print('ocrmypdf OK')" && \
|
||||
find /opt/venv -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
|
||||
|
||||
# ---
|
||||
# Non-root user
|
||||
# ---
|
||||
ARG PUID=1000
|
||||
ARG PGID=1000
|
||||
|
||||
RUN set -eux; \
|
||||
if ! getent group stirlingpdfgroup >/dev/null 2>&1; then \
|
||||
groupadd -g "${PGID}" stirlingpdfgroup 2>/dev/null \
|
||||
|| groupadd stirlingpdfgroup; \
|
||||
fi; \
|
||||
if ! id -u stirlingpdfuser >/dev/null 2>&1; then \
|
||||
useradd -m -u "${PUID}" -g stirlingpdfgroup \
|
||||
-d /home/stirlingpdfuser -s /bin/bash stirlingpdfuser 2>/dev/null \
|
||||
|| useradd -m -g stirlingpdfgroup \
|
||||
-d /home/stirlingpdfuser -s /bin/bash stirlingpdfuser; \
|
||||
fi; \
|
||||
ln -sf /usr/sbin/gosu /usr/local/bin/su-exec
|
||||
|
||||
# Application files.
|
||||
WORKDIR /app
|
||||
|
||||
# Application layers
|
||||
COPY --link --from=jar-extract --chown=1000:1000 /layers/dependencies/ /app/
|
||||
COPY --link --from=jar-extract --chown=1000:1000 /layers/spring-boot-loader/ /app/
|
||||
COPY --link --from=jar-extract --chown=1000:1000 /layers/snapshot-dependencies/ /app/
|
||||
@@ -695,63 +67,33 @@ COPY --link --chown=1000:1000 scripts/ /scripts/
|
||||
# Fonts go to system dir, root ownership is correct (world-readable)
|
||||
COPY app/core/src/main/resources/static/fonts/*.ttf /usr/share/fonts/truetype/
|
||||
|
||||
# Permissions and configuration.
|
||||
# Permissions and configuration
|
||||
RUN set -eux; \
|
||||
ln -sf /opt/calibre/ebook-convert /usr/bin/ebook-convert; \
|
||||
ln -sf /opt/venv/bin/unoconvert /usr/local/bin/unoconvert; \
|
||||
ln -sf /opt/venv/bin/unoserver /usr/local/bin/unoserver; \
|
||||
ln -sf /opt/venv/bin/ocrmypdf /usr/local/bin/ocrmypdf; \
|
||||
ln -sf /opt/venv/bin/weasyprint /usr/local/bin/weasyprint; \
|
||||
ln -sf /opt/venv/bin/unoping /usr/local/bin/unoping; \
|
||||
chmod +x /scripts/*; \
|
||||
mkdir -p /configs /configs/cache /configs/heap_dumps /logs /customFiles \
|
||||
/pipeline/watchedFolders /pipeline/finishedFolders \
|
||||
/tmp/stirling-pdf/heap_dumps; \
|
||||
# Create symlinks to allow app to find these in /app/
|
||||
ln -s /logs /app/logs; \
|
||||
ln -s /configs /app/configs; \
|
||||
ln -s /customFiles /app/customFiles; \
|
||||
ln -s /pipeline /app/pipeline; \
|
||||
chown -R stirlingpdfuser:stirlingpdfgroup \
|
||||
/home/stirlingpdfuser /configs /logs /customFiles /pipeline \
|
||||
/tmp/stirling-pdf; \
|
||||
chown -h stirlingpdfuser:stirlingpdfgroup /app/logs /app/configs /app/customFiles /app/pipeline; \
|
||||
chown stirlingpdfuser:stirlingpdfgroup /app; \
|
||||
chmod 750 /tmp/stirling-pdf; \
|
||||
chmod 750 /tmp/stirling-pdf/heap_dumps; \
|
||||
fc-cache -f
|
||||
# NOTE: Project Leyden AOT cache is generated in the background on first boot
|
||||
# by init-without-ocr.sh and stored in /configs/cache/stirling.aot (persistent volume).
|
||||
# The cache is picked up on subsequent boots for 15-25% faster startup.
|
||||
# See: JEP 483 + 514 + 515 (JDK 25).
|
||||
|
||||
# Environment variables.
|
||||
ARG VERSION_TAG
|
||||
# Write version to a file so it is readable by scripts without env-var inheritance.
|
||||
# init-without-ocr.sh reads /etc/stirling_version for the AOT cache fingerprint.
|
||||
RUN echo "${VERSION_TAG:-dev}" > /etc/stirling_version
|
||||
|
||||
# Environment variables
|
||||
ENV VERSION_TAG=$VERSION_TAG \
|
||||
STIRLING_AOT_ENABLE="false" \
|
||||
STIRLING_JVM_PROFILE="balanced" \
|
||||
_JVM_OPTS_BALANCED="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:G1HeapRegionSize=4m -XX:G1PeriodicGCInterval=60000 -XX:+UseStringDeduplication -XX:+UseCompactObjectHeaders -XX:+ExplicitGCInvokesConcurrent -Dspring.threads.virtual.enabled=true -Djava.awt.headless=true" \
|
||||
_JVM_OPTS_PERFORMANCE="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UseShenandoahGC -XX:ShenandoahGCMode=generational -XX:+UseCompactObjectHeaders -XX:+UseStringDeduplication -XX:+AlwaysPreTouch -XX:+ExplicitGCInvokesConcurrent -Dspring.threads.virtual.enabled=true -Djava.awt.headless=true" \
|
||||
JAVA_CUSTOM_OPTS="" \
|
||||
HOME=/home/stirlingpdfuser \
|
||||
PUID=${PUID} \
|
||||
PGID=${PGID} \
|
||||
UMASK=022 \
|
||||
PATH="/opt/venv/bin:${PATH}" \
|
||||
UNO_PATH=/usr/lib/libreoffice/program \
|
||||
LIBREOFFICE_BIN_PATH=/usr/lib/libreoffice/program/soffice.bin \
|
||||
STIRLING_TEMPFILES_DIRECTORY=/tmp/stirling-pdf \
|
||||
TMPDIR=/tmp/stirling-pdf \
|
||||
TEMP=/tmp/stirling-pdf \
|
||||
TMP=/tmp/stirling-pdf \
|
||||
QTWEBENGINE_CHROMIUM_FLAGS="--no-sandbox --disable-gpu --disable-software-rasterizer" \
|
||||
DBUS_SESSION_BUS_ADDRESS=/dev/null
|
||||
SAL_TMP=/tmp/stirling-pdf/libre
|
||||
|
||||
# Metadata labels.
|
||||
# Metadata labels
|
||||
LABEL org.opencontainers.image.title="Stirling-PDF" \
|
||||
org.opencontainers.image.description="Full version with Calibre, LibreOffice, Tesseract, OCRmyPDF" \
|
||||
org.opencontainers.image.source="https://github.com/Stirling-Tools/Stirling-PDF" \
|
||||
|
||||
@@ -1,192 +1,14 @@
|
||||
# Stirling-PDF - Fat version (embedded frontend)
|
||||
# Extra fonts for air-gapped environments
|
||||
# Uses pre-built base image for fast builds
|
||||
|
||||
FROM ubuntu:noble AS calibre-build
|
||||
ARG BASE_VERSION=1.0.0
|
||||
ARG BASE_IMAGE=ghcr.io/stirling-tools/stirling-pdf-base:${BASE_VERSION}
|
||||
|
||||
ARG CALIBRE_VERSION=9.4.0
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
set -eux; \
|
||||
apt-get update; \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates curl xz-utils libnss3 libfontconfig1 \
|
||||
libgl1 libegl1 libdbus-1-3 libasound2t64 libxcomposite1 \
|
||||
libxrandr2 libxkbcommon0 libxi6 libxtst6 libopengl0; \
|
||||
rm -rf /var/lib/apt/lists/*; \
|
||||
\
|
||||
case "$(uname -m)" in \
|
||||
x86_64) CALIBRE_ARCH="x86_64" ;; \
|
||||
aarch64) CALIBRE_ARCH="arm64" ;; \
|
||||
*) echo "Unsupported arch: $(uname -m)"; exit 1 ;; \
|
||||
esac; \
|
||||
\
|
||||
curl -fsSL \
|
||||
"https://download.calibre-ebook.com/${CALIBRE_VERSION}/calibre-${CALIBRE_VERSION}-${CALIBRE_ARCH}.txz" \
|
||||
-o /tmp/calibre.txz; \
|
||||
mkdir -p /opt/calibre; \
|
||||
tar xJf /tmp/calibre.txz -C /opt/calibre; \
|
||||
rm /tmp/calibre.txz; \
|
||||
\
|
||||
# Remove GUI-only shared libraries.
|
||||
# Libs required by WebEngine PDF output are preserved.
|
||||
rm -f /opt/calibre/lib/libQt6Designer* \
|
||||
/opt/calibre/lib/libQt6Multimedia* \
|
||||
/opt/calibre/lib/libQt6SpatialAudio.so.* \
|
||||
/opt/calibre/lib/libQt6NetworkAuth.so.* \
|
||||
/opt/calibre/lib/libQt6Concurrent.so.* \
|
||||
/opt/calibre/lib/libQt6OpenGLWidgets.so.* \
|
||||
/opt/calibre/lib/libQt6QuickWidgets.so.* \
|
||||
# AV / multimedia
|
||||
/opt/calibre/lib/libavcodec.so.* \
|
||||
/opt/calibre/lib/libavfilter.so.* \
|
||||
/opt/calibre/lib/libavformat.so.* \
|
||||
/opt/calibre/lib/libavutil.so.* \
|
||||
/opt/calibre/lib/libavdevice.so.* \
|
||||
/opt/calibre/lib/libpostproc.so.* \
|
||||
/opt/calibre/lib/libswresample.so.* \
|
||||
/opt/calibre/lib/libswscale.so.* \
|
||||
# Audio / speech / TTS
|
||||
/opt/calibre/lib/libspeex.so.* \
|
||||
/opt/calibre/lib/libFLAC.so.* \
|
||||
/opt/calibre/lib/libopus.so.* \
|
||||
/opt/calibre/lib/libvorbis*.so.* \
|
||||
/opt/calibre/lib/libasyncns.so.* \
|
||||
/opt/calibre/lib/libspeechd.so.* \
|
||||
/opt/calibre/lib/libespeak-ng.so.* \
|
||||
# Other unused libs
|
||||
/opt/calibre/lib/libonnxruntime.so.* \
|
||||
/opt/calibre/lib/libgio-2.0.so.* \
|
||||
/opt/calibre/lib/libzstd.so.* \
|
||||
/opt/calibre/lib/libhunspell-1.7.so.* \
|
||||
/opt/calibre/lib/libbrotlienc.so.* \
|
||||
/opt/calibre/lib/libbrotlicommon.so.* \
|
||||
/opt/calibre/lib/libbrotlidec.so.* \
|
||||
/opt/calibre/lib/libstemmer.so.* \
|
||||
/opt/calibre/lib/libmtp.so.* \
|
||||
/opt/calibre/lib/libncursesw.so.* \
|
||||
/opt/calibre/lib/libchm.so.* \
|
||||
/opt/calibre/lib/libgcrypt.so.* \
|
||||
/opt/calibre/lib/libgpg-error.so.* \
|
||||
/opt/calibre/lib/libicuio.so.* \
|
||||
/opt/calibre/lib/libreadline.so.* \
|
||||
/opt/calibre/lib/libusb-1.0.so.*; \
|
||||
rm -rf /opt/calibre/lib/qt6/plugins/platformthemes \
|
||||
/opt/calibre/lib/qt6/plugins/multimedia \
|
||||
/opt/calibre/lib/qt6/plugins/designer \
|
||||
/opt/calibre/lib/qt6/plugins/qmltooling; \
|
||||
\
|
||||
# Remove GUI, server, and utility executables; keep ebook-convert, ebook-meta, and calibre-parallel.
|
||||
rm -f /opt/calibre/calibre \
|
||||
/opt/calibre/calibre-server \
|
||||
/opt/calibre/calibre-smtp \
|
||||
/opt/calibre/calibre-debug \
|
||||
/opt/calibre/calibre-customize \
|
||||
/opt/calibre/calibredb \
|
||||
/opt/calibre/ebook-viewer \
|
||||
/opt/calibre/ebook-edit \
|
||||
/opt/calibre/ebook-polish \
|
||||
/opt/calibre/ebook-device \
|
||||
/opt/calibre/fetch-ebook-metadata \
|
||||
/opt/calibre/lrf2lrs \
|
||||
/opt/calibre/lrs2lrf \
|
||||
/opt/calibre/markdown-calibre \
|
||||
/opt/calibre/web2disk; \
|
||||
\
|
||||
# Remove Python modules not needed for conversion.
|
||||
rm -rf /opt/calibre/lib/calibre/gui2 \
|
||||
/opt/calibre/lib/calibre/devices \
|
||||
/opt/calibre/lib/calibre/library \
|
||||
/opt/calibre/lib/calibre/db \
|
||||
/opt/calibre/lib/calibre/srv \
|
||||
/opt/calibre/lib/calibre/spell \
|
||||
/opt/calibre/lib/calibre/live; \
|
||||
\
|
||||
# Remove resources not needed for CLI conversion.
|
||||
rm -rf /opt/calibre/resources/images \
|
||||
/opt/calibre/resources/icons \
|
||||
/opt/calibre/resources/icons.rcc \
|
||||
/opt/calibre/resources/content-server \
|
||||
/opt/calibre/resources/editor* \
|
||||
/opt/calibre/resources/viewer \
|
||||
/opt/calibre/resources/viewer.js \
|
||||
/opt/calibre/resources/viewer.html \
|
||||
/opt/calibre/resources/recipes \
|
||||
/opt/calibre/resources/dictionaries \
|
||||
/opt/calibre/resources/hyphenation \
|
||||
/opt/calibre/resources/catalog \
|
||||
/opt/calibre/resources/calibre-mimetypes.xml \
|
||||
/opt/calibre/resources/changelog.json \
|
||||
/opt/calibre/resources/user-agent-data.json \
|
||||
/opt/calibre/resources/builtin_recipes.zip \
|
||||
/opt/calibre/resources/builtin_recipes.xml \
|
||||
/opt/calibre/resources/builtin_recipes.xml \
|
||||
/opt/calibre/resources/stylelint-bundle.min.js \
|
||||
/opt/calibre/resources/stylelint.js \
|
||||
/opt/calibre/resources/rapydscript \
|
||||
/opt/calibre/resources/quick_start \
|
||||
/opt/calibre/resources/piper-voices.json \
|
||||
/opt/calibre/resources/images.qrc \
|
||||
/opt/calibre/resources/mozilla-ca-certs.pem \
|
||||
/opt/calibre/resources/ebook-convert-complete.calibre_msgpack \
|
||||
/opt/calibre/resources/mathjax \
|
||||
/opt/calibre/resources/common-english-words.txt \
|
||||
/opt/calibre/resources/calibre-portable.sh \
|
||||
/opt/calibre/resources/calibre-portable.bat \
|
||||
/opt/calibre/resources/metadata_sqlite.sql \
|
||||
/opt/calibre/resources/notes_sqlite.sql \
|
||||
/opt/calibre/resources/fts_sqlite.sql \
|
||||
/opt/calibre/resources/fts_triggers.sql \
|
||||
/opt/calibre/resources/jacket \
|
||||
/opt/calibre/resources/editor-functions.json \
|
||||
/opt/calibre/resources/calibre-ebook-root-CA.crt \
|
||||
/opt/calibre/resources/csscolorparser.js \
|
||||
/opt/calibre/resources/lookup.js \
|
||||
/opt/calibre/resources/pdf-mathjax-loader.js \
|
||||
/opt/calibre/resources/scraper.js \
|
||||
/opt/calibre/resources/toc.js \
|
||||
/opt/calibre/resources/user-manual-translation-stats.json \
|
||||
/opt/calibre/resources/pin-template.svg \
|
||||
/opt/calibre/resources/scripts.calibre_msgpack \
|
||||
/opt/calibre/lib/calibre/ebooks/docx/images \
|
||||
/opt/calibre/share \
|
||||
/opt/calibre/man; \
|
||||
\
|
||||
# Remove translations and localization while keeping required libraries.
|
||||
rm -rf /opt/calibre/lib/qt6/translations; \
|
||||
find /opt/calibre/translations -mindepth 1 -maxdepth 1 ! -name 'qtwebengine_locales' -exec rm -rf {} +; \
|
||||
rm -rf /opt/calibre/resources/localization/locales.zip \
|
||||
/opt/calibre/resources/localization/stats.calibre_msgpack \
|
||||
/opt/calibre/resources/localization/website-languages.txt; \
|
||||
find /opt/calibre/resources/localization -mindepth 1 -maxdepth 1 ! -name 'iso639.calibre_msgpack' -exec rm -rf {} +; \
|
||||
\
|
||||
# Strip debug symbols from calibre extension modules.
|
||||
# Exclude Qt6 libs and all qt6/ subdirectory files to prevent Chromium renderer crashes.
|
||||
find /opt/calibre/lib -name '*.so*' \
|
||||
! -name 'libQt6*' \
|
||||
! -path '*/qt6/*' \
|
||||
-exec strip --strip-unneeded {} + 2>/dev/null || true; \
|
||||
\
|
||||
find /opt/calibre -type d -name __pycache__ \
|
||||
-exec rm -rf {} + 2>/dev/null || true; \
|
||||
find /opt/calibre -name '*.pyc' -delete 2>/dev/null || true; \
|
||||
\
|
||||
# Verify conversion functionality.
|
||||
# NOTE: txt→epub is used intentionally, NOT txt→pdf.
|
||||
# Calibre 7+ uses WebEngine (Chromium) for PDF output, which requires kernel
|
||||
# capabilities unavailable in Docker RUN steps and segfaults under QEMU.
|
||||
# epub output exercises the same Python/plugin stack without touching WebEngine.
|
||||
/opt/calibre/ebook-convert --version; \
|
||||
echo "Hello" > /tmp/test.txt; \
|
||||
/opt/calibre/ebook-convert /tmp/test.txt /tmp/test.epub; \
|
||||
rm -f /tmp/test.txt /tmp/test.epub; \
|
||||
echo "=== Calibre stripped successfully ==="
|
||||
|
||||
|
||||
# Build the Java application and frontend.
|
||||
# Stage 1: Build the Java application and frontend
|
||||
FROM gradle:9.3.1-jdk25 AS app-build
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
apt-get update \
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends curl ca-certificates \
|
||||
&& update-ca-certificates \
|
||||
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
|
||||
@@ -209,381 +31,71 @@ COPY app/common/build.gradle app/common/
|
||||
COPY app/proprietary/build.gradle app/proprietary/
|
||||
|
||||
# Use system gradle instead of gradlew to avoid SSL issues downloading gradle distribution on emulated arm64
|
||||
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
|
||||
--mount=type=cache,target=/home/gradle/.gradle/wrapper \
|
||||
gradle dependencies --no-daemon || true
|
||||
RUN gradle dependencies --no-daemon || true
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
|
||||
--mount=type=cache,target=/home/gradle/.gradle/wrapper \
|
||||
DISABLE_ADDITIONAL_FEATURES=false \
|
||||
RUN DISABLE_ADDITIONAL_FEATURES=false \
|
||||
gradle clean build \
|
||||
-PbuildWithFrontend=true \
|
||||
-x spotlessApply -x spotlessCheck -x test -x sonarqube \
|
||||
--no-daemon
|
||||
|
||||
|
||||
# Python Builder stage.
|
||||
FROM ubuntu:noble AS python-build
|
||||
|
||||
ARG UNOSERVER_VERSION=3.6
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3 python3-venv python3-dev \
|
||||
python3-packaging \
|
||||
build-essential \
|
||||
# Build dependencies for ocrmypdf/weasyprint/opencv
|
||||
zlib1g-dev libjpeg-dev libffi-dev libpango1.0-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN python3 -m venv /opt/venv --system-site-packages
|
||||
ENV PATH="/opt/venv/bin:$PATH"
|
||||
|
||||
# Build all heavy python packages here
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
pip install \
|
||||
weasyprint pdf2image opencv-python-headless ocrmypdf \
|
||||
"unoserver==${UNOSERVER_VERSION}"
|
||||
# Stage 2: Extract Spring Boot Layers
|
||||
FROM eclipse-temurin:25-jre-noble AS jar-extract
|
||||
WORKDIR /tmp
|
||||
COPY --from=app-build /app/app/core/build/libs/*.jar app.jar
|
||||
RUN java -Djarmode=tools -jar app.jar extract --layers --destination /layers
|
||||
|
||||
|
||||
# Build Ghostscript 10.06.0 from source in an isolated stage (avoids library conflicts).
|
||||
FROM ubuntu:noble AS gs-build
|
||||
ARG GS_VERSION=10.06.0
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
--mount=type=cache,target=/tmp/gs-build,id=gs-build-${TARGETPLATFORM:-local} \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential curl ca-certificates libfontconfig1-dev && rm -rf /var/lib/apt/lists/* && \
|
||||
GS_TAG="gs$(printf '%s' "${GS_VERSION}" | tr -d '.')" && \
|
||||
cd /tmp/gs-build && \
|
||||
rm -rf ghostscript-* && \
|
||||
(test -d "ghostscript-${GS_VERSION}" || curl -fsSL "https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/${GS_TAG}/ghostscript-${GS_VERSION}.tar.gz" | tar xz) && \
|
||||
cd "ghostscript-${GS_VERSION}" && \
|
||||
./configure \
|
||||
--prefix=/usr/local \
|
||||
--without-x \
|
||||
--disable-cups \
|
||||
--disable-gtk && \
|
||||
make -j"$(nproc)" && \
|
||||
make install && \
|
||||
cd ..
|
||||
# Stage 3: Final runtime image on top of pre-built base
|
||||
FROM ${BASE_IMAGE}
|
||||
|
||||
ARG VERSION_TAG
|
||||
|
||||
# Build PDF Tools (QPDF and ImageMagick 7).
|
||||
FROM ubuntu:noble AS pdf-tools-build
|
||||
ARG QPDF_VERSION=12.3.2
|
||||
ARG IM_VERSION=7.1.2-13
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
--mount=type=cache,target=/tmp/pdf-tools-build,id=pdf-tools-${TARGETPLATFORM:-local} \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential cmake libssl-dev libjpeg-dev zlib1g-dev curl ca-certificates pkg-config \
|
||||
libpng-dev libtiff-dev libwebp-dev libxml2-dev libfreetype6-dev liblcms2-dev libzip-dev liblqr-1-0-dev \
|
||||
libltdl-dev libtool && rm -rf /var/lib/apt/lists/* && \
|
||||
cd /tmp/pdf-tools-build && \
|
||||
rm -rf qpdf-* ImageMagick-* && \
|
||||
# Build QPDF
|
||||
(test -d "qpdf-${QPDF_VERSION}" || curl -fsSL "https://github.com/qpdf/qpdf/releases/download/v${QPDF_VERSION}/qpdf-${QPDF_VERSION}.tar.gz" | tar xz) && \
|
||||
cd "qpdf-${QPDF_VERSION}" && \
|
||||
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DALLOW_CRYPTO_OPENSSL=ON -DDEFAULT_CRYPTO=openssl && \
|
||||
cmake --build build --parallel "$(nproc)" && \
|
||||
cmake --install build && \
|
||||
cd .. && \
|
||||
# Build ImageMagick 7
|
||||
(test -d "ImageMagick-${IM_VERSION}" || curl -fsSL "https://github.com/ImageMagick/ImageMagick/archive/refs/tags/${IM_VERSION}.tar.gz" | tar xz) && \
|
||||
cd "ImageMagick-${IM_VERSION}" && \
|
||||
./configure --prefix=/usr/local --with-modules --with-perl=no --with-magick-plus-plus=no --with-quantum-depth=16 --disable-static --enable-shared && \
|
||||
make -j"$(nproc)" && \
|
||||
make install && \
|
||||
# Enable PDF/PS/EPS in policy
|
||||
sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' /usr/local/etc/ImageMagick-7/policy.xml && \
|
||||
sed -i 's/rights="none" pattern="PS"/rights="read|write" pattern="PS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
|
||||
sed -i 's/rights="none" pattern="EPS"/rights="read|write" pattern="EPS"/' /usr/local/etc/ImageMagick-7/policy.xml && \
|
||||
cd .. && \
|
||||
ldconfig /usr/local/lib
|
||||
WORKDIR /app
|
||||
|
||||
# Application layers
|
||||
COPY --link --from=jar-extract --chown=1000:1000 /layers/dependencies/ /app/
|
||||
COPY --link --from=jar-extract --chown=1000:1000 /layers/spring-boot-loader/ /app/
|
||||
COPY --link --from=jar-extract --chown=1000:1000 /layers/snapshot-dependencies/ /app/
|
||||
COPY --link --from=jar-extract --chown=1000:1000 /layers/application/ /app/
|
||||
|
||||
# Final runtime image.
|
||||
FROM eclipse-temurin:25-jre AS runtime
|
||||
|
||||
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive \
|
||||
LANG=C.UTF-8 \
|
||||
LC_ALL=C.UTF-8 \
|
||||
TESS_BASE_PATH=/usr/share/tesseract-ocr/5/tessdata
|
||||
|
||||
ARG UNOSERVER_VERSION=3.6
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
--mount=type=cache,target=/root/.cache/pip \
|
||||
set -eux; \
|
||||
apt-get update; \
|
||||
# Add LibreOffice Fresh PPA for latest version (26.2.x)
|
||||
apt-get install -y --no-install-recommends software-properties-common; \
|
||||
add-apt-repository -y ppa:libreoffice/ppa; \
|
||||
apt-get update; \
|
||||
apt-get install -y --no-install-recommends \
|
||||
# Core tools
|
||||
ca-certificates tzdata tini bash fontconfig curl \
|
||||
ffmpeg poppler-utils fontforge \
|
||||
gosu unpaper pngquant \
|
||||
fonts-liberation2 \
|
||||
fonts-crosextra-caladea fonts-crosextra-carlito \
|
||||
fonts-noto-core fonts-noto-mono fonts-noto-extra \
|
||||
fonts-noto-cjk poppler-data \
|
||||
fonts-freefont-ttf fonts-terminus \
|
||||
# Python runtime & UNO bridge (python3-full -> python3 optimization)
|
||||
python3 python3-uno python3-packaging \
|
||||
# OCR
|
||||
tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu tesseract-ocr-fra \
|
||||
tesseract-ocr-por tesseract-ocr-chi-sim \
|
||||
# Graphics / AWT headless
|
||||
libcairo2 libpango-1.0-0 libpangoft2-1.0-0 \
|
||||
libfreetype6 libfontconfig1 libx11-6 libxt6 libxext6 libxrender1 \
|
||||
libxtst6 libxi6 libxinerama1 libxkbcommon0 libsm6 libice6 \
|
||||
# Qt/EGL for Calibre CLI
|
||||
libegl1 libgl1 libopengl0 libdbus-1-3 libglib2.0-0 libnss3 \
|
||||
libasound2t64 libxcomposite1 libxrandr2 \
|
||||
# Virtual framebuffer (required for headless LibreOffice Impress/Draw)
|
||||
xvfb x11-utils coreutils \
|
||||
libreoffice-writer-nogui libreoffice-calc-nogui \
|
||||
libreoffice-impress-nogui libreoffice-draw-nogui \
|
||||
libreoffice-base-nogui libreoffice-java-common \
|
||||
; \
|
||||
\
|
||||
# Fix LibreOffice UNO bridge and filter availability
|
||||
libreoffice --version; \
|
||||
soffice --version 2>/dev/null || true; \
|
||||
# Rebuild UNO bridge type database
|
||||
/usr/lib/libreoffice/program/soffice.bin --headless --convert-to pdf /dev/null 2>/dev/null || true; \
|
||||
# Force font cache rebuild and verify filters are available
|
||||
fc-cache -f -v 2>&1 | awk 'NR <= 20'; \
|
||||
\
|
||||
# Cleanup stage.
|
||||
\
|
||||
rm -rf /var/lib/apt/lists/*; \
|
||||
\
|
||||
# Docs / man / info / icons / themes / GUI assets (headless server)
|
||||
rm -rf /usr/share/doc/* /usr/share/man/* /usr/share/info/* \
|
||||
/usr/share/lintian/* /usr/share/linda/* \
|
||||
/usr/share/icons/* /usr/share/themes/* \
|
||||
/usr/share/javascript/* \
|
||||
/usr/share/gtk-3.0/* \
|
||||
/usr/share/fontforge/pixmaps \
|
||||
/usr/share/liblangtag/* \
|
||||
/usr/share/tcltk/* \
|
||||
/usr/share/python-wheels/*; \
|
||||
\
|
||||
# Clean up system locale data (LANG=C.UTF-8 doesn't use them)
|
||||
find /usr/share/locale -mindepth 1 -maxdepth 1 -type d \
|
||||
! -name 'en*' -exec rm -rf {} + 2>/dev/null || true; \
|
||||
rm -rf /usr/share/i18n/locales /usr/share/i18n/charmaps; \
|
||||
\
|
||||
rm -rf /usr/lib/libreoffice/share/gallery \
|
||||
/usr/lib/libreoffice/share/template \
|
||||
/usr/lib/libreoffice/share/wizards \
|
||||
/usr/lib/libreoffice/share/autotext \
|
||||
/usr/lib/libreoffice/help \
|
||||
/usr/lib/libreoffice/share/config/images_*.zip \
|
||||
/usr/lib/libreoffice/share/basic \
|
||||
/usr/lib/libreoffice/share/Scripts \
|
||||
/usr/lib/libreoffice/share/autocorr \
|
||||
/usr/lib/libreoffice/share/classification \
|
||||
/usr/lib/libreoffice/share/wordbook \
|
||||
/usr/lib/libreoffice/share/fingerprint \
|
||||
/usr/lib/libreoffice/share/xdg \
|
||||
/usr/lib/libreoffice/share/numbertext \
|
||||
/usr/lib/libreoffice/share/shell \
|
||||
/usr/lib/libreoffice/share/palette \
|
||||
/usr/lib/libreoffice/share/theme_definitions \
|
||||
/usr/lib/libreoffice/share/xslt \
|
||||
/usr/lib/libreoffice/share/labels \
|
||||
/usr/lib/libreoffice/share/dtd \
|
||||
/usr/lib/libreoffice/share/tipoftheday \
|
||||
/usr/lib/libreoffice/share/toolbarmode \
|
||||
/usr/lib/libreoffice/share/psprint; \
|
||||
\
|
||||
# Preserving soffice.cfg because LibreOffice needs it to load documents.
|
||||
\
|
||||
\
|
||||
\
|
||||
find /usr/lib -name '*.so*' -type f \
|
||||
-not -path '*/jvm/*' \
|
||||
-not -path '*/libreoffice/*' \
|
||||
-exec strip --strip-unneeded {} + 2>/dev/null || true; \
|
||||
\
|
||||
# Preserving ffmpeg codec libs as they are directly linked.
|
||||
\
|
||||
# Remove Mesa/LLVM GPU backends (~179 MB, not needed for headless/offscreen)
|
||||
MULTIARCH_LIBDIR=$(dpkg-architecture -qDEB_HOST_MULTIARCH 2>/dev/null \
|
||||
|| find /usr/lib -maxdepth 1 -type d -name '*-linux-gnu' | head -1); \
|
||||
rm -f \
|
||||
"${MULTIARCH_LIBDIR}"/libLLVM*.so* \
|
||||
"${MULTIARCH_LIBDIR}"/libgallium*.so* \
|
||||
2>/dev/null || true; \
|
||||
\
|
||||
# Python stdlib: remove unused modules (~71 MB)
|
||||
rm -rf /usr/lib/python3.12/test \
|
||||
/usr/lib/python3.12/idlelib \
|
||||
/usr/lib/python3.12/tkinter \
|
||||
/usr/lib/python3.12/lib2to3 \
|
||||
/usr/lib/python3.12/pydoc_data; \
|
||||
\
|
||||
# System Python packages not needed at runtime (~153 MB)
|
||||
rm -rf /usr/lib/python3/dist-packages/scipy \
|
||||
/usr/lib/python3/dist-packages/sympy \
|
||||
/usr/lib/python3/dist-packages/mpmath; \
|
||||
\
|
||||
# Duplicate system packages (superseded by venv versions, ~55 MB)
|
||||
rm -rf /usr/lib/python3/dist-packages/numpy \
|
||||
/usr/lib/python3/dist-packages/fontTools \
|
||||
/usr/lib/python3/dist-packages/PIL; \
|
||||
\
|
||||
# System-wide Python cache cleanup
|
||||
find /usr/lib/python3* -type d -name __pycache__ \
|
||||
-exec rm -rf {} + 2>/dev/null || true; \
|
||||
find /usr/lib/python3* \( -name '*.pyc' -o -name '*.pyi' \) \
|
||||
-delete 2>/dev/null || true; \
|
||||
\
|
||||
# Additional metadata cleanup
|
||||
# FIX: Only remove ImageMagick doc/www, NOT the whole dir (preserves policy.xml/delegates.xml)
|
||||
rm -rf /usr/share/bug /usr/share/lintian /usr/share/linda \
|
||||
/var/lib/dpkg/info/*.md5sums \
|
||||
/var/log/dpkg.log /var/log/apt/* \
|
||||
/usr/local/share/ghostscript/*/doc \
|
||||
/usr/local/share/ghostscript/*/examples \
|
||||
/usr/share/ImageMagick-*/doc \
|
||||
/usr/share/ImageMagick-*/www; \
|
||||
\
|
||||
\
|
||||
# NEW: Tesseract training configs (not needed for OCR, but keep configs/ for hocr/txt output)
|
||||
rm -rf /usr/share/tesseract-ocr/*/tessdata/tessconfigs; \
|
||||
\
|
||||
# Trim CJK fonts to Regular weight only (FIX: Broadened path)
|
||||
find /usr/share/fonts -name '*CJK*' \
|
||||
! -name '*Regular*' -type f -delete 2>/dev/null || true; \
|
||||
\
|
||||
# Misc caches
|
||||
rm -rf /var/cache/fontconfig/* /tmp/*
|
||||
|
||||
# Python virtual environment.
|
||||
COPY --from=python-build /opt/venv /opt/venv
|
||||
|
||||
RUN set -eux; \
|
||||
ln -sf /opt/venv/bin/unoconvert /usr/local/bin/unoconvert; \
|
||||
ln -sf /opt/venv/bin/unoserver /usr/local/bin/unoserver; \
|
||||
# Verify python libs are accessible
|
||||
/opt/venv/bin/python -c "import cv2; import ocrmypdf; import weasyprint; print('Python libs verified')"; \
|
||||
# Cleanup venv from builder leftovers
|
||||
find /opt/venv -type d -name __pycache__ \
|
||||
-exec rm -rf {} + 2>/dev/null || true; \
|
||||
find /opt/venv \( -name '*.pyc' -o -name '*.pyi' \) -delete 2>/dev/null || true; \
|
||||
rm -rf /opt/venv/lib/python*/site-packages/pip \
|
||||
/opt/venv/lib/python*/site-packages/pip-*.dist-info \
|
||||
/opt/venv/lib/python*/site-packages/setuptools \
|
||||
/opt/venv/lib/python*/site-packages/setuptools-*.dist-info;
|
||||
|
||||
# Calibre and PDF Tools.
|
||||
COPY --link --from=calibre-build /opt/calibre /opt/calibre
|
||||
COPY --link --from=pdf-tools-build /usr/local/bin/qpdf /usr/bin/qpdf
|
||||
COPY --link --from=pdf-tools-build /usr/local/bin/magick /usr/bin/magick
|
||||
COPY --link --from=pdf-tools-build /usr/local/lib/libMagick* /usr/local/lib/
|
||||
COPY --link --from=pdf-tools-build /usr/local/etc/ImageMagick-7 /usr/local/etc/ImageMagick-7
|
||||
COPY --link --from=gs-build /usr/local/bin/gs /usr/local/bin/gs
|
||||
COPY --link --from=gs-build /usr/local/share/ghostscript /usr/local/share/ghostscript
|
||||
|
||||
RUN set -eux; \
|
||||
ldconfig /usr/local/lib; \
|
||||
# Clean pycache that may have been generated during stage-1 verify
|
||||
find /opt/calibre -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true;
|
||||
|
||||
# Non-root user.
|
||||
ARG PUID=1000
|
||||
ARG PGID=1000
|
||||
|
||||
RUN set -eux; \
|
||||
if ! getent group stirlingpdfgroup >/dev/null 2>&1; then \
|
||||
groupadd -g "${PGID}" stirlingpdfgroup 2>/dev/null \
|
||||
|| groupadd stirlingpdfgroup; \
|
||||
fi; \
|
||||
if ! id -u stirlingpdfuser >/dev/null 2>&1; then \
|
||||
useradd -m -u "${PUID}" -g stirlingpdfgroup \
|
||||
-d /home/stirlingpdfuser -s /bin/bash stirlingpdfuser 2>/dev/null \
|
||||
|| useradd -m -g stirlingpdfgroup \
|
||||
-d /home/stirlingpdfuser -s /bin/bash stirlingpdfuser; \
|
||||
fi; \
|
||||
ln -sf /usr/sbin/gosu /usr/local/bin/su-exec
|
||||
|
||||
# Application files.
|
||||
COPY --link --from=app-build --chown=1000:1000 \
|
||||
/app/app/core/build/libs/*.jar /app.jar
|
||||
COPY --link --from=app-build --chown=1000:1000 \
|
||||
/app/build/libs/restart-helper.jar /restart-helper.jar
|
||||
COPY --link --chown=1000:1000 scripts/ /scripts/
|
||||
|
||||
# Fonts go to system dir — root ownership is correct (world-readable)
|
||||
COPY --link app/core/src/main/resources/static/fonts/*.ttf /usr/share/fonts/truetype/
|
||||
# Fonts go to system dir, root ownership is correct (world-readable)
|
||||
COPY app/core/src/main/resources/static/fonts/*.ttf /usr/share/fonts/truetype/
|
||||
|
||||
# Permissions and configuration.
|
||||
# Permissions and configuration
|
||||
RUN set -eux; \
|
||||
ln -sf /opt/calibre/ebook-convert /usr/bin/ebook-convert; \
|
||||
ln -sf /opt/venv/bin/unoconvert /usr/local/bin/unoconvert; \
|
||||
ln -sf /opt/venv/bin/unoserver /usr/local/bin/unoserver; \
|
||||
ln -sf /opt/venv/bin/ocrmypdf /usr/local/bin/ocrmypdf; \
|
||||
ln -sf /opt/venv/bin/weasyprint /usr/local/bin/weasyprint; \
|
||||
ln -sf /opt/venv/bin/unoping /usr/local/bin/unoping; \
|
||||
chmod +x /scripts/*; \
|
||||
mkdir -p /configs /logs /customFiles \
|
||||
/pipeline/watchedFolders /pipeline/finishedFolders \
|
||||
/tmp/stirling-pdf/heap_dumps; \
|
||||
# Create symlinks to allow app to find these in /app/
|
||||
mkdir -p /app; \
|
||||
ln -s /logs /app/logs; \
|
||||
ln -s /configs /app/configs; \
|
||||
ln -s /customFiles /app/customFiles; \
|
||||
ln -s /pipeline /app/pipeline; \
|
||||
chown -R stirlingpdfuser:stirlingpdfgroup \
|
||||
/home/stirlingpdfuser /configs /logs /customFiles /pipeline \
|
||||
/tmp/stirling-pdf; \
|
||||
chown -h stirlingpdfuser:stirlingpdfgroup /app/logs /app/configs /app/customFiles /app/pipeline; \
|
||||
chown stirlingpdfuser:stirlingpdfgroup /app; \
|
||||
chmod 1777 /tmp/stirling-pdf; \
|
||||
fc-cache -f; \
|
||||
# NOTE: Project Leyden AOT cache is generated in the background on first boot
|
||||
# by init-without-ocr.sh. The cache is picked up on subsequent boots for
|
||||
# 15-25% faster startup. See: JEP 483 + 514 + 515 (JDK 25).
|
||||
\
|
||||
# Clean Calibre pycache that may have been generated during stage-1 verify
|
||||
find /opt/calibre -type d -name __pycache__ \
|
||||
-exec rm -rf {} + 2>/dev/null || true
|
||||
chmod 750 /tmp/stirling-pdf; \
|
||||
chmod 750 /tmp/stirling-pdf/heap_dumps; \
|
||||
fc-cache -f
|
||||
|
||||
# Environment variables.
|
||||
ARG VERSION_TAG
|
||||
# Write version to a file so it is readable by scripts without env-var inheritance.
|
||||
RUN echo "${VERSION_TAG:-dev}" > /etc/stirling_version
|
||||
|
||||
# Environment variables
|
||||
ENV VERSION_TAG=$VERSION_TAG \
|
||||
STIRLING_AOT_ENABLE="false" \
|
||||
STIRLING_JVM_PROFILE="balanced" \
|
||||
_JVM_OPTS_BALANCED="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:G1HeapRegionSize=4m -XX:G1PeriodicGCInterval=60000 -XX:+UseStringDeduplication -XX:+UseCompactObjectHeaders -XX:+ExplicitGCInvokesConcurrent -Dspring.threads.virtual.enabled=true -Djava.awt.headless=true" \
|
||||
_JVM_OPTS_PERFORMANCE="-XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/configs/heap_dumps -XX:+UseShenandoahGC -XX:ShenandoahGCMode=generational -XX:+UseCompactObjectHeaders -XX:+UseStringDeduplication -XX:+AlwaysPreTouch -XX:+ExplicitGCInvokesConcurrent -Dspring.threads.virtual.enabled=true -Djava.awt.headless=true" \
|
||||
JAVA_CUSTOM_OPTS="" \
|
||||
HOME=/home/stirlingpdfuser \
|
||||
PUID=${PUID} \
|
||||
PGID=${PGID} \
|
||||
UMASK=022 \
|
||||
FAT_DOCKER=true \
|
||||
INSTALL_BOOK_AND_ADVANCED_HTML_OPS=false \
|
||||
PATH="/opt/venv/bin:${PATH}" \
|
||||
UNO_PATH=/usr/lib/libreoffice/program \
|
||||
LIBREOFFICE_BIN_PATH=/usr/lib/libreoffice/program/soffice.bin \
|
||||
STIRLING_TEMPFILES_DIRECTORY=/tmp/stirling-pdf \
|
||||
TMPDIR=/tmp/stirling-pdf \
|
||||
TEMP=/tmp/stirling-pdf \
|
||||
TMP=/tmp/stirling-pdf \
|
||||
QTWEBENGINE_CHROMIUM_FLAGS="--no-sandbox --disable-gpu --disable-software-rasterizer" \
|
||||
DBUS_SESSION_BUS_ADDRESS=/dev/null
|
||||
SAL_TMP=/tmp/stirling-pdf/libre
|
||||
|
||||
# Metadata labels.
|
||||
# Metadata labels
|
||||
LABEL org.opencontainers.image.title="Stirling-PDF Fat" \
|
||||
org.opencontainers.image.description="Fat version with extra fonts for air-gapped environments, includes Calibre, LibreOffice, Tesseract, OCRmyPDF" \
|
||||
org.opencontainers.image.source="https://github.com/Stirling-Tools/Stirling-PDF" \
|
||||
@@ -599,8 +111,8 @@ LABEL org.opencontainers.image.title="Stirling-PDF Fat" \
|
||||
EXPOSE 8080/tcp
|
||||
STOPSIGNAL SIGTERM
|
||||
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
|
||||
CMD curl -f http://localhost:8080/api/v1/info/status || exit 1
|
||||
HEALTHCHECK --interval=30s --timeout=15s --start-period=120s --retries=5 \
|
||||
CMD curl -fs --max-time 10 http://localhost:8080/api/v1/info/status || exit 1
|
||||
|
||||
ENTRYPOINT ["tini", "--", "/scripts/init.sh"]
|
||||
CMD []
|
||||
|
||||
@@ -5,8 +5,7 @@
|
||||
FROM gradle:9.3.1-jdk25 AS build
|
||||
|
||||
# Install Node.js and npm for frontend build
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
|
||||
&& apt-get install -y --no-install-recommends nodejs \
|
||||
@@ -30,15 +29,13 @@ ENV JDK_JAVA_OPTIONS="--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNN
|
||||
--add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
|
||||
--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED"
|
||||
|
||||
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
|
||||
./gradlew dependencies --no-daemon || true
|
||||
RUN ./gradlew dependencies --no-daemon || true
|
||||
|
||||
# Copy entire project
|
||||
COPY . .
|
||||
|
||||
# Build ultra-lite JAR with embedded frontend (minimal features)
|
||||
RUN --mount=type=cache,target=/home/gradle/.gradle/caches \
|
||||
DISABLE_ADDITIONAL_FEATURES=true \
|
||||
RUN DISABLE_ADDITIONAL_FEATURES=true \
|
||||
./gradlew clean build \
|
||||
-PbuildWithFrontend=true \
|
||||
-x spotlessApply -x spotlessCheck -x test -x sonarqube \
|
||||
|
||||
@@ -2,6 +2,9 @@
|
||||
services:
|
||||
stirling-pdf:
|
||||
container_name: Stirling-PDF-Fat-Disable-Endpoints
|
||||
build:
|
||||
context: ../../..
|
||||
dockerfile: docker/embedded/Dockerfile.fat
|
||||
image: docker.stirlingpdf.com/stirlingtools/stirling-pdf:fat
|
||||
build:
|
||||
context: ../../../
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
services:
|
||||
stirling-pdf:
|
||||
container_name: Stirling-PDF-Security-Fat
|
||||
build:
|
||||
context: ../../..
|
||||
dockerfile: docker/embedded/Dockerfile.fat
|
||||
image: docker.stirlingpdf.com/stirlingtools/stirling-pdf:fat
|
||||
build:
|
||||
context: ../../../
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
services:
|
||||
stirling-pdf:
|
||||
container_name: Stirling-PDF-Ultra-Lite
|
||||
build:
|
||||
context: ../../..
|
||||
dockerfile: docker/embedded/Dockerfile.ultra-lite
|
||||
image: docker.stirlingpdf.com/stirlingtools/stirling-pdf:ultra-lite
|
||||
build:
|
||||
context: ../../../
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
services:
|
||||
stirling-pdf:
|
||||
container_name: Stirling-PDF-Security-Fat-with-login
|
||||
build:
|
||||
context: ../../..
|
||||
dockerfile: docker/embedded/Dockerfile.fat
|
||||
image: docker.stirlingpdf.com/stirlingtools/stirling-pdf:fat
|
||||
build:
|
||||
context: ../../../
|
||||
|
||||
@@ -1,463 +0,0 @@
|
||||
# Stirling-PDF Unified Container
|
||||
|
||||
Single Docker container that can run as **frontend + backend**, **frontend only**, or **backend only** using the `MODE` environment variable.
|
||||
|
||||
## Quick Start
|
||||
|
||||
### MODE=BOTH (Default)
|
||||
Single container with both frontend and backend on port 8080:
|
||||
|
||||
```bash
|
||||
docker run -p 8080:8080 \
|
||||
-e MODE=BOTH \
|
||||
stirlingtools/stirling-pdf:unified
|
||||
```
|
||||
|
||||
Access at: `http://localhost:8080`
|
||||
|
||||
### MODE=FRONTEND
|
||||
Frontend only, connecting to separate backend:
|
||||
|
||||
```bash
|
||||
docker run -p 8080:8080 \
|
||||
-e MODE=FRONTEND \
|
||||
-e VITE_API_BASE_URL=http://backend:8080 \
|
||||
stirlingtools/stirling-pdf:unified
|
||||
```
|
||||
|
||||
### MODE=BACKEND
|
||||
Backend API only:
|
||||
|
||||
```bash
|
||||
docker run -p 8080:8080 \
|
||||
-e MODE=BACKEND \
|
||||
stirlingtools/stirling-pdf:unified
|
||||
```
|
||||
|
||||
Access API at: `http://localhost:8080/api`
|
||||
Swagger UI at: `http://localhost:8080/swagger-ui/index.html`
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
### MODE=BOTH (Default)
|
||||
```
|
||||
┌─────────────────────────────────────┐
|
||||
│ Port 8080 (External) │
|
||||
│ ┌───────────────────────────────┐ │
|
||||
│ │ Nginx │ │
|
||||
│ │ • Serves frontend (/) │ │
|
||||
│ │ • Proxies /api/* → backend │ │
|
||||
│ └───────────┬───────────────────┘ │
|
||||
│ │ │
|
||||
│ ┌───────────▼───────────────────┐ │
|
||||
│ │ Backend (Internal 8081) │ │
|
||||
│ │ • Spring Boot │ │
|
||||
│ │ • PDF Processing │ │
|
||||
│ │ • UnoServer │ │
|
||||
│ └───────────────────────────────┘ │
|
||||
└─────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### MODE=FRONTEND
|
||||
```
|
||||
┌─────────────────────────────┐ ┌──────────────────┐
|
||||
│ Frontend Container │ │ Backend │
|
||||
│ Port 8080 │ │ (External) │
|
||||
│ ┌───────────────────────┐ │ │ │
|
||||
│ │ Nginx │ │──────▶ :8080/api │
|
||||
│ │ • Serves frontend │ │ │ │
|
||||
│ │ • Proxies to backend │ │ │ │
|
||||
│ └───────────────────────┘ │ └──────────────────┘
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
||||
### MODE=BACKEND
|
||||
```
|
||||
┌─────────────────────────────┐
|
||||
│ Backend Container │
|
||||
│ Port 8080 │
|
||||
│ ┌───────────────────────┐ │
|
||||
│ │ Spring Boot │ │
|
||||
│ │ • API Endpoints │ │
|
||||
│ │ • PDF Processing │ │
|
||||
│ │ • UnoServer │ │
|
||||
│ └───────────────────────┘ │
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Environment Variables
|
||||
|
||||
### MODE Configuration
|
||||
|
||||
| Variable | Values | Default | Description |
|
||||
|----------|--------|---------|-------------|
|
||||
| `MODE` | `BOTH`, `FRONTEND`, `BACKEND` | `BOTH` | Container operation mode |
|
||||
|
||||
### MODE=BOTH Specific
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `BACKEND_INTERNAL_PORT` | `8081` | Internal port for backend when MODE=BOTH |
|
||||
|
||||
### MODE=FRONTEND Specific
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `VITE_API_BASE_URL` | `http://backend:8080` | Backend URL for API proxying |
|
||||
|
||||
### Standard Configuration
|
||||
|
||||
All modes support standard Stirling-PDF environment variables:
|
||||
|
||||
- `DISABLE_ADDITIONAL_FEATURES` - Enable/disable OCR and LibreOffice features
|
||||
- `DOCKER_ENABLE_SECURITY` - Enable authentication
|
||||
- `PUID` / `PGID` - User/Group IDs
|
||||
- `SYSTEM_MAXFILESIZE` - Max upload size (MB)
|
||||
- `TESSERACT_LANGS` - Comma-separated OCR language codes
|
||||
- `JAVA_CUSTOM_OPTS` - Additional JVM options
|
||||
- `PROCESS_EXECUTOR_AUTO_UNO_SERVER` - Overrides `processExecutor.autoUnoServer` (true or false)
|
||||
- `PROCESS_EXECUTOR_SESSION_LIMIT_LIBRE_OFFICE_SESSION_LIMIT` - Overrides `processExecutor.sessionLimit.libreOfficeSessionLimit`
|
||||
- `UNO_SERVER_AUTO` - Legacy alias for `processExecutor.autoUnoServer`
|
||||
- `UNO_SERVER_COUNT` - Legacy alias for `processExecutor.sessionLimit.libreOfficeSessionLimit`
|
||||
- `UNO_SERVER_HEALTH_INTERVAL` - Seconds between unoserver PID checks (default: 30)
|
||||
|
||||
See full configuration docs at: https://docs.stirlingpdf.com
|
||||
|
||||
---
|
||||
|
||||
## Docker Compose Examples
|
||||
|
||||
### Example 1: All-in-One (MODE=BOTH)
|
||||
|
||||
**File:** `docker/compose/docker-compose-unified-both.yml`
|
||||
|
||||
```yaml
|
||||
services:
|
||||
stirling-pdf:
|
||||
image: stirlingtools/stirling-pdf:unified
|
||||
ports:
|
||||
- "8080:8080"
|
||||
volumes:
|
||||
- ./data:/usr/share/tessdata:rw
|
||||
- ./config:/configs:rw
|
||||
environment:
|
||||
MODE: BOTH
|
||||
restart: unless-stopped
|
||||
```
|
||||
|
||||
### Example 2: Separate Frontend & Backend
|
||||
|
||||
**File:** `docker/compose/docker-compose-unified-frontend.yml`
|
||||
|
||||
```yaml
|
||||
services:
|
||||
backend:
|
||||
image: stirlingtools/stirling-pdf:unified
|
||||
ports:
|
||||
- "8081:8080"
|
||||
environment:
|
||||
MODE: BACKEND
|
||||
volumes:
|
||||
- ./data:/usr/share/tessdata:rw
|
||||
- ./config:/configs:rw
|
||||
|
||||
frontend:
|
||||
image: stirlingtools/stirling-pdf:unified
|
||||
ports:
|
||||
- "8080:8080"
|
||||
environment:
|
||||
MODE: FRONTEND
|
||||
VITE_API_BASE_URL: http://backend:8080
|
||||
depends_on:
|
||||
- backend
|
||||
```
|
||||
|
||||
### Example 3: Backend API Only
|
||||
|
||||
**File:** `docker/compose/docker-compose-unified-backend.yml`
|
||||
|
||||
```yaml
|
||||
services:
|
||||
stirling-pdf-api:
|
||||
image: stirlingtools/stirling-pdf:unified
|
||||
ports:
|
||||
- "8080:8080"
|
||||
environment:
|
||||
MODE: BACKEND
|
||||
volumes:
|
||||
- ./data:/usr/share/tessdata:rw
|
||||
- ./config:/configs:rw
|
||||
restart: unless-stopped
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Building the Image
|
||||
|
||||
```bash
|
||||
# From repository root
|
||||
docker build -t stirlingtools/stirling-pdf:unified -f docker/Dockerfile.unified .
|
||||
```
|
||||
|
||||
### Build Arguments
|
||||
|
||||
| Argument | Description |
|
||||
|----------|-------------|
|
||||
| `VERSION_TAG` | Version tag for the image |
|
||||
|
||||
Example:
|
||||
```bash
|
||||
docker build \
|
||||
--build-arg VERSION_TAG=v1.0.0 \
|
||||
-t stirlingtools/stirling-pdf:unified \
|
||||
-f docker/Dockerfile.unified .
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Use Cases
|
||||
|
||||
### 1. Simple Deployment (MODE=BOTH)
|
||||
- **Best for:** Personal use, small teams, simple deployments
|
||||
- **Pros:** Single container, easy setup, minimal configuration
|
||||
- **Cons:** Frontend and backend scale together
|
||||
|
||||
### 2. Scaled Frontend (MODE=FRONTEND + BACKEND)
|
||||
- **Best for:** High traffic, need to scale frontend independently
|
||||
- **Pros:** Scale frontend containers separately, CDN-friendly
|
||||
- **Example:**
|
||||
```yaml
|
||||
services:
|
||||
backend:
|
||||
image: stirlingtools/stirling-pdf:unified
|
||||
environment:
|
||||
MODE: BACKEND
|
||||
deploy:
|
||||
replicas: 1
|
||||
|
||||
frontend:
|
||||
image: stirlingtools/stirling-pdf:unified
|
||||
environment:
|
||||
MODE: FRONTEND
|
||||
VITE_API_BASE_URL: http://backend:8080
|
||||
deploy:
|
||||
replicas: 5 # Scale frontend independently
|
||||
```
|
||||
|
||||
### 3. API-Only (MODE=BACKEND)
|
||||
- **Best for:** Headless deployments, custom frontends, API integrations
|
||||
- **Pros:** Minimal resources, no nginx overhead
|
||||
- **Example:** Use with external frontend or API consumers
|
||||
|
||||
### 4. Multi-Backend Setup
|
||||
- **Best for:** Load balancing, high availability
|
||||
- **Example:**
|
||||
```yaml
|
||||
services:
|
||||
backend-1:
|
||||
image: stirlingtools/stirling-pdf:unified
|
||||
environment:
|
||||
MODE: BACKEND
|
||||
|
||||
backend-2:
|
||||
image: stirlingtools/stirling-pdf:unified
|
||||
environment:
|
||||
MODE: BACKEND
|
||||
|
||||
frontend:
|
||||
image: stirlingtools/stirling-pdf:unified
|
||||
environment:
|
||||
MODE: FRONTEND
|
||||
VITE_API_BASE_URL: http://load-balancer:8080
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Port Configuration
|
||||
|
||||
All modes use **port 8080** by default:
|
||||
|
||||
- **MODE=BOTH**: Nginx listens on 8080, proxies to backend on internal 8081
|
||||
- **MODE=FRONTEND**: Nginx listens on 8080
|
||||
- **MODE=BACKEND**: Spring Boot listens on 8080
|
||||
|
||||
**Expose port 8080** in all configurations:
|
||||
```yaml
|
||||
ports:
|
||||
- "8080:8080"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Health Checks
|
||||
|
||||
### MODE=BOTH and MODE=BACKEND
|
||||
```yaml
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:8080/api/v1/info/status || exit 1"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
```
|
||||
|
||||
### MODE=FRONTEND
|
||||
```yaml
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -f http://localhost:8080/ || exit 1"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Check logs
|
||||
```bash
|
||||
docker logs stirling-pdf-container
|
||||
```
|
||||
|
||||
Look for the startup banner:
|
||||
```
|
||||
===================================
|
||||
Stirling-PDF Unified Container
|
||||
MODE: BOTH
|
||||
===================================
|
||||
```
|
||||
|
||||
### Invalid MODE error
|
||||
```
|
||||
ERROR: Invalid MODE 'XYZ'. Must be BOTH, FRONTEND, or BACKEND
|
||||
```
|
||||
**Fix:** Set `MODE` to one of the three valid values.
|
||||
|
||||
### Frontend can't connect to backend (MODE=FRONTEND)
|
||||
**Check:**
|
||||
1. `VITE_API_BASE_URL` points to correct backend URL
|
||||
2. Backend container is running and accessible
|
||||
3. Network connectivity between containers
|
||||
|
||||
### Backend not starting (MODE=BOTH or BACKEND)
|
||||
**Check:**
|
||||
1. Sufficient memory allocated (4GB recommended)
|
||||
2. Java heap size (`JAVA_CUSTOM_OPTS`)
|
||||
3. Volume permissions for `/tmp/stirling-pdf`
|
||||
|
||||
---
|
||||
|
||||
## Migration Guide
|
||||
|
||||
### From Separate Containers → MODE=BOTH
|
||||
|
||||
**Before:**
|
||||
```yaml
|
||||
services:
|
||||
frontend:
|
||||
image: stirlingtools/stirling-pdf:frontend
|
||||
ports: ["80:80"]
|
||||
|
||||
backend:
|
||||
image: stirlingtools/stirling-pdf:backend
|
||||
ports: ["8080:8080"]
|
||||
```
|
||||
|
||||
**After:**
|
||||
```yaml
|
||||
services:
|
||||
stirling-pdf:
|
||||
image: stirlingtools/stirling-pdf:unified
|
||||
ports: ["8080:8080"]
|
||||
environment:
|
||||
MODE: BOTH
|
||||
```
|
||||
|
||||
### From Legacy → MODE=BACKEND
|
||||
```yaml
|
||||
services:
|
||||
stirling-pdf:
|
||||
image: stirlingtools/stirling-pdf:latest
|
||||
ports: ["8080:8080"]
|
||||
```
|
||||
|
||||
**Becomes:**
|
||||
```yaml
|
||||
services:
|
||||
stirling-pdf:
|
||||
image: stirlingtools/stirling-pdf:unified
|
||||
ports: ["8080:8080"]
|
||||
environment:
|
||||
MODE: BACKEND
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance Tuning
|
||||
|
||||
### MODE=BOTH
|
||||
```yaml
|
||||
environment:
|
||||
JAVA_CUSTOM_OPTS: "-Xmx4g -XX:MaxRAMPercentage=75"
|
||||
BACKEND_INTERNAL_PORT: 8081
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 4G
|
||||
reservations:
|
||||
memory: 2G
|
||||
```
|
||||
|
||||
### MODE=FRONTEND (Lightweight)
|
||||
```yaml
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
reservations:
|
||||
memory: 256M
|
||||
```
|
||||
|
||||
### MODE=BACKEND (Heavy Processing)
|
||||
```yaml
|
||||
environment:
|
||||
JAVA_CUSTOM_OPTS: "-Xmx8g"
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 10G
|
||||
reservations:
|
||||
memory: 4G
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Security Considerations
|
||||
|
||||
1. **MODE=BOTH**: Backend not exposed externally (runs on internal port)
|
||||
2. **MODE=BACKEND**: API exposed directly - consider API authentication
|
||||
3. **MODE=FRONTEND**: Serves static files and proxies API requests to the backend - minimal attack surface
|
||||
|
||||
Enable security features:
|
||||
```yaml
|
||||
environment:
|
||||
DOCKER_ENABLE_SECURITY: "true"
|
||||
SECURITY_ENABLELOGIN: "true"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Support
|
||||
|
||||
- Documentation: https://docs.stirlingpdf.com
|
||||
- GitHub Issues: https://github.com/Stirling-Tools/Stirling-PDF/issues
|
||||
- Docker Hub: https://hub.docker.com/r/stirlingtools/stirling-pdf
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
MIT License - See repository for full details
|
||||
@@ -1,38 +0,0 @@
|
||||
#!/bin/bash

# Builds the Stirling-PDF unified container image from the repository root.
# Usage: ./build.sh [version-tag]
#   version-tag  suffix appended to the "unified-" image tag (defaults to "latest")

set -e

VERSION_TAG=${1:-latest}
IMAGE_NAME="stirlingtools/stirling-pdf:unified-${VERSION_TAG}"

# Prints the separator line that frames each status section.
print_rule() {
    echo "==================================="
}

print_rule
echo "Building Stirling-PDF Unified Container"
echo "Version: $VERSION_TAG"
echo "Image: $IMAGE_NAME"
print_rule

# The script is assumed to live in docker/unified/, so the repository root is two levels up
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
REPO_ROOT="$SCRIPT_DIR/../.."
cd "$REPO_ROOT"

# Assemble the docker arguments once, then run the build with the repo root as context
build_args=(
    --build-arg VERSION_TAG="$VERSION_TAG"
    -t "$IMAGE_NAME"
    -f docker/Dockerfile.unified
    .
)
docker build "${build_args[@]}"

print_rule
echo "✓ Build complete!"
echo "Image: $IMAGE_NAME"
echo ""
echo "Test the image:"
for run_mode in BOTH FRONTEND BACKEND; do
    echo " MODE=${run_mode}: docker run -p 8080:8080 -e MODE=${run_mode} $IMAGE_NAME"
done
print_rule
|
||||
@@ -1,379 +0,0 @@
|
||||
#!/bin/bash

set -e

# MODE selects which services this container runs; BOTH is used when it is unset or empty
: "${MODE:=BOTH}"

# Startup banner showing the effective mode
printf '%s\n' \
    "===================================" \
    "Stirling-PDF Unified Container" \
    "MODE: $MODE" \
    "==================================="
|
||||
|
||||
# Prepares the Tesseract language data (adapted from init.sh).
#
# Reads:   TESSERACT_LANGS - optional comma-separated language codes, e.g. "deu,fra"
# Sets:    TESSDATA_DIR    - the tessdata directory used below
# Exports: TESSDATA_PREFIX - same directory, for Tesseract
# Restoring the backup and installing language packs are best-effort: failures are ignored.
setup_ocr() {
    # The loop variable is local and deliberately NOT named LANG: assigning to LANG
    # would overwrite the locale variable inherited by every process started later.
    local lang_code

    echo "Setting up OCR languages..."

    # In Alpine, tesseract uses /usr/share/tessdata
    TESSDATA_DIR="/usr/share/tessdata"

    # Create tessdata directory
    mkdir -p "$TESSDATA_DIR"

    # Restore system languages from backup (Dockerfile moved them to tessdata-original)
    if [ -d /usr/share/tessdata-original ]; then
        echo "Restoring system tessdata from backup..."
        # -n never overwrites, so files a user mounted into the tessdata directory win
        cp -rn /usr/share/tessdata-original/* "$TESSDATA_DIR"/ 2>/dev/null || true
    fi

    # Install additional languages if specified
    if [ -n "$TESSERACT_LANGS" ]; then
        # Commas become spaces; the unquoted expansion then splits into one word per code
        for lang_code in ${TESSERACT_LANGS//,/ }; do
            # Only 2-4 letter codes, optionally followed by "_" and a same-length suffix,
            # are passed on to apk; anything else is skipped silently
            case "$lang_code" in
                [a-zA-Z][a-zA-Z]|[a-zA-Z][a-zA-Z][a-zA-Z]|[a-zA-Z][a-zA-Z][a-zA-Z][a-zA-Z]|[a-zA-Z][a-zA-Z]_[a-zA-Z][a-zA-Z]|[a-zA-Z][a-zA-Z][a-zA-Z]_[a-zA-Z][a-zA-Z][a-zA-Z]|[a-zA-Z][a-zA-Z][a-zA-Z][a-zA-Z]_[a-zA-Z][a-zA-Z][a-zA-Z][a-zA-Z])
                    apk add --no-cache "tesseract-ocr-data-$lang_code" 2>/dev/null || true
                    ;;
            esac
        done
    fi

    # Point to the consolidated location
    export TESSDATA_PREFIX="$TESSDATA_DIR"
    echo "Using TESSDATA_PREFIX=$TESSDATA_PREFIX"
}
|
||||
|
||||
# Aligns the runtime account with the requested ids and prepares the directories
# the services write to (adapted from init-without-ocr.sh).
#
# Reads:   JAVA_BASE_OPTS, JAVA_CUSTOM_OPTS, PUID, PGID, UMASK, LANGS, HOME
# Exports: JAVA_TOOL_OPTIONS (base options followed by custom options)
setup_permissions() {
    echo "Setting up user permissions..."

    export JAVA_TOOL_OPTIONS="${JAVA_BASE_OPTS} ${JAVA_CUSTOM_OPTS}"

    # Remap the user id only when one was requested and it differs from the current id
    if [ -n "$PUID" ]; then
        if [ "$PUID" != "$(id -u stirlingpdfuser)" ]; then
            usermod -o -u "$PUID" stirlingpdfuser || true
        fi
    fi

    # Same for the group id
    if [ -n "$PGID" ]; then
        if [ "$PGID" != "$(getent group stirlingpdfgroup | cut -d: -f3)" ]; then
            groupmod -o -g "$PGID" stirlingpdfgroup || true
        fi
    fi

    umask "$UMASK" || true

    # Install fonts if needed; LANGS is left unquoted so each word becomes its own argument
    if [ -n "$LANGS" ]; then
        /scripts/installFonts.sh $LANGS
    fi

    # Ensure the temp directory exists before ownership is adjusted
    mkdir -p /tmp/stirling-pdf || true

    # Paths whose mode is reset to 755
    local -a mode_paths=(
        /logs /scripts /usr/share/fonts/opentype/noto
        /configs /customFiles /pipeline /tmp/stirling-pdf
    )
    # Paths handed over to the runtime user (HOME stays unquoted, as before)
    local -a owned_paths=(
        $HOME /logs /scripts /usr/share/fonts/opentype/noto
        /configs /customFiles /pipeline /tmp/stirling-pdf
        /var/lib/nginx /var/log/nginx /usr/share/nginx
        /app.jar
    )

    chown -R stirlingpdfuser:stirlingpdfgroup "${owned_paths[@]}" 2>/dev/null \
        || echo "[WARN] Some chown operations failed, may run as host user"

    chmod -R 755 "${mode_paths[@]}" 2>/dev/null || true
}
|
||||
|
||||
# Substitutes every ${BACKEND_URL} placeholder in the nginx config with the given URL.
#
# Arguments:
#   $1  backend URL to insert
#   $2  config file to rewrite in place (optional, defaults to /etc/nginx/nginx.conf)
configure_nginx() {
    local backend_url=$1
    local conf_file=${2:-/etc/nginx/nginx.conf}
    local escaped_url

    echo "Configuring nginx with backend URL: $backend_url"

    # Escape the characters that are special in a sed replacement ("\" and "&") and the
    # "|" delimiter used below, so a URL containing them is inserted literally
    escaped_url=$(printf '%s' "$backend_url" | sed -e 's/[\\&|]/\\&/g')

    sed -i "s|\${BACKEND_URL}|${escaped_url}|g" "$conf_file"
}
|
||||
|
||||
# Runs the given command as stirlingpdfuser when the script has root privileges.
# When the script is already unprivileged, the command is exec'ed directly,
# which REPLACES the calling shell process - call it from a subshell or a
# background job if the caller needs to continue afterwards.
run_as_user() {
    if [ "$(id -u)" != "0" ]; then
        # Already running as non-root
        exec "$@"
    fi

    # Running as root, drop privileges with su-exec
    su-exec stirlingpdfuser "$@"
}
|
||||
|
||||
# Runs a command under `timeout` when that utility is available, otherwise runs it unbounded.
#
# Arguments:
#   $1    time limit in seconds
#   $2..  command and its arguments
# Returns the status of `timeout` (or of the command itself when `timeout` is missing).
run_with_timeout() {
    local limit_secs=$1
    shift

    if ! command -v timeout >/dev/null 2>&1; then
        "$@"
        return
    fi

    timeout "${limit_secs}s" "$@"
}
|
||||
|
||||
# Runs a command through run_as_user, bounded by `timeout` when that utility is available.
#
# Arguments:
#   $1    time limit in seconds
#   $2..  command and its arguments
# Returns the exit status of the command (or of `timeout`).
#
# The call is isolated in a subshell because run_as_user exec's the command when the
# script is not root; without the subshell that exec would replace the caller itself
# (for example the watchdog loop that uses this function for health checks).
run_as_user_with_timeout() {
    local secs=$1; shift
    if command -v timeout >/dev/null 2>&1; then
        ( run_as_user timeout "${secs}s" "$@" )
    else
        ( run_as_user "$@" )
    fi
}
|
||||
|
||||
# Checks whether a TCP port accepts connections.
#
# Arguments:
#   $1  host
#   $2  port
#   $3  timeout in seconds (optional, default 5)
# Returns:
#   0  connection succeeded
#   1  connection failed or timed out
#   2  no probing method (nc or bash /dev/tcp) is available
tcp_port_check() {
    local host=$1
    local port=$2
    local timeout_secs=${3:-5}
    local rc=0

    # Try nc first (most portable)
    if command -v nc >/dev/null 2>&1; then
        # Any failure is normalised to 1 so it can never be mistaken for the
        # "no method available" status 2; "||" also keeps "set -e" from aborting here
        run_with_timeout "$timeout_secs" nc -z "$host" "$port" 2>/dev/null || rc=1
        return "$rc"
    fi

    # Fallback to /dev/tcp (bash-specific)
    if [ -n "${BASH_VERSION:-}" ] && command -v bash >/dev/null 2>&1; then
        # Host and port are passed as positional arguments instead of being pasted
        # into the script text; the descriptor lives only in the child shell, so
        # there is nothing to close in this one
        run_with_timeout "$timeout_secs" bash -c 'exec 3<>"/dev/tcp/$1/$2"' _ "$host" "$port" 2>/dev/null || rc=1
        return "$rc"
    fi

    # No TCP check method available
    return 2
}
|
||||
|
||||
# Settings file consulted by read_setting_value; an existing non-empty CONFIG_FILE wins
: "${CONFIG_FILE:=/configs/settings.yml}"

# Parallel arrays describing the unoserver pool: entry i holds the PID, the listening
# port and the uno port of instance i
UNOSERVER_PIDS=() UNOSERVER_PORTS=() UNOSERVER_UNO_PORTS=()
|
||||
|
||||
# Prints the value of the first "key: value" line in CONFIG_FILE whose key equals $1.
#
# Arguments:
#   $1  key name, compared literally (surrounding whitespace on the line is ignored)
# Output:
#   the value with a trailing " # comment", surrounding whitespace and one pair of
#   enclosing quotes removed; nothing when the file or the key is missing
# Note: this is a line-based lookup, not a YAML parser - nesting is ignored, so the
# first occurrence of the key anywhere in the file wins.
read_setting_value() {
    local key=$1
    if [ ! -f "$CONFIG_FILE" ]; then
        return 0
    fi
    awk -v key="$key" -v sq="'" '
        {
            # Split at the FIRST colon only, so values such as URLs keep their own colons
            sep = index($0, ":")
            if (sep == 0) next
            name = substr($0, 1, sep - 1)
            gsub(/^[[:space:]]+|[[:space:]]+$/, "", name)
            if (name != key) next

            val = substr($0, sep + 1)
            # A comment starts at "#" only at the beginning of the value or after whitespace
            sub(/(^|[[:space:]])#.*$/, "", val)
            gsub(/^[[:space:]]+|[[:space:]]+$/, "", val)
            # Drop one leading and one trailing quote character (single or double)
            gsub("^[\"" sq "]|[\"" sq "]$", "", val)
            print val
            exit
        }
    ' "$CONFIG_FILE"
}
|
||||
|
||||
# Prints the "auto start unoserver" setting. Precedence: PROCESS_EXECUTOR_AUTO_UNO_SERVER,
# then UNO_SERVER_AUTO, then the autoUnoServer entry of the settings file.
get_unoserver_auto() {
    local env_override=${PROCESS_EXECUTOR_AUTO_UNO_SERVER:-${UNO_SERVER_AUTO:-}}

    if [ -z "$env_override" ]; then
        read_setting_value "autoUnoServer"
    else
        echo "$env_override"
    fi
}
|
||||
|
||||
# Prints the requested number of unoserver instances. Precedence:
# PROCESS_EXECUTOR_SESSION_LIMIT_LIBRE_OFFICE_SESSION_LIMIT, then UNO_SERVER_COUNT,
# then the libreOfficeSessionLimit entry of the settings file.
get_unoserver_count() {
    local env_override=${PROCESS_EXECUTOR_SESSION_LIMIT_LIBRE_OFFICE_SESSION_LIMIT:-${UNO_SERVER_COUNT:-}}

    if [ -z "$env_override" ]; then
        read_setting_value "libreOfficeSessionLimit"
    else
        echo "$env_override"
    fi
}
|
||||
|
||||
# Launches one unoserver bound to 127.0.0.1 as a background job.
#
# Arguments:
#   $1  port unoserver listens on
#   $2  uno port passed via --uno-port
# Sets LAST_UNOSERVER_PID to the PID of the background job.
start_unoserver_instance() {
    local listen_port=$1 bridge_port=$2
    local -a server_cmd=(
        /opt/venv/bin/unoserver
        --port "$listen_port"
        --interface 127.0.0.1
        --uno-port "$bridge_port"
    )

    run_as_user "${server_cmd[@]}" &
    LAST_UNOSERVER_PID=$!
}
|
||||
|
||||
# Starts a background loop that periodically checks every unoserver instance listed in
# UNOSERVER_PIDS / UNOSERVER_PORTS / UNOSERVER_UNO_PORTS and restarts unhealthy ones.
#
# Reads: UNO_SERVER_HEALTH_INTERVAL - seconds between sweeps (default 30; empty or
#        non-numeric values fall back to 30)
# An instance is restarted when its PID is gone, or when both the unoping probe (if
# unoping is installed) and the TCP probe fail. If no TCP probe method exists, a live
# PID counts as healthy.
# The loop runs in a subshell, so PID updates after a restart stay inside the watchdog.
start_unoserver_watchdog() {
    local interval=${UNO_SERVER_HEALTH_INTERVAL:-30}
    case "$interval" in
        ''|*[!0-9]*) interval=30 ;;
    esac
    (
        while true; do
            local i=0
            while [ "$i" -lt "${#UNOSERVER_PIDS[@]}" ]; do
                local pid=${UNOSERVER_PIDS[$i]}
                local port=${UNOSERVER_PORTS[$i]}
                local uno_port=${UNOSERVER_UNO_PORTS[$i]}
                local needs_restart=false

                # Check 1: PID exists
                if [ -z "$pid" ] || ! kill -0 "$pid" 2>/dev/null; then
                    echo "unoserver PID ${pid} not found for port ${port}"
                    needs_restart=true
                else
                    # PID exists, now check if server is actually healthy
                    local health_ok=false

                    # Check 2A: Health check with unoping (best - checks actual server health)
                    if command -v unoping >/dev/null 2>&1; then
                        if run_as_user_with_timeout 5 unoping --host 127.0.0.1 --port "$port" >/dev/null 2>&1; then
                            health_ok=true
                        else
                            echo "unoserver health check failed (unoping) for port ${port}, trying TCP fallback"
                        fi
                    fi

                    # Check 2B: Fallback to TCP port check (verifies service is listening)
                    if [ "$health_ok" = false ]; then
                        # The status is captured with "||": the script runs under "set -e",
                        # and a bare failing call would end this subshell (and with it the
                        # watchdog) before the restart logic below could run
                        local tcp_rc=0
                        tcp_port_check "127.0.0.1" "$port" 5 || tcp_rc=$?
                        if [ "$tcp_rc" -eq 0 ]; then
                            health_ok=true
                        elif [ "$tcp_rc" -eq 2 ]; then
                            echo "No TCP check available; falling back to PID-only for port ${port}"
                            health_ok=true
                        else
                            echo "unoserver TCP check failed for port ${port}"
                            needs_restart=true
                        fi
                    fi
                fi

                if [ "$needs_restart" = true ]; then
                    echo "Restarting unoserver on 127.0.0.1:${port} (uno-port ${uno_port})"
                    # Kill the old process if it exists: TERM first, KILL one second later
                    if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
                        kill -TERM "$pid" 2>/dev/null || true
                        sleep 1
                        kill -KILL "$pid" 2>/dev/null || true
                    fi
                    start_unoserver_instance "$port" "$uno_port"
                    UNOSERVER_PIDS[$i]=$LAST_UNOSERVER_PID
                fi
                i=$((i + 1))
            done
            sleep "$interval"
        done
    ) &
}
|
||||
|
||||
# Starts the local unoserver pool and its watchdog, unless auto-start is disabled.
#
# The auto-start flag comes from get_unoserver_auto (case-insensitive; empty means
# "true"). The pool size comes from get_unoserver_count; empty, non-numeric or
# non-positive values fall back to 1. Instance i listens on port 2003 + 2*i and uses
# uno port 2004 + 2*i. Ports and PIDs are appended to the UNOSERVER_* arrays.
start_unoserver_pool() {
    local auto
    auto="$(get_unoserver_auto)"
    auto="${auto,,}"
    auto="${auto:-true}"
    if [ "$auto" != "true" ]; then
        echo "Skipping local unoserver pool (autoUnoServer=$auto)"
        return
    fi

    local count
    count="$(get_unoserver_count)"
    # Anything that is not a plain run of digits counts as "not configured"
    case "$count" in
        ''|*[!0-9]*) count=1 ;;
    esac
    [ "$count" -gt 0 ] || count=1

    local i=0
    until [ "$i" -ge "$count" ]; do
        local port=$((2003 + (i * 2)))
        local uno_port=$((port + 1))
        echo "Starting unoserver on 127.0.0.1:${port} (uno-port ${uno_port})"
        UNOSERVER_PORTS+=("$port")
        UNOSERVER_UNO_PORTS+=("$uno_port")
        start_unoserver_instance "$port" "$uno_port"
        UNOSERVER_PIDS+=("$LAST_UNOSERVER_PID")
        i=$((i + 1))
    done

    start_unoserver_watchdog
}
|
||||
|
||||
# Prepare OCR data and filesystem permissions before any service is launched
setup_ocr
setup_permissions

# Prints the separator line that frames the startup summary.
print_rule() {
    echo "==================================="
}

# Starts the Java backend as a background job on the given port; records BACKEND_PID.
launch_backend() {
    local listen_port=$1
    echo "Starting backend on port ${listen_port}..."
    run_as_user sh -c "java -Dfile.encoding=UTF-8 \
        -Djava.io.tmpdir=/tmp/stirling-pdf \
        -Dserver.port=${listen_port} \
        -jar /app.jar" &
    BACKEND_PID=$!
}

# Starts nginx (non-daemonised) as a background job on port 8080; records NGINX_PID.
launch_nginx() {
    echo "Starting nginx on port 8080..."
    run_as_user nginx -g "daemon off;" &
    NGINX_PID=$!
}

# Dispatch on the requested mode
case "$MODE" in
    BOTH)
        echo "Starting in BOTH mode: Frontend + Backend on port 8080"
        internal_backend_port=${BACKEND_INTERNAL_PORT:-8081}

        # nginx proxies to the backend listening on the internal port
        configure_nginx "http://localhost:${internal_backend_port}"

        launch_backend "$internal_backend_port"

        # Document conversion workers
        start_unoserver_pool

        # Fixed grace period for the backend before nginx starts accepting requests
        sleep 3

        launch_nginx

        print_rule
        echo "✓ Frontend available at: http://localhost:8080"
        echo "✓ Backend API at: http://localhost:8080/api"
        echo "✓ Backend running internally on port ${internal_backend_port}"
        print_rule
        ;;

    FRONTEND)
        echo "Starting in FRONTEND mode: Frontend only on port 8080"

        # nginx proxies to an external backend (VITE_API_BASE_URL or the default below)
        BACKEND_URL=${VITE_API_BASE_URL:-http://backend:8080}
        configure_nginx "$BACKEND_URL"

        launch_nginx

        print_rule
        echo "✓ Frontend available at: http://localhost:8080"
        echo "✓ Proxying API calls to: $BACKEND_URL"
        print_rule
        ;;

    BACKEND)
        echo "Starting in BACKEND mode: Backend only on port 8080"

        launch_backend 8080
        start_unoserver_pool

        print_rule
        echo "✓ Backend API available at: http://localhost:8080/api"
        echo "✓ Swagger UI at: http://localhost:8080/swagger-ui/index.html"
        print_rule
        ;;

    *)
        echo "ERROR: Invalid MODE '$MODE'. Must be BOTH, FRONTEND, or BACKEND"
        exit 1
        ;;
esac

# Block until every background job has exited, keeping the container alive
wait
|
||||
@@ -1,121 +0,0 @@
|
||||
# nginx configuration for the unified container.
# ${BACKEND_URL} is a placeholder that the entrypoint replaces (via sed) before nginx starts.

# Run nginx as non-root user: the PID file lives in a location any user can write to
pid /tmp/nginx.pid;

events {
    worker_connections 1024;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # Add .mjs MIME type mapping
    types {
        text/javascript mjs;
    }

    # Gzip compression
    gzip on;
    gzip_vary on;
    gzip_min_length 1024;
    gzip_types text/plain text/css text/xml text/javascript application/javascript application/xml+rss application/json;

    server {
        listen 8080;
        server_name _;
        root /usr/share/nginx/html;
        index index.html index.htm;

        # Upload size limit; inherited by every location below
        client_max_body_size 100m;

        # Security headers. add_header is inherited by a location ONLY if that location
        # declares no add_header of its own, so locations that add caching headers
        # below repeat this set.
        add_header X-Frame-Options "SAMEORIGIN" always;
        add_header X-Content-Type-Options "nosniff" always;
        add_header X-XSS-Protection "1; mode=block" always;
        add_header Referrer-Policy "strict-origin-when-cross-origin" always;

        # Handle client-side routing - support subpaths
        location / {
            try_files $uri $uri/ /index.html;
        }

        # Proxy API calls to backend
        location /api/ {
            proxy_pass ${BACKEND_URL}/api/;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Forwarded-Host $host;
            proxy_set_header X-Forwarded-Port $server_port;

            # Additional headers for proper API proxying
            proxy_set_header Connection '';
            proxy_http_version 1.1;
            proxy_buffering off;
            proxy_cache off;

            # Timeout settings for large file uploads
            proxy_connect_timeout 60s;
            proxy_send_timeout 60s;
            proxy_read_timeout 60s;

            # Stream request bodies to the backend instead of buffering them first
            proxy_request_buffering off;
        }

        # Proxy Swagger UI to backend (including versioned paths).
        # A "^~" prefix location is used instead of a regex with a capture: the prefix is
        # mapped onto the same backend path, the query string is forwarded automatically,
        # and the upstream name is resolved at startup rather than per request. "^~" also
        # keeps the static-asset regex locations below from intercepting Swagger's JS/CSS.
        location ^~ /swagger-ui {
            proxy_pass ${BACKEND_URL}/swagger-ui;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Forwarded-Host $host;
            proxy_set_header X-Forwarded-Port $server_port;

            proxy_set_header Connection '';
            proxy_http_version 1.1;
            proxy_buffering off;
            proxy_cache off;
        }

        # Proxy API docs to backend (query parameters and sub-paths are forwarded)
        location ^~ /v3/api-docs {
            proxy_pass ${BACKEND_URL}/v3/api-docs;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Forwarded-Host $host;
            proxy_set_header X-Forwarded-Port $server_port;
        }

        # Proxy v1 API docs to backend (query parameters and sub-paths are forwarded)
        location ^~ /v1/api-docs {
            proxy_pass ${BACKEND_URL}/v1/api-docs;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Forwarded-Host $host;
            proxy_set_header X-Forwarded-Port $server_port;
        }

        # Serve .mjs files with correct MIME type (must come before general static assets).
        # The type itself comes from the "types" block above; the charset module appends
        # "; charset=utf-8". Setting Content-Type through add_header would send a second,
        # duplicate Content-Type header.
        location ~* \.mjs$ {
            try_files $uri =404;
            charset utf-8;
            charset_types text/javascript;
            expires 1y;
            add_header Cache-Control "public, immutable";

            # Repeated because this location declares its own add_header (see above)
            add_header X-Frame-Options "SAMEORIGIN" always;
            add_header X-Content-Type-Options "nosniff" always;
            add_header X-XSS-Protection "1; mode=block" always;
            add_header Referrer-Policy "strict-origin-when-cross-origin" always;
        }

        # Cache static assets (but not API endpoints)
        location ~* ^(?!/api/).*\.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ {
            expires 1y;
            add_header Cache-Control "public, immutable";

            # Repeated because this location declares its own add_header (see above)
            add_header X-Frame-Options "SAMEORIGIN" always;
            add_header X-Content-Type-Options "nosniff" always;
            add_header X-XSS-Protection "1; mode=block" always;
            add_header Referrer-Policy "strict-origin-when-cross-origin" always;
        }
    }
}
|
||||
@@ -16,30 +16,30 @@ security:
|
||||
loginResetTimeMinutes: 120 # lock account for 2 hours after x attempts
|
||||
loginMethod: all # Accepts values like 'all' and 'normal'(only Login with Username/Password), 'oauth2'(only Login with OAuth2) or 'saml2'(only Login with SAML2)
|
||||
initialLogin:
|
||||
username: '' # initial username for the first login
|
||||
password: '' # initial password for the first login
|
||||
username: "" # initial username for the first login
|
||||
password: "" # initial password for the first login
|
||||
oauth2:
|
||||
enabled: false # set to 'true' to enable login (Note: enableLogin must also be 'true' for this to work)
|
||||
client:
|
||||
keycloak:
|
||||
issuer: '' # URL of the Keycloak realm's OpenID Connect Discovery endpoint
|
||||
clientId: '' # client ID for Keycloak OAuth2
|
||||
clientSecret: '' # client secret for Keycloak OAuth2
|
||||
issuer: "" # URL of the Keycloak realm's OpenID Connect Discovery endpoint
|
||||
clientId: "" # client ID for Keycloak OAuth2
|
||||
clientSecret: "" # client secret for Keycloak OAuth2
|
||||
scopes: openid, profile, email # scopes for Keycloak OAuth2
|
||||
useAsUsername: preferred_username # field to use as the username for Keycloak OAuth2. Available options are: [email | name | given_name | family_name | preferred_name]
|
||||
google:
|
||||
clientId: '' # client ID for Google OAuth2
|
||||
clientSecret: '' # client secret for Google OAuth2
|
||||
clientId: "" # client ID for Google OAuth2
|
||||
clientSecret: "" # client secret for Google OAuth2
|
||||
scopes: email, profile # scopes for Google OAuth2
|
||||
useAsUsername: email # field to use as the username for Google OAuth2. Available options are: [email | name | given_name | family_name]
|
||||
github:
|
||||
clientId: '' # client ID for GitHub OAuth2
|
||||
clientSecret: '' # client secret for GitHub OAuth2
|
||||
clientId: "" # client ID for GitHub OAuth2
|
||||
clientSecret: "" # client secret for GitHub OAuth2
|
||||
scopes: read:user # scope for GitHub OAuth2
|
||||
useAsUsername: login # field to use as the username for GitHub OAuth2. Available options are: [email | login | name]
|
||||
issuer: '' # set to any Provider that supports OpenID Connect Discovery (/.well-known/openid-configuration) endpoint
|
||||
clientId: '' # client ID from your Provider
|
||||
clientSecret: '' # client secret from your Provider
|
||||
issuer: "" # set to any Provider that supports OpenID Connect Discovery (/.well-known/openid-configuration) endpoint
|
||||
clientId: "" # client ID from your Provider
|
||||
clientSecret: "" # client secret from your Provider
|
||||
autoCreateUser: true # set to 'true' to allow auto-creation of non-existing users
|
||||
blockRegistration: false # set to 'true' to deny login with SSO without prior registration by an admin
|
||||
useAsUsername: email # default is 'email'; custom fields can be used as the username
|
||||
@@ -47,21 +47,27 @@ security:
|
||||
provider: google # set this to your OAuth Provider's name, e.g., 'google' or 'keycloak'
|
||||
saml2:
|
||||
enabled: false # Only enabled for paid enterprise clients (enterpriseEdition.enabled must be true)
|
||||
provider: '' # The name of your Provider
|
||||
provider: "" # The name of your Provider
|
||||
autoCreateUser: true # set to 'true' to allow auto-creation of non-existing users
|
||||
blockRegistration: false # set to 'true' to deny login with SSO without prior registration by an admin
|
||||
registrationId: stirling # The name of your Service Provider (SP) app name. Should match the name in the path for your SSO & SLO URLs
|
||||
idpMetadataUri: https://dev-XXXXXXXX.okta.com/app/externalKey/sso/saml/metadata # The uri for your Provider's metadata
|
||||
idpSingleLoginUrl: https://dev-XXXXXXXX.okta.com/app/dev-XXXXXXXX_stirlingpdf_1/externalKey/sso/saml # The URL for initiating SSO. Provided by your Provider
|
||||
idpSingleLogoutUrl: https://dev-XXXXXXXX.okta.com/app/dev-XXXXXXXX_stirlingpdf_1/externalKey/slo/saml # The URL for initiating SLO. Provided by your Provider
|
||||
idpIssuer: '' # The ID of your Provider
|
||||
idpIssuer: "" # The ID of your Provider
|
||||
idpCert: classpath:okta.cert # The certificate your Provider will use to authenticate your app's SAML authentication requests. Provided by your Provider
|
||||
privateKey: classpath:saml-private-key.key # Your private key. Generated from your keypair
|
||||
spCert: classpath:saml-public-cert.crt # Your signing certificate. Generated from your keypair
|
||||
# IMPORTANT: For SAML setup, download your SP metadata from the BACKEND URL: http://localhost:8080/saml2/service-provider-metadata/{registrationId}
|
||||
# Do NOT use the frontend dev server URL (localhost:5173) as it will generate incorrect ACS URLs. Always use the backend URL (localhost:8080) for SAML configuration.
|
||||
jwt: # This feature is currently under development and not yet fully supported. Do not use in production.
|
||||
persistence: true # Set to 'true' to enable JWT key store
|
||||
enableKeyRotation: true # Set to 'true' to enable key pair rotation
|
||||
enableKeyCleanup: true # Set to 'true' to enable key pair cleanup
|
||||
tokenExpiryMinutes: 1440 # JWT access token lifetime in minutes for web clients (1 day).
|
||||
desktopTokenExpiryMinutes: 43200 # JWT access token lifetime in minutes for desktop clients (30 days).
|
||||
allowedClockSkewSeconds: 60 # Allowed JWT validation clock skew in seconds to tolerate small client/server time drift.
|
||||
refreshGraceMinutes: 15 # Allow refresh using an expired access token only within this many minutes after expiry.
|
||||
validation: # PDF signature validation settings
|
||||
trust:
|
||||
serverAsAnchor: true # Trust server certificate as anchor for PDF signatures (if configured and self-signed or CA)
|
||||
@@ -78,6 +84,7 @@ security:
|
||||
revocation:
|
||||
mode: none # Revocation checking mode: 'none' (disabled), 'ocsp' (OCSP only), 'crl' (CRL only), 'ocsp+crl' (OCSP with CRL fallback)
|
||||
hardFail: false # Fail validation if revocation status cannot be determined (true=strict, false=soft-fail)
|
||||
xFrameOptions: DENY # X-Frame-Options header value. Options: 'DENY' (default, prevents all framing), 'SAMEORIGIN' (allows framing from same domain), 'DISABLED' (no X-Frame-Options header sent). Note: automatically set to DISABLED when login is disabled
|
||||
|
||||
premium:
|
||||
key: 00000000-0000-0000-0000-000000000000
|
||||
@@ -89,11 +96,19 @@ premium:
|
||||
author: username
|
||||
creator: Stirling-PDF
|
||||
producer: Stirling-PDF
|
||||
googleDrive:
|
||||
enabled: false # Enable Google Drive file picker integration
|
||||
clientId: "" # Google OAuth 2.0 client ID (obtain from Google Cloud Console)
|
||||
apiKey: "" # Google API key for Google Picker API (obtain from Google Cloud Console)
|
||||
appId: "" # Google Drive app ID
|
||||
enterpriseFeatures:
|
||||
audit:
|
||||
enabled: true # Enable audit logging
|
||||
level: 2 # Audit logging level: 0=OFF, 1=BASIC, 2=STANDARD, 3=VERBOSE
|
||||
retentionDays: 90 # Number of days to retain audit logs
|
||||
enabled: true # Enable audit logging for security and compliance tracking
|
||||
level: 2 # Audit logging level: 0=OFF, 1=BASIC (compress/split/merge/etc and settings), 2=STANDARD (BASIC + user actions, excludes polling), 3=VERBOSE (everything including polling).
|
||||
retentionDays: 90 # Number of days to retain audit logs (0 or negative = infinite retention)
|
||||
captureFileHash: false # Capture SHA-256 hash of uploaded/processed files. Warning: adds 50-200ms per file depending on size. Only enabled independently of audit level.
|
||||
capturePdfAuthor: false # Capture author metadata from PDF documents. Warning: requires PDF parsing which increases processing time. Only enabled independently of audit level.
|
||||
captureOperationResults: false # Capture operation return values and responses in audit log. Warning: not recommended, significantly increases log volume and disk usage. Use only for debugging.
|
||||
databaseNotifications:
|
||||
backups:
|
||||
successful: false # set to 'true' to enable email notifications for successful database backups
|
||||
@@ -107,21 +122,45 @@ mail:
|
||||
enableInvites: false # set to 'true' to enable email invites for user management (requires mail.enabled and security.enableLogin)
|
||||
host: smtp.example.com # SMTP server hostname
|
||||
port: 587 # SMTP server port
|
||||
username: '' # SMTP server username
|
||||
password: '' # SMTP server password
|
||||
from: '' # sender email address
|
||||
username: "" # SMTP server username
|
||||
password: "" # SMTP server password
|
||||
from: "" # sender email address
|
||||
startTlsEnable: true # enable STARTTLS (explicit TLS upgrade after connecting) when supported by the SMTP server
|
||||
startTlsRequired: false # require STARTTLS; connection fails if the upgrade command is not supported
|
||||
sslEnable: false # enable SSL/TLS wrapper for implicit TLS (typically used with port 465)
|
||||
sslTrust: '' # optional trusted host override, e.g. "smtp.example.com" or "*"; defaults to "*" (trust all) when empty
|
||||
sslTrust: "" # optional trusted host override, e.g. "smtp.example.com" or "*"; defaults to "*" (trust all) when empty
|
||||
sslCheckServerIdentity: false # enable hostname verification when using SSL/TLS
|
||||
|
||||
telegram:
|
||||
enabled: false # set to 'true' to enable Telegram bot integration
|
||||
botToken: "" # Telegram bot token obtained from BotFather
|
||||
botUsername: "" # Telegram bot username (without @)
|
||||
pipelineInboxFolder: telegram # Name of the pipeline inbox folder for Telegram uploads
|
||||
customFolderSuffix: true # set to 'true' to allow users to specify custom target folders via UserID
|
||||
enableAllowUserIDs: true # set to 'true' to restrict access to specific Telegram user IDs
|
||||
allowUserIDs: [] # List of allowed Telegram user IDs (e.g. [123456789, 987654321]). Leave empty to allow all users.
|
||||
enableAllowChannelIDs: true # set to 'true' to restrict access to specific Telegram channel IDs
|
||||
allowChannelIDs: [] # List of allowed Telegram channel IDs (e.g. [-1001234567890, -1009876543210]). Leave empty to allow all channels.
|
||||
processingTimeoutSeconds: 180 # Maximum time in seconds to wait for processing a Telegram request
|
||||
pollingIntervalMillis: 2000 # Interval in milliseconds between polling for new messages
|
||||
feedback:
|
||||
channel:
|
||||
noValidDocument: true # set to 'false' to hide/suppress feedback messages in channels (to avoid spam)
|
||||
errorProcessing: true # set to 'false' to hide/suppress feedback messages in channels (to avoid spam)
|
||||
errorMessage: true # set to 'false' to hide/suppress error messages in channels (to avoid spam)
|
||||
processing: true # set to 'false' to hide/suppress processing messages in channels (to avoid spam)
|
||||
user:
|
||||
noValidDocument: true # set to 'false' to hide/suppress feedback messages to users (to avoid spam)
|
||||
errorProcessing: true # set to 'false' to hide/suppress feedback messages to users (to avoid spam)
|
||||
errorMessage: true # set to 'false' to hide/suppress error messages to users (to avoid spam)
|
||||
processing: true # set to 'false' to hide/suppress processing messages to users (to avoid spam)
|
||||
|
||||
legal:
|
||||
termsAndConditions: https://www.stirlingpdf.com/terms # URL to the terms and conditions of your application (e.g. https://example.com/terms). Empty string to disable or filename to load from local file in static folder
|
||||
privacyPolicy: https://www.stirlingpdf.com/privacy-policy # URL to the privacy policy of your application (e.g. https://example.com/privacy). Empty string to disable or filename to load from local file in static folder
|
||||
accessibilityStatement: '' # URL to the accessibility statement of your application (e.g. https://example.com/accessibility). Empty string to disable or filename to load from local file in static folder
|
||||
cookiePolicy: '' # URL to the cookie policy of your application (e.g. https://example.com/cookie). Empty string to disable or filename to load from local file in static folder
|
||||
impressum: '' # URL to the impressum of your application (e.g. https://example.com/impressum). Empty string to disable or filename to load from local file in static folder
|
||||
accessibilityStatement: "" # URL to the accessibility statement of your application (e.g. https://example.com/accessibility). Empty string to disable or filename to load from local file in static folder
|
||||
cookiePolicy: "" # URL to the cookie policy of your application (e.g. https://example.com/cookie). Empty string to disable or filename to load from local file in static folder
|
||||
impressum: "" # URL to the impressum of your application (e.g. https://example.com/impressum). Empty string to disable or filename to load from local file in static folder
|
||||
|
||||
system:
|
||||
defaultLocale: en-US # set the default language (e.g. 'de-DE', 'fr-FR', etc)
|
||||
@@ -129,16 +168,25 @@ system:
|
||||
enableAlphaFunctionality: false # set to enable functionality which might need more testing before it fully goes live (this feature might make no changes)
|
||||
showUpdate: false # see when a new update is available
|
||||
showUpdateOnlyAdmin: false # only admins can see when a new update is available; only takes effect when showUpdate is set to 'true'
|
||||
showSettingsWhenNoLogin: true # set to 'false' to hide settings button when login is disabled (enableLogin: false). Only applies when login is disabled.
|
||||
customHTMLFiles: false # enable to have files placed in /customFiles/templates override the existing template HTML files
|
||||
tessdataDir: /usr/share/tessdata # path to the directory containing the Tessdata files. This setting is relevant for Windows systems. For Windows users, this path should be adjusted to point to the appropriate directory where the Tessdata files are stored.
|
||||
enableAnalytics: true # Master toggle for analytics: set to 'true' to enable all analytics, 'false' to disable all analytics, or leave as 'null' to prompt admin on first launch
|
||||
enableDesktopInstallSlide: true # Set to 'false' to hide the desktop app installation slide in the onboarding flow
|
||||
enablePosthog: null # Enable PostHog analytics (open-source product analytics): set to 'true' to enable, 'false' to disable, or 'null' to enable by default when analytics is enabled
|
||||
enableScarf: null # Enable Scarf tracking pixel: set to 'true' to enable, 'false' to disable, or 'null' to enable by default when analytics is enabled
|
||||
enableUrlToPDF: false # Set to 'true' to enable URL to PDF, INTERNAL ONLY, known security issues, should not be used externally
|
||||
disableSanitize: false # set to 'true' to disable HTML sanitization (can lead to injections in HTML)
|
||||
maxDPI: 500 # Maximum allowed DPI for PDF to image conversion
|
||||
corsAllowedOrigins: [] # List of allowed origins for CORS (e.g. ['http://localhost:5173', 'https://app.example.com']). Leave empty to disable CORS.
|
||||
frontendUrl: '' # Base URL for frontend (e.g. 'https://pdf.example.com'). Used for generating invite links in emails. If empty, falls back to backend URL.
|
||||
corsAllowedOrigins: [] # List of allowed origins for CORS (e.g. ['http://localhost:5173', 'https://app.example.com']). Leave empty to disable CORS. For local development with frontend on port 5173, add 'http://localhost:5173'
|
||||
backendUrl: "" # Backend base URL for SAML/OAuth/API callbacks (e.g. 'http://localhost:8080' for dev, 'https://api.example.com' for production). REQUIRED for SSO authentication to work correctly. This is where your IdP will send SAML responses and OAuth callbacks. Leave empty to default to 'http://localhost:8080' in development.
|
||||
frontendUrl: "" # Frontend URL for invite email links (e.g. 'https://app.example.com'). Optional - if not set, will use backendUrl. This is the URL users click in invite emails.
|
||||
enableMobileScanner: true # Enable mobile phone QR code upload feature. Requires frontendUrl to be configured.
|
||||
mobileScannerSettings:
|
||||
convertToPdf: true # Automatically convert uploaded images to PDF format. If false, images are kept as-is.
|
||||
imageResolution: full # Image resolution for mobile uploads: 'full' (original size) or 'reduced' (max 1200px on longest side). Only applies when convertToPdf is true.
|
||||
pageFormat: A4 # Page format for converted PDFs: 'keep' (original image dimensions), 'A4' (A4 page size), or 'letter' (US Letter page size). Only applies when convertToPdf is true.
|
||||
stretchToFit: false # Whether to stretch images to fill the entire page (may distort aspect ratio). If false, images are centered with preserved aspect ratio. Only applies when convertToPdf is true.
|
||||
serverCertificate:
|
||||
enabled: true # Enable server-side certificate for "Sign with Stirling-PDF" option
|
||||
organizationName: Stirling-PDF # Organization name for generated certificates
|
||||
@@ -150,14 +198,14 @@ system:
|
||||
level: MEDIUM # Security level: MAX (whitelist only), MEDIUM (block internal networks), OFF (no restrictions)
|
||||
allowedDomains: [] # Whitelist of allowed domains (e.g. ['cdn.example.com', 'images.google.com'])
|
||||
blockedDomains: [] # Additional domains to block (e.g. ['evil.com', 'malicious.org'])
|
||||
internalTlds: [.local, .internal, .corp, .home] # Block domains with these TLD patterns
|
||||
internalTlds: [".local", ".internal", ".corp", ".home"] # Block domains with these TLD patterns
|
||||
blockPrivateNetworks: true # Block RFC 1918 private networks (10.x.x.x, 192.168.x.x, 172.16-31.x.x)
|
||||
blockLocalhost: true # Block localhost and loopback addresses (127.x.x.x, ::1)
|
||||
blockLinkLocal: true # Block link-local addresses (169.254.x.x, fe80::/10)
|
||||
blockCloudMetadata: true # Block cloud provider metadata endpoints (169.254.169.254)
|
||||
datasource:
|
||||
enableCustomDatabase: false # Enterprise users ONLY, set this property to 'true' if you would like to use your own custom database configuration
|
||||
customDatabaseUrl: '' # eg jdbc:postgresql://localhost:5432/postgres, set the url for your own custom database connection. If provided, the type, hostName, port and name are not necessary and will not be used
|
||||
customDatabaseUrl: "" # eg jdbc:postgresql://localhost:5432/postgres, set the url for your own custom database connection. If provided, the type, hostName, port and name are not necessary and will not be used
|
||||
username: postgres # set the database username
|
||||
password: postgres # set the database password
|
||||
type: postgresql # the type of the database to set (e.g. 'h2', 'postgresql')
|
||||
@@ -166,52 +214,42 @@ system:
|
||||
name: postgres # set the name of your database. Should match the name of the database you create
|
||||
customPaths:
|
||||
pipeline:
|
||||
watchedFoldersDir: '' # Defaults to /pipeline/watchedFolders
|
||||
finishedFoldersDir: '' # Defaults to /pipeline/finishedFolders
|
||||
pipelineDir: "" # Defaults to /pipeline
|
||||
watchedFoldersDir: "" # Defaults to /pipeline/watchedFolders
|
||||
watchedFoldersDirs: [] # List of watched folder directories. Defaults to watchedFoldersDir or /pipeline/watchedFolders.
|
||||
finishedFoldersDir: "" # Defaults to /pipeline/finishedFolders
|
||||
operations:
|
||||
weasyprint: '' # Defaults to /opt/venv/bin/weasyprint
|
||||
unoconvert: '' # Defaults to /opt/venv/bin/unoconvert
|
||||
calibre: '' # Defaults to /usr/bin/ebook-convert
|
||||
ocrmypdf: '' # Defaults to /usr/bin/ocrmypdf
|
||||
soffice: '' # Defaults to /usr/bin/soffice
|
||||
fileUploadLimit: '' # Defaults to "". No limit when string is empty. Set a number, between 0 and 999, followed by one of the following strings to set a limit. "KB", "MB", "GB".
|
||||
weasyprint: "" # Defaults to /opt/venv/bin/weasyprint
|
||||
unoconvert: "" # Defaults to /opt/venv/bin/unoconvert
|
||||
calibre: "" # Defaults to /usr/bin/ebook-convert
|
||||
ocrmypdf: "" # Defaults to /usr/bin/ocrmypdf
|
||||
soffice: "" # Defaults to /usr/bin/soffice
|
||||
fileUploadLimit: "" # Defaults to "". No limit when string is empty. Set a number, between 0 and 999, followed by one of the following strings to set a limit. "KB", "MB", "GB".
|
||||
tempFileManagement:
|
||||
baseTmpDir: '' # Defaults to java.io.tmpdir/stirling-pdf
|
||||
libreofficeDir: '' # Defaults to tempFileManagement.baseTmpDir/libreoffice
|
||||
systemTempDir: '' # Only used if cleanupSystemTemp is true
|
||||
baseTmpDir: "" # Defaults to java.io.tmpdir/stirling-pdf
|
||||
libreofficeDir: "" # Defaults to tempFileManagement.baseTmpDir/libreoffice
|
||||
systemTempDir: "" # Only used if cleanupSystemTemp is true
|
||||
prefix: stirling-pdf- # Prefix for temp file names
|
||||
maxAgeHours: 24 # Maximum age in hours before temp files are cleaned up
|
||||
cleanupIntervalMinutes: 30 # How often to run cleanup (in minutes)
|
||||
startupCleanup: true # Clean up old temp files on startup
|
||||
cleanupSystemTemp: false # Whether to clean broader system temp directory
|
||||
databaseBackup:
|
||||
cron: 0 0 0 * * ? # Cron expression for automatic database backups "0 0 0 * * ?" daily at midnight
|
||||
|
||||
stirling:
|
||||
pdf:
|
||||
fallback-font: classpath:/static/fonts/NotoSans-Regular.ttf # Override to point at a custom fallback font
|
||||
json:
|
||||
font-normalization:
|
||||
enabled: false # IMPORTANT: Disable to preserve ToUnicode CMaps for correct font rendering. Ghostscript strips Unicode mappings from CID fonts.
|
||||
cff-converter:
|
||||
enabled: true # Wrap CFF/Type1C fonts as OpenType-CFF for browser compatibility
|
||||
method: python # Converter method: 'python' (fontTools, recommended - wraps as OTF), 'fontforge' (legacy - converts to TTF, may hang on CID fonts)
|
||||
python-command: /opt/venv/bin/python3 # Python interpreter path
|
||||
python-script: /scripts/convert_cff_to_ttf.py # Path to font wrapping script
|
||||
fontforge-command: fontforge # Override if FontForge is installed under a different name/path
|
||||
type3:
|
||||
library:
|
||||
enabled: true # Match common Type3 fonts against the built-in library of converted programs
|
||||
index: classpath:/type3/library/index.json # Override to point at a custom index.json (supports http:, file:, classpath:)
|
||||
cron: "0 0 0 * * ?" # Cron expression for automatic database backups "0 0 0 * * ?" daily at midnight
|
||||
|
||||
ui:
|
||||
appNameNavbar: '' # name displayed on the navigation bar
|
||||
appNameNavbar: "" # name displayed on the navigation bar
|
||||
logoStyle: classic # Options: 'classic' (default - classic S icon) or 'modern' (minimalist logo)
|
||||
languages: [] # If empty, all languages are enabled. To display only German and Polish ["de_DE", "pl_PL"]. British English is always enabled.
|
||||
languages: [] # If empty, all available languages are enabled. To restrict to specific languages, use a whitelist like ["de_DE", "pl_PL", "sv_SE"].
|
||||
defaultHideUnavailableTools: false # Default user preference: hide disabled tools instead of greying them out
|
||||
defaultHideUnavailableConversions: false # Default user preference: hide disabled conversion options instead of greying them out
|
||||
hideDisabledTools:
|
||||
googleDrive: false # Hide Google Drive button when not enabled
|
||||
mobileQRScanner: false # Hide mobile QR scanner button when not enabled
|
||||
|
||||
endpoints:
|
||||
toRemove: [ebook-to-pdf, crop, merge-pdfs, multi-page-layout, overlay-pdfs, pdf-to-single-page, rearrange-pages, remove-image-pdf, remove-pages, rotate-pdf, scale-pages, split-by-size-or-count, split-pages, split-pdf-by-chapters, split-pdf-by-sections, add-password, add-watermark, auto-redact, cert-sign, get-info-on-pdf, redact, remove-cert-sign, remove-password, sanitize-pdf, validate-signature, file-to-pdf, html-to-pdf, img-to-pdf, markdown-to-pdf, pdf-to-csv, pdf-to-html, pdf-to-img, pdf-to-markdown, pdf-to-pdfa, pdf-to-presentation, pdf-to-text, pdf-to-word, pdf-to-xml, url-to-pdf, add-image, add-page-numbers, add-stamp, auto-rename, auto-split-pdf, compress-pdf, decompress-pdf, extract-image-scans, extract-images, flatten, ocr-pdf, remove-blanks, repair, replace-invert-pdf, show-javascript, update-metadata, filter-contains-image, filter-contains-text, filter-file-size, filter-page-count, filter-page-rotation, filter-page-size, add-attachments] # list endpoints to disable (e.g. ['img-to-pdf', 'remove-pages'])
|
||||
groupsToRemove: [] # list groups to disable (e.g. ['LibreOffice'])
|
||||
groupsToRemove: [] # list groups to disable (e.g. ['LibreOffice', 'DeveloperTools', 'DeveloperDocs', 'Automation'])
|
||||
|
||||
metrics:
|
||||
enabled: true # 'true' to enable Info APIs (`/api/*`) endpoints, 'false' to disable
|
||||
@@ -220,11 +258,23 @@ metrics:
|
||||
AutomaticallyGenerated:
|
||||
key: cbb81c0f-50b1-450c-a2b5-89ae527776eb
|
||||
UUID: 10dd4fba-01fa-4717-9b78-3dc4f54e398a
|
||||
appVersion: 2.1.2
|
||||
appVersion: 2.7.2
|
||||
|
||||
processExecutor:
|
||||
autoUnoServer: true # true: use local pool based on libreOfficeSessionLimit; false: use unoServerEndpoints
|
||||
unoServerEndpoints: [] # Used when autoUnoServer is false
|
||||
# Example manual endpoints (uncomment to use):
|
||||
# unoServerEndpoints:
|
||||
# - host: "127.0.0.1"
|
||||
# port: 2003
|
||||
# hostLocation: "auto" # auto|local|remote (use "remote" for port-forwarded servers)
|
||||
# protocol: "http" # http|https
|
||||
# - host: "remote-server.local"
|
||||
# port: 8080
|
||||
# hostLocation: "remote"
|
||||
# protocol: "https"
|
||||
sessionLimit: # Process executor instances limits
|
||||
libreOfficeSessionLimit: 1
|
||||
libreOfficeSessionLimit: 1 # Each additional uno server adds ~50MB idle RAM
|
||||
pdfToHtmlSessionLimit: 1
|
||||
qpdfSessionLimit: 4
|
||||
tesseractSessionLimit: 1
|
||||
@@ -232,6 +282,7 @@ processExecutor:
|
||||
weasyPrintSessionLimit: 16
|
||||
installAppSessionLimit: 1
|
||||
calibreSessionLimit: 1
|
||||
imageMagickSessionLimit: 4
|
||||
ghostscriptSessionLimit: 8
|
||||
ocrMyPdfSessionLimit: 2
|
||||
timeoutMinutes: # Process executor timeout in minutes
|
||||
@@ -241,7 +292,26 @@ processExecutor:
|
||||
weasyPrinttimeoutMinutes: 30
|
||||
installApptimeoutMinutes: 60
|
||||
calibretimeoutMinutes: 30
|
||||
imageMagickTimeoutMinutes: 30
|
||||
tesseractTimeoutMinutes: 30
|
||||
qpdfTimeoutMinutes: 30
|
||||
ghostscriptTimeoutMinutes: 30
|
||||
ocrMyPdfTimeoutMinutes: 30
|
||||
|
||||
pdfEditor:
|
||||
fallback-font: classpath:/static/fonts/NotoSans-Regular.ttf # Override to point at a custom fallback font
|
||||
cache:
|
||||
max-bytes: -1 # Max in-memory cache size in bytes; -1 disables byte cap
|
||||
max-percent: 20 # Max in-memory cache as % of JVM max; used when max-bytes <= 0
|
||||
font-normalization:
|
||||
enabled: false # IMPORTANT: Disable to preserve ToUnicode CMaps for correct font rendering. Ghostscript strips Unicode mappings from CID fonts.
|
||||
cff-converter:
|
||||
enabled: true # Wrap CFF/Type1C fonts as OpenType-CFF for browser compatibility
|
||||
method: python # Converter method: 'python' (fontTools, recommended - wraps as OTF), 'fontforge' (legacy - converts to TTF, may hang on CID fonts)
|
||||
python-command: /opt/venv/bin/python3 # Python interpreter path
|
||||
python-script: /scripts/convert_cff_to_ttf.py # Path to font wrapping script
|
||||
fontforge-command: fontforge # Override if FontForge is installed under a different name/path
|
||||
type3:
|
||||
library:
|
||||
enabled: true # Match common Type3 fonts against the built-in library of converted programs
|
||||
index: classpath:/type3/library/index.json # Override to point at a custom index.json (supports http:, file:, classpath:)
|
||||
|
||||
@@ -31,6 +31,47 @@ find_root() {
|
||||
|
||||
PROJECT_ROOT=$(find_root)
|
||||
|
||||
# Base image version - taken from BASE_VERSION when set; otherwise a unique test tag is generated below
|
||||
# This is a testing-specific version; production should pass explicit BASE_VERSION
|
||||
if [ -z "$BASE_VERSION" ]; then
|
||||
# For CI/automation: use a unique test identifier
|
||||
if [ -n "${GITHUB_RUN_ID}" ]; then
|
||||
BASE_VERSION="test-${GITHUB_RUN_ID}"
|
||||
else
|
||||
# For local testing: generate unique identifier
|
||||
BASE_VERSION="test-local-$(date +%s)"
|
||||
fi
|
||||
fi
|
||||
BASE_IMAGE="ghcr.io/stirling-tools/stirling-pdf-base:${BASE_VERSION}"
|
||||
|
||||
# Function to ensure base image exists (build if missing)
|
||||
ensure_base_image() {
|
||||
echo "Checking for base image: $BASE_IMAGE"
|
||||
|
||||
if docker image inspect "$BASE_IMAGE" >/dev/null 2>&1; then
|
||||
echo "✓ Base image found locally: $BASE_IMAGE"
|
||||
return 0
|
||||
fi
|
||||
|
||||
echo "Base image not found. Attempting to pull from registry..."
|
||||
if docker pull "$BASE_IMAGE" 2>/dev/null; then
|
||||
echo "✓ Pulled base image from registry: $BASE_IMAGE"
|
||||
return 0
|
||||
fi
|
||||
|
||||
echo "Base image not available in registry. Building from source..."
|
||||
if docker build -f "$PROJECT_ROOT/docker/base/Dockerfile" \
|
||||
-t "$BASE_IMAGE" \
|
||||
--build-arg BASE_VERSION="$BASE_VERSION" \
|
||||
"$PROJECT_ROOT/docker/base"; then
|
||||
echo "✓ Built base image: $BASE_IMAGE"
|
||||
return 0
|
||||
else
|
||||
echo "ERROR: Failed to build base image"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to check application readiness via HTTP instead of Docker's health status
|
||||
check_health() {
|
||||
local container_name=$1 # real container name
|
||||
@@ -101,14 +142,16 @@ capture_file_list() {
|
||||
-not -path '/configs/*' \
|
||||
-not -path '/logs/*' \
|
||||
-not -path '*/home/stirlingpdfuser/.config/libreoffice/*' \
|
||||
-not -path '*/home/stirlingpdfuser/.config/calibre/*' \
|
||||
-not -path '*/home/stirlingpdfuser/.java/fonts/*' \
|
||||
-not -path '*/home/stirlingpdfuser/.pdfbox.cache' \
|
||||
-not -path '*/tmp/stirling-pdf/PDFBox*' \
|
||||
-not -path '*/tmp/stirling-pdf/hsperfdata_stirlingpdfuser/*' \
|
||||
-not -path '*/tmp/hsperfdata_stirlingpdfuser/*' \
|
||||
-not -path '*/tmp/hsperfdata_root/*' \
|
||||
-not -path '*/tmp/stirling-pdf/jetty-*/*' \
|
||||
-not -path '*/tmp/stirling-pdf/lu*' \
|
||||
-not -path '*/tmp/stirling-pdf/tmp*' \
|
||||
-not -path '/tmp/lu*' \
|
||||
-not -path '*/tmp/*/user/registrymodifications.xcu' \
|
||||
-not -path '/app/stirling.aot' \
|
||||
-not -path '*/tmp/stirling.aotconf' \
|
||||
-not -path '*/tmp/aot-*.log' \
|
||||
@@ -128,14 +171,16 @@ capture_file_list() {
|
||||
-not -path '/configs/*' \
|
||||
-not -path '/logs/*' \
|
||||
-not -path '*/home/stirlingpdfuser/.config/libreoffice/*' \
|
||||
-not -path '*/home/stirlingpdfuser/.config/calibre/*' \
|
||||
-not -path '*/home/stirlingpdfuser/.java/fonts/*' \
|
||||
-not -path '*/home/stirlingpdfuser/.pdfbox.cache' \
|
||||
-not -path '*/tmp/PDFBox*' \
|
||||
-not -path '*/tmp/hsperfdata_stirlingpdfuser/*' \
|
||||
-not -path '*/tmp/hsperfdata_root/*' \
|
||||
-not -path '*/tmp/stirling-pdf/hsperfdata_stirlingpdfuser/*' \
|
||||
-not -path '*/tmp/stirling-pdf/jetty-*/*' \
|
||||
-not -path '*/tmp/lu*' \
|
||||
-not -path '*/tmp/tmp*' \
|
||||
-not -path '/tmp/lu*' \
|
||||
-not -path '/tmp/tmp*' \
|
||||
-not -path '/app/stirling.aot' \
|
||||
-not -path '*/tmp/stirling.aotconf' \
|
||||
-not -path '*/tmp/aot-*.log' \
|
||||
@@ -374,6 +419,13 @@ main() {
|
||||
SECONDS=0
|
||||
cd "$PROJECT_ROOT"
|
||||
|
||||
# Ensure base image exists before running tests
|
||||
echo "=========================================="
|
||||
echo "Preparing Docker base image..."
|
||||
echo "=========================================="
|
||||
ensure_base_image || exit 1
|
||||
echo ""
|
||||
|
||||
# Parse command line arguments
|
||||
RERUN_MODE=false
|
||||
declare -a RERUN_TESTS
|
||||
|
||||
Reference in New Issue
Block a user