mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-12-18 20:04:17 +01:00
# Description of Changes

### What was changed

This PR introduces a major refinement to the Docker runtime, system path resolution, conversion tooling, and integration logic across the codebase.

Key improvements include:

- Migration of **Dockerfile**, **Dockerfile.fat** to a unified Debian-based environment.
- Introduction of **RuntimePathConfig** enhancements to dynamically resolve:
  - `weasyprint`, `unoconvert`, `calibre`, `ocrmypdf`, `soffice`
  - Tesseract `tessdata` paths with Docker-aware defaults.
- Support for **UNO server (unoserver/unoconvert)** as primary document converter with automatic fallback to `soffice`.
- Isolation of Python environments for WeasyPrint and UNO tooling.
- Updated controllers and services to correctly inject `RuntimePathConfig`.
- Improved process execution logic in converters and OCR handling.
- Major updates to `init.sh` and `init-without-ocr.sh`:
  - Unified environment initialization
  - Proper UID/GID remapping
  - Safer permissions handling
  - Automatic Tesseract path detection
  - Reliable startup of headless LibreOffice + Xvfb + UNO server
- Full test suite updates:
  - Adaptation to new conversion paths
  - Mocking of UNO and LibreOffice commands
  - More robust Docker test logic
- Updated example docker-compose files referencing GHCR test images.
- Expanded configuration schema for new operations paths.

### Why the change was made

These changes address long-standing issues around:

- Inconsistent or missing binary paths between image variants.
- Reduced reliability of document conversions (UNO vs. soffice).
- Lack of uniform runtime initialization across Docker images.
- Repetitive environment setup logic split across multiple scripts.
- Fragile test scenarios tied to Alpine-based images.

Switching to a unified Debian-based runtime significantly improves:

- Compatibility with LibreOffice, Calibre, WebEngine and graphics stack.
- UNO stability for document conversions.
- Tesseract deterministic behavior.
- Debuggability and reliability of CI/CD Docker-based tests.

The improvements to `RuntimePathConfig` ensure all system binaries are fully configurable and correctly detected at runtime.

---

## Checklist

### General

- [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable)
- [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed)
- [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)

### Translations (if applicable)

- [ ] I ran [`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [x] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details.
111 lines
3.4 KiB
Bash
111 lines
3.4 KiB
Bash
#!/bin/bash
# This script initializes environment variables and paths,
# prepares Tesseract data directories, and then runs the main init script.

# Strict mode: abort on a failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Prepend a directory to a colon-separated path list.
#
# Despite the name, the directory is placed IN FRONT of the existing list so
# that it takes precedence over entries already present.
#
# Arguments:
#   $1 - directory to add
#   $2 - current value of the path list (may be empty or omitted)
# Outputs:
#   The resulting list on stdout, without a trailing newline. The list is
#   printed unchanged when the directory does not exist on disk or is already
#   one of its entries.
append_env_path() {
  local target="${1:-}"
  local current="${2:-}"
  local separator=":"

  # Wrapping both sides in separators makes this a whole-entry comparison, so
  # "/opt/a" is not mistaken for being present just because "/opt/ab" is.
  if [[ ! -d "$target" \
    || "${separator}${current}${separator}" == *"${separator}${target}${separator}"* ]]; then
    printf '%s' "$current"
    return 0
  fi

  if [[ -n "$current" ]]; then
    printf '%s' "${target}${separator}${current}"
  else
    # Avoid emitting a dangling separator when the list was empty.
    printf '%s' "$target"
  fi
}

# Print the site-packages directory of a Python virtual environment.
#
# Arguments:
#   $1 - root directory of the virtual environment
# Outputs:
#   "<venv>/lib/pythonX.Y/site-packages" on stdout (no trailing newline) when
#   "<venv>/bin/python" is executable, reports its version, and that
#   directory exists; nothing otherwise.
# Returns:
#   0 in every case, so a missing or broken venv never aborts a caller that
#   runs under `set -e`.
python_site_dir() {
  local venv_dir="${1:-}"
  local python_bin="$venv_dir/bin/python"
  local py_tag site_packages

  [[ -x "$python_bin" ]] || return 0

  # Ask the interpreter itself for its "pythonX.Y" tag; its stderr is
  # discarded because a failing interpreter simply means "no site dir".
  py_tag="$("$python_bin" -c 'import sys; print(f"python{sys.version_info.major}.{sys.version_info.minor}")' 2>/dev/null)" \
    || return 0
  [[ -n "$py_tag" ]] || return 0

  site_packages="$venv_dir/lib/$py_tag/site-packages"
  if [[ -d "$site_packages" ]]; then
    printf '%s' "$site_packages"
  fi
  return 0
}

# === LD_LIBRARY_PATH ===
# Adjust the library path depending on CPU architecture.
# Each directory is only added when it exists, and is placed in front of any
# value LD_LIBRARY_PATH already had. The ${VAR:+:$VAR} form appends the old
# value with a separator only when it is non-empty, so no stray ':' appears.
ARCH="$(uname -m)"
case "$ARCH" in
  x86_64)
    if [ -d /usr/lib/x86_64-linux-gnu ]; then
      export LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
    fi
    ;;
  aarch64)
    if [ -d /usr/lib/aarch64-linux-gnu ]; then
      export LD_LIBRARY_PATH="/usr/lib/aarch64-linux-gnu${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
    fi
    ;;
  *)
    # Any other architecture: leave LD_LIBRARY_PATH untouched.
    ;;
esac

# Add LibreOffice program directory to library path if available.
if [ -d /usr/lib/libreoffice/program ]; then
  export LD_LIBRARY_PATH="/usr/lib/libreoffice/program${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
fi

# === Python PATH ===
# Add virtual environments to PATH and PYTHONPATH.
# append_env_path puts each existing directory in front of PATH, so the last
# directory listed here ends up with the highest lookup priority.
for dir in /opt/venv/bin /opt/unoserver-venv/bin; do
  PATH="$(append_env_path "$dir" "$PATH")"
done
export PATH

# Collect the site-packages directory of every venv whose interpreter
# python_site_dir could resolve; venvs without one contribute nothing.
PYTHON_PATH_ENTRIES=()
for venv in /opt/venv /opt/unoserver-venv; do
  if [ -d "$venv" ]; then
    site_dir="$(python_site_dir "$venv")"
    if [ -n "${site_dir:-}" ]; then
      PYTHON_PATH_ENTRIES+=("$site_dir")
    fi
  fi
done

if [ "${#PYTHON_PATH_ENTRIES[@]}" -gt 0 ]; then
  # "${array[*]}" joins the entries with the first character of IFS; setting
  # IFS inside the command substitution keeps the change from leaking out.
  # Any pre-existing PYTHONPATH is kept, after the venv entries.
  PYTHONPATH="$(IFS=:; printf '%s' "${PYTHON_PATH_ENTRIES[*]}")${PYTHONPATH:+:$PYTHONPATH}"
  export PYTHONPATH
fi

# === tessdata ===
# Prepare Tesseract OCR data directory.
# REAL_TESSDATA is the preferred location; it is re-evaluated further down
# against the directories that actually exist.
REAL_TESSDATA="/usr/share/tesseract-ocr/5/tessdata"
# NOTE(review): SEC_TESSDATA is not referenced anywhere else in the visible
# script; kept for compatibility — confirm whether it can be dropped.
SEC_TESSDATA="/usr/share/tessdata"

# Write a message to stderr, prefixed with "[init][warn] ".
#
# Arguments:
#   $@ - message words; joined into one line with the first character of IFS
#        (a space by default)
# Outputs:
#   One line on stderr; nothing on stdout.
log_warn() {
  # printf '%s\n' prints the text verbatim regardless of shell echo options.
  printf '%s\n' "[init][warn] $*" >&2
}

# Report the state of the preferred tessdata directory. Both outcomes are
# purely informational: nothing in this script modifies the directory.
if [ -d "$REAL_TESSDATA" ] && [ -w "$REAL_TESSDATA" ]; then
  log_warn "Skipping tessdata adjustments; directory writable: $REAL_TESSDATA"
else
  log_warn "Skipping tessdata adjustments; directory missing or not writable: $REAL_TESSDATA"
fi

# Select the first tessdata directory that exists, in order of preference.
# REAL_TESSDATA stays empty when none of the candidates is present.
REAL_TESSDATA=""
for tessdata_dir in /usr/share/tesseract-ocr/5/tessdata /usr/share/tessdata /tessdata; do
  if [ -d "$tessdata_dir" ]; then
    REAL_TESSDATA="$tessdata_dir"
    log_warn "Using $tessdata_dir as TESSDATA_PREFIX"
    break
  fi
done

# Export TESSDATA_PREFIX only when a directory was found. Note that this
# replaces any TESSDATA_PREFIX value inherited from the environment.
if [ -n "$REAL_TESSDATA" ]; then
  export TESSDATA_PREFIX="$REAL_TESSDATA"
else
  log_warn "No tessdata directory found"
fi

# === Temp dir ===
# Ensure the temporary directory exists and has proper permissions.
mkdir -p /tmp/stirling-pdf
# Ownership and mode changes are best-effort: `|| true` keeps a failure of
# either command from aborting startup under `set -e`.
chown -R stirlingpdfuser:stirlingpdfgroup /tmp/stirling-pdf || true
chmod -R 755 /tmp/stirling-pdf || true

# === Start application ===
# Run the main init script that handles the full startup logic.
# `exec` replaces this shell with the target script, so the variables exported
# above are inherited and no extra shell process is left behind.
# NOTE(review): positional arguments ("$@") are not forwarded to the target
# script — confirm that this is intended.
exec /scripts/init-without-ocr.sh