mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2026-02-17 13:52:14 +01:00
V1 merge (#5193)
# Description of Changes <!-- Please provide a summary of the changes, including: - What was changed - Why the change was made - Any challenges encountered Closes #(issue_number) --> --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details. 
--------- Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: Balázs Szücs <bszucs1209@gmail.com> Signed-off-by: stirlingbot[bot] <stirlingbot[bot]@users.noreply.github.com> Co-authored-by: ConnorYoh <40631091+ConnorYoh@users.noreply.github.com> Co-authored-by: Connor Yoh <connor@stirlingpdf.com> Co-authored-by: OUNZAR Aymane <aymane.ounzar@imt-atlantique.net> Co-authored-by: YAOU Reda <yaoureda24@gmail.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: stirlingbot[bot] <195170888+stirlingbot[bot]@users.noreply.github.com> Co-authored-by: Balázs Szücs <127139797+balazs-szucs@users.noreply.github.com> Co-authored-by: Ludy <Ludy87@users.noreply.github.com> Co-authored-by: tkymmm <136296842+tkymmm@users.noreply.github.com> Co-authored-by: Peter Dave Hello <hsu@peterdavehello.org> Co-authored-by: albanobattistella <34811668+albanobattistella@users.noreply.github.com> Co-authored-by: PingLin8888 <88387490+PingLin8888@users.noreply.github.com> Co-authored-by: FdaSilvaYY <FdaSilvaYY@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: OteJlo <106060728+OteJlo@users.noreply.github.com> Co-authored-by: Angel <41905618+TheShadowAngel@users.noreply.github.com> Co-authored-by: Ricardo Catarino <ricardomicc@gmail.com> Co-authored-by: Luis Antonio Argüelles González <luis.arguelles@encora.com> Co-authored-by: Dawid Urbański <31166488+urbaned121@users.noreply.github.com> Co-authored-by: Stephan Paternotte <Stephan-P@users.noreply.github.com> Co-authored-by: Leonardo Santos Paulucio <leonardo.paulucio@hotmail.com> Co-authored-by: hamza khalem <72972114+hamzakhalem@users.noreply.github.com> Co-authored-by: IT Creativity + Art Team <admin@it-playground.net> Co-authored-by: Reece Browne <74901996+reecebrowne@users.noreply.github.com> Co-authored-by: James Brunton <jbrunton96@gmail.com> Co-authored-by: Victor Villarreal 
<133383186+vvillarreal-cfee@users.noreply.github.com>
This commit is contained in:
120
scripts/init.sh
120
scripts/init.sh
@@ -1,36 +1,110 @@
|
||||
#!/bin/bash
|
||||
# This script initializes environment variables and paths,
|
||||
# prepares Tesseract data directories, and then runs the main init script.
|
||||
|
||||
# Copy the original tesseract-ocr files to the volume directory without overwriting existing files
|
||||
echo "Copying original files without overwriting existing files"
|
||||
mkdir -p /usr/share/tessdata
|
||||
cp -rn /usr/share/tessdata-original/* /usr/share/tessdata
|
||||
set -euo pipefail
|
||||
|
||||
if [ -d /usr/share/tesseract-ocr/4.00/tessdata ]; then
|
||||
cp -r /usr/share/tesseract-ocr/4.00/tessdata/* /usr/share/tessdata || true;
|
||||
#######################################
# Add a directory to the front of a colon-separated search path such as PATH
# or PYTHONPATH. Despite the name, the new entry is prepended, not appended.
# Arguments:
#   $1 - directory to add; ignored unless it is an existing directory
#   $2 - current path value (optional; treated as empty when omitted)
# Outputs:
#   Writes the resulting path to stdout without a trailing newline. The
#   current value is written back unchanged when $1 is not a directory or is
#   already one of its entries.
#######################################
append_env_path() {
  # ${1-} / ${2-} keep the function usable under `set -u` when the caller has
  # no existing value to pass.
  local target="${1-}" current="${2-}" separator=":"

  # Nothing to add: the directory is missing, or it is already an exact entry.
  if [ ! -d "$target" ] \
    || [[ "${separator}${current}${separator}" == *"${separator}${target}${separator}"* ]]; then
    printf '%s' "$current"
    return 0
  fi

  # ${current:+...} omits the separator when the existing path is empty.
  printf '%s' "${target}${current:+${separator}${current}}"
}
|
||||
|
||||
#######################################
# Print the site-packages directory of a Python virtual environment.
# Arguments:
#   $1 - root directory of the virtual environment
# Outputs:
#   Writes "<venv>/lib/pythonX.Y/site-packages" to stdout (no trailing
#   newline) when <venv>/bin/python is executable, reports its version, and
#   that directory exists. Writes nothing otherwise.
# Returns:
#   0 in every case, so an absent or incomplete venv is not an error.
#######################################
python_site_dir() {
  local venv_dir="$1"
  local python_bin="$venv_dir/bin/python"
  local py_tag site_packages

  [ -x "$python_bin" ] || return 0

  # Ask the venv's own interpreter for its "pythonX.Y" tag. Its stderr is
  # discarded; a failing interpreter simply produces no result.
  py_tag="$("$python_bin" -c 'import sys; print(f"python{sys.version_info.major}.{sys.version_info.minor}")' 2>/dev/null)" \
    || return 0
  [ -n "$py_tag" ] || return 0

  site_packages="$venv_dir/lib/$py_tag/site-packages"
  if [ -d "$site_packages" ]; then
    printf '%s' "$site_packages"
  fi
}
|
||||
|
||||
# === LD_LIBRARY_PATH ===
# Put the multiarch library directory for this CPU architecture at the front
# of LD_LIBRARY_PATH. Architectures other than x86_64 and aarch64 leave the
# variable untouched.
ARCH=$(uname -m)
case "$ARCH" in
  x86_64 | aarch64)
    # An explicit `if` keeps the case arm's exit status at 0 when the
    # directory is absent.
    if [ -d "/usr/lib/${ARCH}-linux-gnu" ]; then
      export LD_LIBRARY_PATH="/usr/lib/${ARCH}-linux-gnu${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
    fi
    ;;
esac

# Add LibreOffice program directory to library path if available.
if [ -d /usr/lib/libreoffice/program ]; then
  export LD_LIBRARY_PATH="/usr/lib/libreoffice/program${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
fi
|
||||
|
||||
# === Python PATH ===
# Add virtual environments to PATH and PYTHONPATH.
# NOTE(review): append_env_path puts each new entry in front of the current
# value, so the last directory listed here ends up first on PATH, while
# PYTHONPATH below keeps the listed order. Confirm that precedence is intended.
for dir in /opt/venv/bin /opt/unoserver-venv/bin; do
  PATH="$(append_env_path "$dir" "$PATH")"
done
export PATH

# Collect the site-packages directory of every venv that is present.
PYTHON_PATH_ENTRIES=()
for venv in /opt/venv /opt/unoserver-venv; do
  [ -d "$venv" ] || continue
  site_dir="$(python_site_dir "$venv")"
  if [ -n "${site_dir:-}" ]; then
    PYTHON_PATH_ENTRIES+=("$site_dir")
  fi
done

# Join the collected entries with ':' and place them ahead of any existing
# PYTHONPATH. PYTHONPATH is left alone when nothing was found.
if [ ${#PYTHON_PATH_ENTRIES[@]} -gt 0 ]; then
  PYTHONPATH="$(IFS=:; printf '%s' "${PYTHON_PATH_ENTRIES[*]}")${PYTHONPATH:+:$PYTHONPATH}"
  export PYTHONPATH
fi
|
||||
|
||||
# === tessdata ===
|
||||
# Prepare Tesseract OCR data directory.
|
||||
REAL_TESSDATA="/usr/share/tesseract-ocr/5/tessdata"
|
||||
SEC_TESSDATA="/usr/share/tessdata"
|
||||
|
||||
# Write a warning line to stderr, prefixed with "[init][warn] ".
# All arguments are joined via "$*" (a single space with the default IFS).
log_warn() {
  # printf emits the message verbatim regardless of shell echo options.
  printf '%s\n' "[init][warn] $*" >&2
}
|
||||
|
||||
# Warn only when the primary tessdata directory is unusable. Previously the
# writable case also logged "Skipping tessdata adjustments", giving a reason
# that contradicted this one; no adjustment is performed in either case.
if [ ! -d "$REAL_TESSDATA" ] || [ ! -w "$REAL_TESSDATA" ]; then
  log_warn "Skipping tessdata adjustments; directory missing or not writable: $REAL_TESSDATA"
fi
|
||||
|
||||
if [ -d /usr/share/tesseract-ocr/5/tessdata ]; then
|
||||
cp -r /usr/share/tesseract-ocr/5/tessdata/* /usr/share/tessdata || true;
|
||||
REAL_TESSDATA="/usr/share/tesseract-ocr/5/tessdata"
|
||||
log_warn "Using /usr/share/tesseract-ocr/5/tessdata as TESSDATA_PREFIX"
|
||||
elif [ -d /usr/share/tessdata ]; then
|
||||
REAL_TESSDATA="/usr/share/tessdata"
|
||||
log_warn "Using /usr/share/tessdata as TESSDATA_PREFIX"
|
||||
elif [ -d /tessdata ]; then
|
||||
REAL_TESSDATA="/tessdata"
|
||||
log_warn "Using /tessdata as TESSDATA_PREFIX"
|
||||
else
|
||||
REAL_TESSDATA=""
|
||||
log_warn "No tessdata directory found"
|
||||
fi
|
||||
|
||||
# Check if TESSERACT_LANGS environment variable is set and is not empty
|
||||
if [[ -n "$TESSERACT_LANGS" ]]; then
|
||||
# Convert comma-separated values to a space-separated list
|
||||
SPACE_SEPARATED_LANGS=$(echo $TESSERACT_LANGS | tr ',' ' ')
|
||||
pattern='^[a-zA-Z]{2,4}(_[a-zA-Z]{2,4})?$'
|
||||
# Install each language pack
|
||||
for LANG in $SPACE_SEPARATED_LANGS; do
|
||||
if [[ $LANG =~ $pattern ]]; then
|
||||
apk add --no-cache "tesseract-ocr-data-$LANG"
|
||||
else
|
||||
echo "Skipping invalid language code"
|
||||
fi
|
||||
done
|
||||
if [ -n "$REAL_TESSDATA" ]; then
|
||||
export TESSDATA_PREFIX="$REAL_TESSDATA"
|
||||
fi
|
||||
|
||||
# Ensure temp directory exists with correct permissions before running main init
|
||||
mkdir -p /tmp/stirling-pdf || true
|
||||
# === Temp dir ===
|
||||
# Ensure the temporary directory exists and has proper permissions.
|
||||
mkdir -p /tmp/stirling-pdf
|
||||
chown -R stirlingpdfuser:stirlingpdfgroup /tmp/stirling-pdf || true
|
||||
chmod -R 755 /tmp/stirling-pdf || true
|
||||
|
||||
/scripts/init-without-ocr.sh "$@"
|
||||
# === Start application ===
# Run the main init script that handles the full startup logic.
# exec replaces this shell with that script, and "$@" forwards any arguments
# given to this wrapper (it expands to nothing when there are none).
exec /scripts/init-without-ocr.sh "$@"
|
||||
|
||||
Reference in New Issue
Block a user