Stirling-PDF/scripts/init.sh

31 lines
1.1 KiB
Bash
Raw Normal View History

2023-05-01 22:57:48 +02:00
#!/bin/bash
# Copy the original tesseract-ocr files to the volume directory without overwriting existing files
echo "Copying original files without overwriting existing files"
2023-12-31 17:14:55 +01:00
mkdir -p /usr/share/tessdata
cp -rn /usr/share/tessdata-original/* /usr/share/tessdata
2023-05-01 22:57:48 +02:00
if [ -d /usr/share/tesseract-ocr/4.00/tessdata ]; then
cp -r /usr/share/tesseract-ocr/4.00/tessdata/* /usr/share/tessdata || true;
fi
if [ -d /usr/share/tesseract-ocr/5/tessdata ]; then
cp -r /usr/share/tesseract-ocr/5/tessdata/* /usr/share/tessdata || true;
2023-12-11 00:06:35 +01:00
fi
2023-07-29 14:53:30 +02:00
# Check if TESSERACT_LANGS environment variable is set and is not empty
if [[ -n "$TESSERACT_LANGS" ]]; then
# Convert comma-separated values to a space-separated list
SPACE_SEPARATED_LANGS=$(echo $TESSERACT_LANGS | tr ',' ' ')
pattern='^[a-zA-Z]{2,4}(_[a-zA-Z]{2,4})?$'
2023-07-29 14:53:30 +02:00
# Install each language pack
for LANG in $SPACE_SEPARATED_LANGS; do
if [[ $LANG =~ $pattern ]]; then
apk add --no-cache "tesseract-ocr-data-$LANG"
else
echo "Skipping invalid language code"
fi
2023-07-29 14:53:30 +02:00
done
fi
/scripts/init-without-ocr.sh "$@"