# Stirling-PDF/DockerfileBase

# Main stage
FROM alpine:3.19.0

# Toolchain layer: registers the Alpine edge repositories, installs the OS
# packages, bootstraps pip, installs the Python tools, and finally relocates the
# Tesseract language data. Everything is chained in one RUN so it is committed
# as a single layer.
#
# `set -o pipefail` is enabled inside this RUN (instead of through a SHELL
# instruction) so that a failed download in the `wget | python3` pipeline aborts
# the build, without changing the shell configuration inherited by images that
# are built on top of this one.
RUN set -o pipefail && \
    # All three edge repositories are registered under the same "@testing" tag,
    # so apk consults them only for packages requested with that tag
    # (libreoffice below) and for the dependencies those packages need.
    printf '%s\n' \
        "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" \
        "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" \
        "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" \
        | tee -a /etc/apk/repositories && \
    apk add --no-cache \
        ca-certificates \
        tzdata \
        tini \
        bash \
        curl \
        # JDK for app
        openjdk17-jre \
        # Doc conversion
        libreoffice@testing \
        # OCRmyPDF (unpaper for deskew and other advanced features)
        ocrmypdf \
        tesseract-ocr-data-eng \
        # CV
        py3-opencv \
        # python3/pip
        python3 && \
    # Bootstrap pip from the upstream installer script. --break-system-packages
    # lets pip write into the apk-managed Python environment.
    wget https://bootstrap.pypa.io/get-pip.py -qO - | python3 - --break-system-packages --no-cache-dir --upgrade && \
    # uno unoconv and HTML
    pip install --break-system-packages --no-cache-dir --upgrade unoconv WeasyPrint && \
    # Move the packaged Tesseract data aside.
    # NOTE(review): presumably so /usr/share/tessdata can be provided (e.g. as a
    # mounted volume) and re-seeded from this copy at start-up; confirm against
    # the image that consumes this base.
    mv /usr/share/tessdata /usr/share/tessdata-original