2024-02-10 00:45:18 +01:00
|
|
|
# Main stage
|
2024-12-13 21:07:45 +01:00
|
|
|
# Base image: Alpine Linux pinned to the exact 3.20.3 tag; later steps install packages with apk.
FROM alpine:3.20.3
|
2024-02-11 17:47:00 +01:00
|
|
|
|
2024-03-04 21:51:49 +01:00
|
|
|
# Copy necessary files
# NOTE(review): these COPY steps run before the package-install RUN further down,
# so any change to the scripts, fonts or jar invalidates that layer's build cache.
# Helper scripts; /scripts/init.sh is the container entrypoint.
COPY scripts /scripts
# Pipeline directory tree (watchedFolders/finishedFolders are created under it later).
COPY pipeline /pipeline
# Bundled TrueType fonts; the directory is scanned by fc-cache in the RUN step.
COPY src/main/resources/static/fonts/*.ttf /usr/share/fonts/opentype/noto/
#COPY src/main/resources/static/fonts/*.otf /usr/share/fonts/opentype/noto/
# Application jar. The destination is absolute so it no longer depends on the
# implicit working directory and matches the /app.jar path used by chown and CMD.
# The glob must match exactly one jar, because the destination is a file name.
COPY build/libs/*.jar /app.jar
|
|
|
|
|
|
|
|
# Build argument carrying the version string (pass with --build-arg VERSION_TAG=...).
# It has no default, so it expands to an empty value when not supplied.
ARG VERSION_TAG

# Environment baked into the image and visible to the running container.
#   DOCKER_ENABLE_SECURITY / VERSION_TAG - application settings
#   HOME                                 - home directory, created in the RUN step below
#   PUID / PGID / UMASK                  - presumably consumed by /scripts/init.sh at start-up; confirm
#   JAVA_TOOL_OPTIONS                    - appends a 75% MaxRAMPercentage cap to any inherited value
ENV DOCKER_ENABLE_SECURITY=false \
    VERSION_TAG=${VERSION_TAG} \
    HOME=/home/stirlingpdfuser \
    PUID=1000 \
    PGID=1000 \
    UMASK=022 \
    JAVA_TOOL_OPTIONS="${JAVA_TOOL_OPTIONS} -XX:MaxRAMPercentage=75"
|
2024-03-04 21:51:49 +01:00
|
|
|
|
2024-02-10 00:45:18 +01:00
|
|
|
# Provision the image in a single layer: register extra apk repositories,
# upgrade and install OS packages, pip-install the Python helpers, create the
# working directories and the service account, then fix ownership.
# NOTE(review): all three Alpine edge repositories are registered under the same
# "@testing" tag and no package below is requested with an "@testing" suffix;
# confirm whether they are still needed.
RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
    echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
    echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \
    apk upgrade --no-cache -a && \
    apk add --no-cache \
        ca-certificates \
        tzdata \
        tini \
        bash \
        curl \
        qpdf \
        shadow \
        su-exec \
        openssl \
        openssl-dev \
# JDK for app
        openjdk21-jre \
# Doc conversion
        libreoffice \
# pdftohtml
        poppler-utils \
# OCR: English language data for tesseract
# NOTE(review): the previous comment mentioned OCRmyPDF and unpaper (deskew); neither is installed here.
        tesseract-ocr-data-eng \
# CV
        py3-opencv \
# python3/pip
        python3 \
        py3-pip && \
# unoconv and HTML helpers; --break-system-packages lets pip install into the apk-managed Python
    pip install --break-system-packages --no-cache-dir --upgrade unoconv WeasyPrint pdf2image pillow && \
# Move the packaged tessdata aside (presumably repopulated at runtime; confirm against init.sh)
    mv /usr/share/tessdata /usr/share/tessdata-original && \
# Working directories; $HOME is quoted so the path is never word-split
    mkdir -p "$HOME" /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \
# Rebuild the font cache so the fonts copied earlier are picked up
    fc-cache -f -v && \
    chmod +x /scripts/* && \
# Explicit chmod also fails the build if init.sh is missing from /scripts
    chmod +x /scripts/init.sh && \
# User permissions
    addgroup -S stirlingpdfgroup && adduser -S stirlingpdfuser -G stirlingpdfgroup && \
    chown -R stirlingpdfuser:stirlingpdfgroup "$HOME" /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline && \
    chown stirlingpdfuser:stirlingpdfgroup /app.jar && \
# Run tesseract once as the final step; a non-zero exit fails the build
    tesseract --list-langs
|
2024-02-11 17:47:00 +01:00
|
|
|
|
2024-05-22 23:58:01 +02:00
|
|
|
# Documents the TCP port the service is expected to listen on; EXPOSE does not publish it.
EXPOSE 8080/tcp
|
2024-02-11 17:47:00 +01:00
|
|
|
|
2023-09-04 19:42:22 +02:00
|
|
|
# Start-up: tini runs as the init process and launches /scripts/init.sh, which
# receives the CMD below as its arguments.
# NOTE(review): no USER instruction is set in this file; the switch to
# stirlingpdfuser presumably happens inside init.sh (su-exec is installed). Confirm.
ENTRYPOINT ["tini", "--", "/scripts/init.sh"]

# Default command: start the application jar with UTF-8 as the JVM file encoding.
CMD ["java", "-Dfile.encoding=UTF-8", "-jar", "/app.jar"]
|