From bf65c456d1829dd4109aff51bf9c59e4845bd381 Mon Sep 17 00:00:00 2001 From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com> Date: Fri, 7 Feb 2025 13:06:19 +0000 Subject: [PATCH] PDFA fixes (#2896) # Description of Changes - Re-enabled the `pdf-to-pdfa` endpoint by removing the temporary `disableEndpoint("pdf-to-pdfa")` call (previously "disabled for now while we resolve issues") from `EndpointConfiguration`. - Fixed the LibreOffice `--convert-to` argument built in `ConvertPDFToPDFA`: the filter is now passed as `pdf:writer_pdf_Export:{"SelectPdfVersion":{"type":"long","value":"N"}}` (`N` is `2` for the `pdfa` output format and `1` otherwise) instead of the previous `pdf:writer_pdf_Export:{'SelectPdfVersion':{'Value':'N'}}:writer_pdf_Export` form. - Added the `gcompat` and `libc6-compat` packages to the package list in `Dockerfile` and `Dockerfile.fat`. - Updated the `pdfToPDFA.credit` message in `messages_en_GB.properties` and `messages_en_US.properties` to credit libreoffice instead of qpdf. --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details. 
--- Dockerfile | 4 +++- Dockerfile.fat | 4 +++- .../software/SPDF/config/EndpointConfiguration.java | 3 --- .../SPDF/controller/api/converters/ConvertPDFToPDFA.java | 6 +++--- src/main/resources/messages_en_GB.properties | 2 +- src/main/resources/messages_en_US.properties | 2 +- 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index 377e6de20..d99153d23 100644 --- a/Dockerfile +++ b/Dockerfile @@ -56,13 +56,15 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et openssl-dev \ openjdk21-jre \ # Doc conversion + gcompat \ + libc6-compat \ libreoffice \ # pdftohtml poppler-utils \ # OCR MY PDF (unpaper for descew and other advanced features) tesseract-ocr-data-eng \ # CV - py3-opencv \ + py3-opencv \ # python3/pip python3 \ py3-pip && \ diff --git a/Dockerfile.fat b/Dockerfile.fat index f8450fa02..97e328501 100644 --- a/Dockerfile.fat +++ b/Dockerfile.fat @@ -57,6 +57,8 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et openssl-dev \ openjdk21-jre \ # Doc conversion + gcompat \ + libc6-compat \ libreoffice \ # pdftohtml poppler-utils \ @@ -65,7 +67,7 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et tesseract-ocr-data-eng \ font-terminus font-dejavu font-noto font-noto-cjk font-awesome font-noto-extra \ # CV - py3-opencv \ + py3-opencv \ # python3/pip python3 \ py3-pip && \ diff --git a/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java b/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java index a55831ab6..c8fe0c291 100644 --- a/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java +++ b/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java @@ -265,9 +265,6 @@ public class EndpointConfiguration { // Pdftohtml dependent endpoints addEndpointToGroup("Pdftohtml", "pdf-to-html"); addEndpointToGroup("Pdftohtml", "pdf-to-markdown"); - - // disabled for now while we 
resolve issues - disableEndpoint("pdf-to-pdfa"); } private void processEnvironmentConfigs() { diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java index 5caee0f96..69c427016 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java @@ -73,8 +73,8 @@ public class ConvertPDFToPDFA { // Determine PDF/A filter based on requested format String pdfFilter = "pdfa".equals(outputFormat) - ? "writer_pdf_Export:{'SelectPdfVersion':{'Value':'2'}}:writer_pdf_Export" - : "writer_pdf_Export:{'SelectPdfVersion':{'Value':'1'}}:writer_pdf_Export"; + ? "pdf:writer_pdf_Export:{\"SelectPdfVersion\":{\"type\":\"long\",\"value\":\"2\"}}" + : "pdf:writer_pdf_Export:{\"SelectPdfVersion\":{\"type\":\"long\",\"value\":\"1\"}}"; // Prepare LibreOffice command List command = @@ -84,7 +84,7 @@ public class ConvertPDFToPDFA { "--headless", "--nologo", "--convert-to", - "pdf:" + pdfFilter, + pdfFilter, "--outdir", tempOutputDir.toString(), tempInputFile.toString())); diff --git a/src/main/resources/messages_en_GB.properties b/src/main/resources/messages_en_GB.properties index 0139fa96e..9be009db5 100644 --- a/src/main/resources/messages_en_GB.properties +++ b/src/main/resources/messages_en_GB.properties @@ -1185,7 +1185,7 @@ changeMetadata.submit=Change #pdfToPDFA pdfToPDFA.title=PDF To PDF/A pdfToPDFA.header=PDF To PDF/A -pdfToPDFA.credit=This service uses qpdf for PDF/A conversion +pdfToPDFA.credit=This service uses libreoffice for PDF/A conversion pdfToPDFA.submit=Convert pdfToPDFA.tip=Currently does not work for multiple inputs at once pdfToPDFA.outputFormat=Output format diff --git a/src/main/resources/messages_en_US.properties b/src/main/resources/messages_en_US.properties index f4a675391..57f60786b 100644 --- 
a/src/main/resources/messages_en_US.properties +++ b/src/main/resources/messages_en_US.properties @@ -1185,7 +1185,7 @@ changeMetadata.submit=Change #pdfToPDFA pdfToPDFA.title=PDF To PDF/A pdfToPDFA.header=PDF To PDF/A -pdfToPDFA.credit=This service uses qpdf for PDF/A conversion +pdfToPDFA.credit=This service uses libreoffice for PDF/A conversion pdfToPDFA.submit=Convert pdfToPDFA.tip=Currently does not work for multiple inputs at once pdfToPDFA.outputFormat=Output format