From 80820e75c905d027006ec484dd03935862cfcddc Mon Sep 17 00:00:00 2001 From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com> Date: Sat, 1 Apr 2023 16:03:40 +0100 Subject: [PATCH] pdfa --- .../SPDF/controller/CompressController.java | 3 +- .../converters/ConvertPDFToPDFA.java | 53 ++++++++++++++++++- src/main/resources/messages_en_GB.properties | 9 ++++ .../templates/convert/pdf-to-pdfa.html | 30 +++++++++++ .../resources/templates/fragments/navbar.html | 3 +- src/main/resources/templates/home.html | 1 + 6 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 src/main/resources/templates/convert/pdf-to-pdfa.html diff --git a/src/main/java/stirling/software/SPDF/controller/CompressController.java b/src/main/java/stirling/software/SPDF/controller/CompressController.java index 323eadcef..b14e07214 100644 --- a/src/main/java/stirling/software/SPDF/controller/CompressController.java +++ b/src/main/java/stirling/software/SPDF/controller/CompressController.java @@ -53,9 +53,10 @@ public class CompressController { command.add("--skip-text"); command.add("--tesseract-timeout=0"); command.add("--optimize"); + command.add(String.valueOf(optimizeLevel)); command.add("--output-type"); command.add("pdf"); - command.add(String.valueOf(optimizeLevel)); + if (fastWebView != null && fastWebView) { long fileSize = inputFile.getSize(); diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertPDFToPDFA.java b/src/main/java/stirling/software/SPDF/controller/converters/ConvertPDFToPDFA.java index fd45777e5..8ba8cb9a6 100644 --- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertPDFToPDFA.java +++ b/src/main/java/stirling/software/SPDF/controller/converters/ConvertPDFToPDFA.java @@ -1,5 +1,6 @@ package stirling.software.SPDF.controller.converters; +import java.io.ByteArrayInputStream; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -7,6 +8,8 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import org.springframework.http.HttpHeaders; +import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.stereotype.Controller; import org.springframework.ui.Model; @@ -15,10 +18,58 @@ import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.multipart.MultipartFile; +import com.itextpdf.xmp.XMPException; + import stirling.software.SPDF.utils.PdfUtils; import stirling.software.SPDF.utils.ProcessExecutor; @Controller public class ConvertPDFToPDFA { - + @GetMapping("/pdf-to-pdfa") + public String pdfToPdfAForm(Model model) { + model.addAttribute("currentPage", "pdf-to-pdfa"); + return "pdf-to-pdfa"; + } + + + @PostMapping("/pdf-to-pdfa") + public ResponseEntity pdfToPdfA( + @RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException { + + + // Save the uploaded file to a temporary location + Path tempInputFile = Files.createTempFile("input_", ".pdf"); + inputFile.transferTo(tempInputFile.toFile()); + + // Prepare the output file path + Path tempOutputFile = Files.createTempFile("output_", ".pdf"); + + // Prepare the OCRmyPDF command + List command = new ArrayList<>(); + command.add("ocrmypdf"); + command.add("--skip-text"); + command.add("--tesseract-timeout=0"); + command.add("--output-type"); + command.add("pdfa"); + command.add(tempInputFile.toString()); + command.add(tempOutputFile.toString()); + + int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command); + + // Read the optimized PDF file + byte[] pdfBytes = Files.readAllBytes(tempOutputFile); + + // Clean up the temporary files + Files.delete(tempInputFile); + Files.delete(tempOutputFile); + + // Return the optimized PDF as a response + String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_PDFA.pdf"; + HttpHeaders headers = new HttpHeaders(); + headers.setContentType(MediaType.APPLICATION_PDF); + headers.setContentDispositionFormData("attachment", outputFilename); + return ResponseEntity.ok().headers(headers).body(pdfBytes); +} + + } diff --git a/src/main/resources/messages_en_GB.properties b/src/main/resources/messages_en_GB.properties index 6288c5cb8..1cd6b66aa 100644 --- a/src/main/resources/messages_en_GB.properties +++ b/src/main/resources/messages_en_GB.properties @@ -82,6 +82,9 @@ home.ocr.desc=Scans and detects text from images within a PDF and re-adds it as home.extractImages.title=Extract Images home.extractImages.desc=Extracts all images from a PDF and saves them to zip +home.pdfToPDFA.title=Convert PDF to PDF/A +home.pdfToPDFA.desc=Convert PDF to PDF/A for long-term storage + navbar.settings=Settings settings.title=Settings @@ -113,6 +116,12 @@ ocr.help=Please read this documentation on how to use this for other languages a ocr.credit=This service uses OCRmyPDF and Tesseract for OCR. ocr.submit=Process PDF with OCR + +pdfToPDFA.title=PDF To PDF/A +pdfToPDFA.header=PDF To PDF/A +pdfToPDFA.credit=This service uses OCRmyPDF for PDF/A conversion +pdfToPDFA.submit=Convert + extractImages.title=Extract Images extractImages.header=Extract Images extractImages.selectText=Select image format to convert extracted images to diff --git a/src/main/resources/templates/convert/pdf-to-pdfa.html b/src/main/resources/templates/convert/pdf-to-pdfa.html new file mode 100644 index 000000000..2edcefba2 --- /dev/null +++ b/src/main/resources/templates/convert/pdf-to-pdfa.html @@ -0,0 +1,30 @@ + + + + + + + +
+
+
+

+
+
+
+

+
+
+
+ +
+

+
+
+
+ +
+
+
+ + \ No newline at end of file diff --git a/src/main/resources/templates/fragments/navbar.html b/src/main/resources/templates/fragments/navbar.html index c6f953728..0ed05645e 100644 --- a/src/main/resources/templates/fragments/navbar.html +++ b/src/main/resources/templates/fragments/navbar.html @@ -127,12 +127,13 @@ function compareVersions(version1, version2) { - diff --git a/src/main/resources/templates/home.html b/src/main/resources/templates/home.html index 3e3afcd3f..2a8709132 100644 --- a/src/main/resources/templates/home.html +++ b/src/main/resources/templates/home.html @@ -64,6 +64,7 @@
+