From cbfa70d8511ba75e1ecf77d0c546b268a44d5989 Mon Sep 17 00:00:00 2001 From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com> Date: Sun, 19 Mar 2023 14:45:07 +0000 Subject: [PATCH] Major changes, use libre --- Dockerfile | 51 +++++- HowToUseOCR.md | 49 ++++++ build.gradle | 12 -- .../software/SPDF/LibreOfficeListener.java | 94 +++++++++++ .../SPDF/controller/CompressController.java | 137 ++++++++-------- .../SPDF/controller/OCRController.java | 143 ++++++++++++++++ .../converters/ConvertCsvController.java | 79 --------- .../converters/ConvertDocController.java | 43 ----- .../converters/ConvertHtmlController.java | 54 ------- .../converters/ConvertOfficeController.java | 82 ++++++++++ .../converters/ConvertPPTController.java | 79 --------- .../converters/ConvertTextController.java | 63 -------- .../converters/ConvertXlsxController.java | 152 ------------------ .../software/SPDF/utils/ProcessExecutor.java | 69 ++++++++ src/main/resources/messages.properties | 11 ++ src/main/resources/messages_en_US.properties | 15 +- .../resources/templates/compress-pdf.html | 78 +++------ .../{xlsx-to-pdf.html => file-to-pdf.html} | 15 +- src/main/resources/templates/ocr-pdf.html | 51 ++++++ 19 files changed, 650 insertions(+), 627 deletions(-) create mode 100644 HowToUseOCR.md create mode 100644 src/main/java/stirling/software/SPDF/LibreOfficeListener.java create mode 100644 src/main/java/stirling/software/SPDF/controller/OCRController.java delete mode 100644 src/main/java/stirling/software/SPDF/controller/converters/ConvertCsvController.java delete mode 100644 src/main/java/stirling/software/SPDF/controller/converters/ConvertDocController.java delete mode 100644 src/main/java/stirling/software/SPDF/controller/converters/ConvertHtmlController.java create mode 100644 src/main/java/stirling/software/SPDF/controller/converters/ConvertOfficeController.java delete mode 100644 src/main/java/stirling/software/SPDF/controller/converters/ConvertPPTController.java delete mode 
100644 src/main/java/stirling/software/SPDF/controller/converters/ConvertTextController.java delete mode 100644 src/main/java/stirling/software/SPDF/controller/converters/ConvertXlsxController.java create mode 100644 src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java rename src/main/resources/templates/convert/{xlsx-to-pdf.html => file-to-pdf.html} (63%) create mode 100644 src/main/resources/templates/ocr-pdf.html diff --git a/Dockerfile b/Dockerfile index c34f7e257..c6f33136d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,54 @@ +# Build jbig2enc in a separate stage +FROM debian:bullseye-slim as jbig2enc_builder + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + git \ + automake \ + autoconf \ + libtool \ + libleptonica-dev \ + pkg-config \ + ca-certificates \ + zlib1g-dev \ + make \ + g++ + +RUN git clone https://github.com/agl/jbig2enc && \ + cd jbig2enc && \ + ./autogen.sh && \ + ./configure && \ + make && \ + make install + +# Main stage FROM openjdk:17-jdk-slim + +# Install necessary dependencies +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libreoffice-core \ + libreoffice-common \ + libreoffice-writer \ + libreoffice-calc \ + libreoffice-impress \ + python3-uno \ + python3-pip \ + unoconv \ + ocrmypdf && \ + pip install --user --upgrade ocrmypdf + +# Copy the jbig2enc binary from the builder stage +COPY --from=jbig2enc_builder /usr/local/bin/jbig2 /usr/local/bin/jbig2 + +# Copy the application JAR file COPY build/libs/*.jar app.jar + +# Expose the application port EXPOSE 8080 + +# Set environment variables ENV LOG_LEVEL=INFO -ENTRYPOINT ["java","-jar","/app.jar","-Dlogging.level=${LOG_LEVEL}"] \ No newline at end of file + +# Run the application +ENTRYPOINT ["java","-jar","/app.jar","-Dlogging.level=${LOG_LEVEL}"] diff --git a/HowToUseOCR.md b/HowToUseOCR.md new file mode 100644 index 000000000..1b7d75f96 --- /dev/null +++ b/HowToUseOCR.md @@ -0,0 +1,49 @@ +# OCR Language Packs and Setup 
+ +This document provides instructions on how to add additional language packs for the OCR tab in Stirling-PDF, both inside and outside of Docker. + +## How does the OCR work? +Stirling-PDF uses OCRmyPDF, which in turn uses Tesseract for its text recognition. +All credit goes to them for this awesome work! + +## Language Packs + +Tesseract OCR supports a variety of languages. You can find additional language packs in the Tesseract GitHub repositories: + +- [tessdata_fast](https://github.com/tesseract-ocr/tessdata_fast): These language packs are smaller and faster to load, but may provide lower recognition accuracy. +- [tessdata](https://github.com/tesseract-ocr/tessdata): These language packs are larger and provide better recognition accuracy, but may take longer to load. + +Depending on your requirements, you can choose the appropriate language pack for your use case. By default, Stirling-PDF uses the `eng` pack from tessdata_fast, but this can be replaced. + +### Installing Language Packs + +1. Download the desired language pack(s) by selecting the `.traineddata` file(s) for the language(s) you need. +2. Place the `.traineddata` files in the Tesseract tessdata directory: `/usr/share/tesseract-ocr/4.00/tessdata` + +#### Docker + +If you are using Docker, you need to expose the Tesseract tessdata directory as a volume in order to use the additional language packs. +#### Docker Compose +Modify your `docker-compose.yml` file to include the following volume configuration (host directory on the left, container tessdata directory on the right): + + +```yaml +services: + your_service_name: + image: your_docker_image_name + volumes: + - /location/of/trainingData:/usr/share/tesseract-ocr/4.00/tessdata +``` + + +#### Docker run +Add the following to your existing `docker run` command: +```bash +-v /location/of/trainingData:/usr/share/tesseract-ocr/4.00/tessdata +``` + +#### Non-Docker +If you are not using Docker, you need to install the OCR components, including the ocrmypdf app. 
/**
 * Process-wide singleton that manages a background {@code unoconv --listener} process.
 *
 * <p>{@link #start()} launches the listener when none is alive, then polls the listener
 * port until it accepts a TCP connection or the start-up timeout elapses. A daemon
 * monitor thread destroys the process once {@code ACTIVITY_TIMEOUT} milliseconds have
 * passed since the activity timestamp was last refreshed. Nothing in this class refreshes
 * that timestamp after start-up, so the listener is torn down that long after it came up.
 *
 * <p>{@link #start()} and {@link #stop()} are synchronized on the singleton; the activity
 * timestamp is {@code volatile} because the monitor thread reads it without the lock.
 */
public class LibreOfficeListener {

    private static final LibreOfficeListener INSTANCE = new LibreOfficeListener();

    private static final long ACTIVITY_TIMEOUT = 20 * 60 * 1000; // 20 minutes
    private static final long MONITOR_INTERVAL_MILLIS = 5_000;
    private static final long STARTUP_TIMEOUT_MILLIS = 30_000;
    private static final long STARTUP_POLL_MILLIS = 1_000;
    private static final int CONNECT_TIMEOUT_MILLIS = 1_000;
    private static final int LISTENER_PORT = 2002;

    private ExecutorService executorService;
    private Process process;
    private volatile long lastActivityTime;

    private LibreOfficeListener() {}

    /**
     * Returns the single shared instance.
     *
     * @return the process-wide listener manager
     */
    public static LibreOfficeListener getInstance() {
        return INSTANCE;
    }

    /**
     * Starts the listener process if it is not already alive and waits (up to 30 seconds)
     * for its port to accept connections. Returns silently if the port never opens within
     * that window, or if the calling thread is interrupted while waiting (the interrupt
     * flag is restored in that case).
     *
     * @throws IOException if the {@code unoconv} process cannot be launched
     */
    public synchronized void start() throws IOException {
        if (process != null && process.isAlive()) {
            return;
        }

        // A previous listener may have died; make sure its monitor does not outlive it.
        shutdownMonitor();

        // Argument list instead of a command string, and discard the child's output so it
        // can never block on a full pipe that nobody reads.
        final Process listener = new ProcessBuilder("unoconv", "--listener")
                .redirectErrorStream(true)
                .redirectOutput(ProcessBuilder.Redirect.DISCARD)
                .start();
        process = listener;
        lastActivityTime = System.currentTimeMillis();

        // Daemon thread: the idle monitor must never keep the JVM from exiting.
        executorService = Executors.newSingleThreadExecutor(runnable -> {
            Thread thread = new Thread(runnable, "libreoffice-listener-monitor");
            thread.setDaemon(true);
            return thread;
        });
        executorService.execute(() -> destroyWhenIdle(listener));

        awaitListenerPort();
    }

    /**
     * Stops the idle monitor and destroys the listener process if it is alive.
     * Safe to call when {@link #start()} was never invoked, and safe to call repeatedly.
     */
    public synchronized void stop() {
        shutdownMonitor();

        if (process != null && process.isAlive()) {
            process.destroy();
        }
    }

    /** Destroys {@code listener} once it has been idle too long; ends when it dies or on interrupt. */
    private void destroyWhenIdle(Process listener) {
        while (listener.isAlive()) {
            long idleTime = System.currentTimeMillis() - lastActivityTime;
            if (idleTime >= ACTIVITY_TIMEOUT) {
                listener.destroy();
                return;
            }
            try {
                Thread.sleep(MONITOR_INTERVAL_MILLIS);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /** Polls the listener port once per second until it answers or the start-up timeout passes. */
    private void awaitListenerPort() {
        long deadline = System.currentTimeMillis() + STARTUP_TIMEOUT_MILLIS;
        while (System.currentTimeMillis() < deadline) {
            if (isListenerRunning()) {
                lastActivityTime = System.currentTimeMillis();
                return;
            }
            try {
                Thread.sleep(STARTUP_POLL_MILLIS);
            } catch (InterruptedException e) {
                // Preserve the interrupt for the caller and stop waiting.
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /** Returns whether a TCP connection to the listener port on localhost succeeds within one second. */
    private boolean isListenerRunning() {
        try (Socket socket = new Socket()) {
            socket.connect(new InetSocketAddress("localhost", LISTENER_PORT), CONNECT_TIMEOUT_MILLIS);
            return true;
        } catch (IOException e) {
            return false;
        }
    }

    /** Shuts down the idle-monitor executor, if one exists. Caller must hold the instance lock. */
    private void shutdownMonitor() {
        if (executorService != null) {
            executorService.shutdownNow();
            executorService = null;
        }
    }

}
9a9d45b1b..c2c40c2fc 100644 --- a/src/main/java/stirling/software/SPDF/controller/CompressController.java +++ b/src/main/java/stirling/software/SPDF/controller/CompressController.java @@ -31,8 +31,26 @@ import com.itextpdf.text.pdf.PdfReader; import com.itextpdf.text.pdf.PdfStamper; import stirling.software.SPDF.utils.PdfUtils; +import stirling.software.SPDF.utils.ProcessExecutor; + +import org.springframework.http.HttpHeaders; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.multipart.MultipartFile; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + -//import com.spire.pdf.*; @Controller public class CompressController { @@ -45,80 +63,55 @@ public class CompressController { } - @PostMapping("/compress-pdf") - public ResponseEntity compressPDF( - @RequestParam("fileInput") MultipartFile pdfFile, - @RequestParam(value = "compressPDF", defaultValue = "false") boolean compressPDF, - @RequestParam(value = "compressImages", defaultValue = "false") boolean compressImages, - @RequestParam(value = "useLossyCompression", defaultValue = "false") boolean useLossyCompression, - @RequestParam(value = "resolutionPercentage", defaultValue = "50") int resolutionPercentage) { + public ResponseEntity optimizePdf( + @RequestParam("fileInput") MultipartFile inputFile, + @RequestParam("optimizeLevel") int optimizeLevel, + @RequestParam(name = "fastWebView", required = false) Boolean fastWebView, + @RequestParam(name = "jbig2Lossy", required = false) Boolean jbig2Lossy) throws IOException, InterruptedException { - ByteArrayOutputStream baosPDFBox = new 
ByteArrayOutputStream(); + // Save the uploaded file to a temporary location + Path tempInputFile = Files.createTempFile("input_", ".pdf"); + inputFile.transferTo(tempInputFile.toFile()); + + // Prepare the output file path + Path tempOutputFile = Files.createTempFile("output_", ".pdf"); + + // Prepare the OCRmyPDF command + List command = new ArrayList<>(); + command.add("ocrmypdf"); + command.add("--optimize"); + command.add(String.valueOf(optimizeLevel)); + + if (fastWebView != null && fastWebView) { + long fileSize = inputFile.getSize(); + long fastWebViewSize = (long) (fileSize * 1.25); // 25% higher than file size + command.add("--fast-web-view"); + command.add(String.valueOf(fastWebViewSize)); + } + + if (jbig2Lossy != null && jbig2Lossy) { + command.add("--jbig2-lossy"); + } + + command.add(tempInputFile.toString()); + command.add(tempOutputFile.toString()); + + int returnCode = ProcessExecutor.runCommandWithOutputHandling(command); - try (InputStream is = pdfFile.getInputStream(); - PDDocument document = PDDocument.load(is)) { + // Read the optimized PDF file + byte[] pdfBytes = Files.readAllBytes(tempOutputFile); - if (compressImages) { - for (PDPage page : document.getPages()) { - PDResources resources = page.getResources(); - for (COSName cosName : resources.getXObjectNames()) { - if (resources.isImageXObject(cosName)) { - PDImageXObject image = (PDImageXObject) resources.getXObject(cosName); - BufferedImage bufferedImage = image.getImage(); - BufferedImage resizedImage = resizeImage(bufferedImage, resolutionPercentage); + // Clean up the temporary files + Files.delete(tempInputFile); + Files.delete(tempOutputFile); + + // Return the optimized PDF as a response + String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_Optimized.pdf"; + HttpHeaders headers = new HttpHeaders(); + headers.setContentType(MediaType.APPLICATION_PDF); + headers.setContentDispositionFormData("attachment", outputFilename); + return 
ResponseEntity.ok().headers(headers).body(pdfBytes); +} - if (useLossyCompression) { - File tempFile = File.createTempFile("pdfbox", ".jpg"); - ImageIO.write(resizedImage, "jpg", tempFile); - PDImageXObject newImage = PDImageXObject.createFromFile(tempFile.getAbsolutePath(), document); - resources.put(cosName, newImage); - } else { - File tempFile = File.createTempFile("pdfbox", ".png"); - ImageIO.write(resizedImage, "png", tempFile); - PDImageXObject newImage = PDImageXObject.createFromFile(tempFile.getAbsolutePath(), document); - resources.put(cosName, newImage); - } - } - } - } - } - - document.save(baosPDFBox); - - } catch (IOException e) { - e.printStackTrace(); - return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR); - } - - try (ByteArrayInputStream baisPDFBox = new ByteArrayInputStream(baosPDFBox.toByteArray()); - ByteArrayOutputStream baosFinal = new ByteArrayOutputStream()) { - - PdfReader reader = new PdfReader(baisPDFBox); - PdfStamper stamper = new PdfStamper(reader, baosFinal); - - if (compressPDF) { - stamper.setFullCompression(); - } - - stamper.close(); - reader.close(); - - return PdfUtils.boasToWebResponse(baosFinal, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_compressed.pdf"); - } catch (IOException | DocumentException e) { - e.printStackTrace(); - return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR); - } - } - - - private BufferedImage resizeImage(BufferedImage originalImage, int resolutionPercentage) { - int newWidth = originalImage.getWidth() * resolutionPercentage / 100; - int newHeight = originalImage.getHeight() * resolutionPercentage / 100; - BufferedImage resizedImage = new BufferedImage(newWidth, newHeight, originalImage.getType()); - Graphics2D g = resizedImage.createGraphics(); - g.drawImage(originalImage, 0, 0, newWidth, newHeight, null); - g.dispose(); - return resizedImage; - } } diff --git a/src/main/java/stirling/software/SPDF/controller/OCRController.java 
b/src/main/java/stirling/software/SPDF/controller/OCRController.java new file mode 100644 index 000000000..5425b2c46 --- /dev/null +++ b/src/main/java/stirling/software/SPDF/controller/OCRController.java @@ -0,0 +1,143 @@ +package stirling.software.SPDF.controller; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.http.HttpHeaders; +import org.springframework.http.HttpStatus; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.stereotype.Controller; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.multipart.MultipartFile; +import org.springframework.web.servlet.ModelAndView; + +import stirling.software.SPDF.utils.ProcessExecutor; + +import java.io.FileOutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; +//import com.spire.pdf.*; +@Controller +public class OCRController { + + private static final Logger logger = LoggerFactory.getLogger(OCRController.class); + + @GetMapping("/ocr-pdf") + public ModelAndView ocrPdfPage() { + ModelAndView modelAndView = new ModelAndView("ocr-pdf"); + modelAndView.addObject("languages", getAvailableTesseractLanguages()); + modelAndView.addObject("currentPage", "ocr-pdf"); + return modelAndView; + } + + @PostMapping("/ocr-pdf") + public ResponseEntity 
processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile, + @RequestParam("languages") List selectedLanguages, + @RequestParam(name = "sidecar", required = false) Boolean sidecar) throws IOException, InterruptedException { + + //--output-type pdfa + if (selectedLanguages == null || selectedLanguages.size() < 1) { + throw new IOException("Please select at least one language."); + } + + // Save the uploaded file to a temporary location + Path tempInputFile = Files.createTempFile("input_", ".pdf"); + inputFile.transferTo(tempInputFile.toFile()); + + // Prepare the output file path + Path tempOutputFile = Files.createTempFile("output_", ".pdf"); + + // Run OCR Command + String languageOption = String.join("+", selectedLanguages); + List command = new ArrayList<>(Arrays.asList("ocrmypdf","--verbose", "2", "--language", languageOption, + tempInputFile.toString(), tempOutputFile.toString())); + String sidecarFile = tempOutputFile.toString().replace(".pdf", ".txt"); + if (sidecar != null && sidecar) { + command.add("--sidecar"); + command.add(sidecarFile); + } + int returnCode = ProcessExecutor.runCommandWithOutputHandling(command); + + // Read the OCR processed PDF file + byte[] pdfBytes = Files.readAllBytes(tempOutputFile); + + // Clean up the temporary files + Files.delete(tempInputFile); + // Return the OCR processed PDF as a response + String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf"; + + HttpHeaders headers = new HttpHeaders(); + + if (sidecar != null && sidecar) { + // Create a zip file containing both the PDF and the text file + String outputZipFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.zip"; + Path tempZipFile = Files.createTempFile("output_", ".zip"); + + try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) { + // Add PDF file to the zip + ZipEntry pdfEntry = new ZipEntry(outputFilename); + zipOut.putNextEntry(pdfEntry); + 
Files.copy(tempOutputFile, zipOut); + zipOut.closeEntry(); + + // Add text file to the zip + ZipEntry txtEntry = new ZipEntry(sidecarFile); + zipOut.putNextEntry(txtEntry); + Files.copy(Paths.get(sidecarFile), zipOut); + zipOut.closeEntry(); + } + + byte[] zipBytes = Files.readAllBytes(tempZipFile); + + // Clean up the temporary zip file + Files.delete(tempZipFile); + Files.delete(tempOutputFile); + Files.delete(Paths.get(sidecarFile)); + + // Return the zip file containing both the PDF and the text file + headers.setContentType(MediaType.APPLICATION_OCTET_STREAM); + headers.setContentDispositionFormData("attachment", outputZipFilename); + return ResponseEntity.ok().headers(headers).body(zipBytes); + } else { + // Return the OCR processed PDF as a response + Files.delete(tempOutputFile); + headers.setContentType(MediaType.APPLICATION_PDF); + headers.setContentDispositionFormData("attachment", outputFilename); + return ResponseEntity.ok().headers(headers).body(pdfBytes); + } + + } + + public List getAvailableTesseractLanguages() { + String tessdataDir = "/usr/share/tesseract-ocr/4.00/tessdata"; + File[] files = new File(tessdataDir).listFiles(); + if (files == null) { + return Collections.emptyList(); + } + return Arrays.stream(files) + .filter(file -> file.getName().endsWith(".traineddata")) + .map(file -> file.getName().replace(".traineddata", "")) + .filter(lang -> !lang.equalsIgnoreCase("osd")) + .collect(Collectors.toList()); + } + +} diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertCsvController.java b/src/main/java/stirling/software/SPDF/controller/converters/ConvertCsvController.java deleted file mode 100644 index d374c78b4..000000000 --- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertCsvController.java +++ /dev/null @@ -1,79 +0,0 @@ -package stirling.software.SPDF.controller.converters; - -import java.io.BufferedReader; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import 
java.io.InputStreamReader; -import java.nio.charset.StandardCharsets; - -import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.multipart.MultipartFile; - -import com.itextpdf.text.Document; -import com.itextpdf.text.DocumentException; -import com.itextpdf.text.Paragraph; -import com.itextpdf.text.pdf.PdfPCell; -import com.itextpdf.text.pdf.PdfPTable; -import com.itextpdf.text.pdf.PdfWriter; - -import stirling.software.SPDF.utils.PdfUtils; - -@Controller -public class ConvertCsvController { - - - @GetMapping("/csv-to-pdf") - public String cinvertToPDF(Model model) { - model.addAttribute("currentPage", "xlsx-to-pdf"); - return "convert/xlsx-to-pdf"; - } - - - - @PostMapping("/csv-to-pdf") - public ResponseEntity convertCsvToPdf(@RequestParam("fileInput") MultipartFile csvFile) throws IOException, DocumentException { - // Create PDF document - Document document = new Document(); - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - PdfWriter.getInstance(document, outputStream); - document.open(); - - // Read CSV file - InputStreamReader inputStreamReader = new InputStreamReader(csvFile.getInputStream(), StandardCharsets.UTF_8); - BufferedReader bufferedReader = new BufferedReader(inputStreamReader); - - // Create PDF table from CSV content - PdfPTable table = null; - String csvRow; - while ((csvRow = bufferedReader.readLine()) != null) { - String[] csvRowCells = csvRow.split(","); // Assuming comma as a delimiter - - if (table == null) { - table = new PdfPTable(csvRowCells.length); - } - - for (String cellValue : csvRowCells) { - PdfPCell pdfCell = new PdfPCell(new Paragraph(cellValue)); - table.addCell(pdfCell); - } - } - - if (table != null) { - 
document.add(table); - } - - // Close BufferedReader, document, and output stream - bufferedReader.close(); - document.close(); - outputStream.close(); - - return PdfUtils.boasToWebResponse(outputStream, csvFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf"); - } - - -} diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertDocController.java b/src/main/java/stirling/software/SPDF/controller/converters/ConvertDocController.java deleted file mode 100644 index 10dd6c06e..000000000 --- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertDocController.java +++ /dev/null @@ -1,43 +0,0 @@ -package stirling.software.SPDF.controller.converters; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; - -import org.docx4j.Docx4J; -import org.docx4j.openpackaging.exceptions.Docx4JException; -import org.docx4j.openpackaging.packages.WordprocessingMLPackage; -import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.multipart.MultipartFile; - -import stirling.software.SPDF.utils.PdfUtils; - -@Controller -public class ConvertDocController { - - - @GetMapping("/docx-to-pdf") - public String cinvertToPDF(Model model) { - model.addAttribute("currentPage", "xlsx-to-pdf"); - return "convert/xlsx-to-pdf"; - } - - @PostMapping("/docx-to-pdf") - public ResponseEntity convertDocxToPdf(@RequestParam("fileInput") MultipartFile docxFile) throws IOException, Docx4JException { - // Load WordprocessingMLPackage - WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(docxFile.getInputStream()); - - // Create PDF output stream - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - - // Convert 
DOCX to PDF - Docx4J.toPDF(wordMLPackage, outputStream); - - return PdfUtils.boasToWebResponse(outputStream, docxFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf"); - } - -} diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertHtmlController.java b/src/main/java/stirling/software/SPDF/controller/converters/ConvertHtmlController.java deleted file mode 100644 index fe5bc34ca..000000000 --- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertHtmlController.java +++ /dev/null @@ -1,54 +0,0 @@ -package stirling.software.SPDF.controller.converters; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; - -import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.multipart.MultipartFile; - -import com.itextpdf.text.Document; -import com.itextpdf.text.DocumentException; -import com.itextpdf.text.pdf.PdfWriter; -import com.itextpdf.tool.xml.XMLWorkerHelper; - -import stirling.software.SPDF.utils.PdfUtils; - -@Controller -public class ConvertHtmlController { - - - @GetMapping("//html-to-pdf") - public String cinvertToPDF(Model model) { - model.addAttribute("currentPage", "xlsx-to-pdf"); - return "convert/xlsx-to-pdf"; - } - - @PostMapping("/html-to-pdf") - public ResponseEntity convertHtmlToPdf(@RequestParam("fileInput") MultipartFile htmlFile) throws IOException, DocumentException { - // Create PDF document - Document document = new Document(); - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - PdfWriter writer = PdfWriter.getInstance(document, outputStream); - document.open(); - - // Read HTML file - InputStream 
htmlInputStream = new ByteArrayInputStream(htmlFile.getBytes()); - - // Convert HTML content to PDF - XMLWorkerHelper.getInstance().parseXHtml(writer, document, htmlInputStream); - - // Close document and output stream - document.close(); - outputStream.close(); - - return PdfUtils.boasToWebResponse(outputStream, ""); - } - -} diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertOfficeController.java b/src/main/java/stirling/software/SPDF/controller/converters/ConvertOfficeController.java new file mode 100644 index 000000000..00996dc06 --- /dev/null +++ b/src/main/java/stirling/software/SPDF/controller/converters/ConvertOfficeController.java @@ -0,0 +1,82 @@ +package stirling.software.SPDF.controller.converters; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.springframework.http.ResponseEntity; +import org.springframework.stereotype.Controller; +import org.springframework.ui.Model; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.multipart.MultipartFile; +import org.springframework.web.servlet.ModelAndView; + +import stirling.software.SPDF.LibreOfficeListener; +import stirling.software.SPDF.utils.PdfUtils; +import stirling.software.SPDF.utils.ProcessExecutor; +@Controller +public class ConvertOfficeController { + + + @GetMapping("/file-to-pdf") + public String convertToPdfForm(Model model) { + model.addAttribute("currentPage", "file-to-pdf"); + return "convert/file-to-pdf"; + } + + @PostMapping("/file-to-pdf") + public ResponseEntity processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, 
InterruptedException { + + //unused but can start server instance if startup time is to long + //LibreOfficeListener.getInstance().start(); + + byte[] pdfByteArray = convertToPdf(inputFile); + return PdfUtils.bytesToWebResponse(pdfByteArray, inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf"); + } + + +public byte[] convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException { + // Save the uploaded file to a temporary location + Path tempInputFile = Files.createTempFile("input_", "." + getFileExtension(inputFile.getOriginalFilename())); + inputFile.transferTo(tempInputFile.toFile()); + + // Prepare the output file path + Path tempOutputFile = Files.createTempFile("output_", ".pdf"); + + // Run the LibreOffice command + List command = new ArrayList<>(Arrays.asList("unoconv", "-vvv", + "-f", + "pdf", + "-o", + tempOutputFile.toString(), + tempInputFile.toString())); + int returnCode = ProcessExecutor.runCommandWithOutputHandling(command); + + // Read the converted PDF file + byte[] pdfBytes = Files.readAllBytes(tempOutputFile); + + // Clean up the temporary files + Files.delete(tempInputFile); + Files.delete(tempOutputFile); + + return pdfBytes; +} + + + +private String getFileExtension(String fileName) { + int dotIndex = fileName.lastIndexOf('.'); + if (dotIndex == -1) { + return ""; + } + return fileName.substring(dotIndex + 1); +} +} diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertPPTController.java b/src/main/java/stirling/software/SPDF/controller/converters/ConvertPPTController.java deleted file mode 100644 index c5818219c..000000000 --- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertPPTController.java +++ /dev/null @@ -1,79 +0,0 @@ -package stirling.software.SPDF.controller.converters; - -import java.awt.Color; -import java.awt.Graphics2D; -import java.awt.RenderingHints; -import java.awt.geom.Rectangle2D; -import java.awt.image.BufferedImage; -import 
java.io.ByteArrayOutputStream; -import java.io.IOException; - -import org.apache.poi.xslf.usermodel.XMLSlideShow; -import org.apache.poi.xslf.usermodel.XSLFSlide; -import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.multipart.MultipartFile; - -import com.itextpdf.text.Document; -import com.itextpdf.text.DocumentException; -import com.itextpdf.text.Image; -import com.itextpdf.text.PageSize; -import com.itextpdf.text.pdf.PdfWriter; - -import stirling.software.SPDF.utils.PdfUtils; - -@Controller -public class ConvertPPTController { - - - @GetMapping("/pptx-to-pdf") - public String cinvertToPDF(Model model) { - model.addAttribute("currentPage", "xlsx-to-pdf"); - return "convert/xlsx-to-pdf"; - } - - @PostMapping("/pptx-to-pdf") - public ResponseEntity convertPptxToPdf(@RequestParam("fileInput") MultipartFile pptxFile) throws IOException, DocumentException { - // Read PowerPoint presentation - XMLSlideShow ppt = new XMLSlideShow(pptxFile.getInputStream()); - - // Create PDF document - Document pdfDocument = new Document(PageSize.A4.rotate()); - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - PdfWriter.getInstance(pdfDocument, outputStream); - pdfDocument.open(); - - // Convert PowerPoint slides to images, then add them to the PDF - for (XSLFSlide slide : ppt.getSlides()) { - BufferedImage slideImage = new BufferedImage((int) Math.ceil(ppt.getPageSize().getWidth()), (int) Math.ceil(ppt.getPageSize().getHeight()), BufferedImage.TYPE_INT_RGB); - Graphics2D graphics = slideImage.createGraphics(); - - // Set graphics rendering hints for better quality - graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON); - 
graphics.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY); - graphics.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC); - - // Draw the slide on the graphics - graphics.setPaint(Color.white); - graphics.fill(new Rectangle2D.Float(0, 0, slideImage.getWidth(), slideImage.getHeight())); - slide.draw(graphics); - - // Add the slide image to the PDF document - Image image = Image.getInstance(slideImage, null); - image.scaleToFit(PageSize.A4.getWidth() - 72, PageSize.A4.getHeight() - 72); - pdfDocument.add(image); - } - - // Close PowerPoint and PDF documents - ppt.close(); - pdfDocument.close(); - outputStream.close(); - - return PdfUtils.boasToWebResponse(outputStream, pptxFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf"); - } - -} diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertTextController.java b/src/main/java/stirling/software/SPDF/controller/converters/ConvertTextController.java deleted file mode 100644 index 29fe1ac9b..000000000 --- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertTextController.java +++ /dev/null @@ -1,63 +0,0 @@ -package stirling.software.SPDF.controller.converters; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.charset.StandardCharsets; - -import org.apache.commons.io.FilenameUtils; -import org.apache.poi.hwpf.HWPFDocument; -import org.apache.poi.poifs.filesystem.POIFSFileSystem; -import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.multipart.MultipartFile; - -import com.itextpdf.text.Document; -import com.itextpdf.text.DocumentException; -import 
com.itextpdf.text.Paragraph; -import com.itextpdf.text.pdf.PdfWriter; - -import stirling.software.SPDF.utils.PdfUtils; - -@Controller -public class ConvertTextController { - - - @GetMapping("/txt-rtf-to-pdf") - public String cinvertToPDF(Model model) { - model.addAttribute("currentPage", "xlsx-to-pdf"); - return "convert/xlsx-to-pdf"; - } - - @PostMapping("/txt-rtf-to-pdf") - public ResponseEntity convertTxtRtfToPdf(@RequestParam("fileInput") MultipartFile txtRtfFile) throws IOException, DocumentException { - // Create PDF document - Document document = new Document(); - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - PdfWriter.getInstance(document, outputStream); - document.open(); - - // Read TXT/RTF file content - String fileContent; - String fileExtension = FilenameUtils.getExtension(txtRtfFile.getOriginalFilename()); - if (fileExtension.equalsIgnoreCase("rtf")) { - HWPFDocument hwpfDocument = new HWPFDocument(new POIFSFileSystem(txtRtfFile.getInputStream())); - fileContent = hwpfDocument.getText().toString(); - } else { - fileContent = new String(txtRtfFile.getBytes(), StandardCharsets.UTF_8); - } - - // Add content to PDF - document.add(new Paragraph(fileContent)); - - // Close document and output stream - document.close(); - outputStream.close(); - - return PdfUtils.boasToWebResponse(outputStream, txtRtfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf"); - } - -} diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertXlsxController.java b/src/main/java/stirling/software/SPDF/controller/converters/ConvertXlsxController.java deleted file mode 100644 index b6731d848..000000000 --- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertXlsxController.java +++ /dev/null @@ -1,152 +0,0 @@ -package stirling.software.SPDF.controller.converters; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; - -import org.apache.poi.ss.usermodel.Cell; -import 
org.apache.poi.ss.usermodel.DataFormatter; -import org.apache.poi.ss.usermodel.Row; -import org.apache.poi.ss.usermodel.Sheet; -import org.apache.poi.ss.usermodel.Workbook; -import org.apache.poi.ss.usermodel.WorkbookFactory; -import org.apache.poi.xssf.usermodel.XSSFCellStyle; -import org.apache.poi.xssf.usermodel.XSSFColor; -import org.apache.poi.xssf.usermodel.XSSFFont; -import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.multipart.MultipartFile; - -import com.itextpdf.text.BaseColor; -import com.itextpdf.text.Document; -import com.itextpdf.text.DocumentException; -import com.itextpdf.text.Font; -import com.itextpdf.text.Paragraph; -import com.itextpdf.text.pdf.PdfPCell; -import com.itextpdf.text.pdf.PdfPTable; -import com.itextpdf.text.pdf.PdfWriter; - -import stirling.software.SPDF.utils.PdfUtils; - -@Controller -public class ConvertXlsxController { - - - @GetMapping("/xlsx-to-pdf") - public String cinvertToPDF(Model model) { - model.addAttribute("currentPage", "xlsx-to-pdf"); - return "convert/xlsx-to-pdf"; - } - - @PostMapping("/xlsx-to-pdf") - public ResponseEntity convertToPDF(@RequestParam("fileInput") MultipartFile xlsx) throws IOException, DocumentException { - // Load Excel file - Workbook workbook = WorkbookFactory.create(xlsx.getInputStream()); - - // Create PDF document - Document document = new Document(); - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - PdfWriter.getInstance(document, outputStream); - document.open(); - - // Convert each sheet in Excel to a separate page in PDF - for (int i = 0; i < workbook.getNumberOfSheets(); i++) { - Sheet sheet = workbook.getSheetAt(i); - int numOfColumns = 
sheet.getRow(0).getPhysicalNumberOfCells(); - PdfPTable table = new PdfPTable(numOfColumns); - - for (int row = 0; row < sheet.getPhysicalNumberOfRows(); row++) { - Row excelRow = sheet.getRow(row); - if (excelRow == null) { - continue; // Skip this row if it's null - } - for (int cell = 0; cell < excelRow.getPhysicalNumberOfCells(); cell++) { - Cell excelCell = excelRow.getCell(cell); - - // Check if the cell is null - if (excelCell == null) { - table.addCell(""); // Add an empty cell to the PDF table - continue; - } - - // Convert cell to string - DataFormatter dataFormatter = new DataFormatter(); - String cellValue = dataFormatter.formatCellValue(excelCell); - System.out.println("Cell Value: " + cellValue); - // Get Excel cell font - Font cellFont = getFontFromExcelCell(workbook, excelCell); - - // Create PDF cell with Excel cell font - PdfPCell pdfCell = new PdfPCell(new Paragraph(cellValue, cellFont)); - - // Set cell height and width - float height = sheet.getRow(row).getHeightInPoints(); - System.out.print(height); - pdfCell.setFixedHeight(30f); - - - // Copy cell style, borders, and background color - XSSFCellStyle cellStyle = (XSSFCellStyle) excelCell.getCellStyle(); - if (cellStyle != null) { - XSSFColor bottomBorderColor = cellStyle.getBottomBorderXSSFColor(); - if (bottomBorderColor != null) { - pdfCell.setBorderColor(new BaseColor(bottomBorderColor.getRGB()[0] & 0xFF, bottomBorderColor.getRGB()[1] & 0xFF, bottomBorderColor.getRGB()[2] & 0xFF)); - } - - XSSFColor topBorderColor = cellStyle.getTopBorderXSSFColor(); - if (topBorderColor != null) { - pdfCell.setBorderColor(new BaseColor(topBorderColor.getRGB()[0] & 0xFF, topBorderColor.getRGB()[1] & 0xFF, topBorderColor.getRGB()[2] & 0xFF)); - } - - XSSFColor leftBorderColor = cellStyle.getLeftBorderXSSFColor(); - if (leftBorderColor != null) { - pdfCell.setBorderColor(new BaseColor(leftBorderColor.getRGB()[0] & 0xFF, leftBorderColor.getRGB()[1] & 0xFF, leftBorderColor.getRGB()[2] & 0xFF)); - } - - 
XSSFColor rightBorderColor = cellStyle.getRightBorderXSSFColor(); - if (rightBorderColor != null) { - pdfCell.setBorderColor(new BaseColor(rightBorderColor.getRGB()[0] & 0xFF, rightBorderColor.getRGB()[1] & 0xFF, rightBorderColor.getRGB()[2] & 0xFF)); - } - - XSSFColor fillForegroundColor = cellStyle.getFillForegroundXSSFColor(); - if (fillForegroundColor != null) { - pdfCell.setBackgroundColor(new BaseColor(fillForegroundColor.getRGB()[0] & 0xFF, fillForegroundColor.getRGB()[1] & 0xFF, fillForegroundColor.getRGB()[2] & 0xFF)); - } - - } - - table.addCell(pdfCell); - } - } - - // Add sheet to PDF - document.add(table); - - // Add page break if there are more sheets - if (i < workbook.getNumberOfSheets() - 1) { - document.newPage(); - } - } - - // Close document and output stream - document.close(); - outputStream.flush(); - outputStream.close(); - - // Return PDF as response - return PdfUtils.boasToWebResponse(outputStream, xlsx.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf"); - } - - private Font getFontFromExcelCell(Workbook workbook, Cell excelCell) { - XSSFFont excelFont = ((XSSFCellStyle) excelCell.getCellStyle()).getFont(); - Font.FontFamily fontFamily = Font.getFamily(excelFont.getFontName()); - float fontSize = excelFont.getFontHeightInPoints(); - int fontStyle = (excelFont.getBold() ? Font.BOLD : Font.NORMAL) | (excelFont.getItalic() ? 
Font.ITALIC : Font.NORMAL); - - return new Font(fontFamily, fontSize, fontStyle); - } - -} diff --git a/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java b/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java new file mode 100644 index 000000000..e244d4a2a --- /dev/null +++ b/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java @@ -0,0 +1,69 @@ +package stirling.software.SPDF.utils; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.List; +import java.io.BufferedReader; +import java.util.ArrayList; +public class ProcessExecutor { + public static int runCommandWithOutputHandling(List command) throws IOException, InterruptedException { + ProcessBuilder processBuilder = new ProcessBuilder(command); + Process process = processBuilder.start(); + + // Read the error stream and standard output stream concurrently + List errorLines = new ArrayList<>(); + List outputLines = new ArrayList<>(); + + Thread errorReaderThread = new Thread(() -> { + try (BufferedReader errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream(), StandardCharsets.UTF_8))) { + String line; + while ((line = errorReader.readLine()) != null) { + errorLines.add(line); + } + } catch (IOException e) { + e.printStackTrace(); + } + }); + + Thread outputReaderThread = new Thread(() -> { + try (BufferedReader outputReader = new BufferedReader(new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) { + String line; + while ((line = outputReader.readLine()) != null) { + outputLines.add(line); + } + } catch (IOException e) { + e.printStackTrace(); + } + }); + + errorReaderThread.start(); + outputReaderThread.start(); + + // Wait for the conversion process to complete + int exitCode = process.waitFor(); + + // Wait for the reader threads to finish + errorReaderThread.join(); + outputReaderThread.join(); + + if 
(outputLines.size() > 0) { + String outputMessage = String.join("\n", outputLines); + System.out.println("Command output:\n" + outputMessage); + } + + if (errorLines.size() > 0) { + String errorMessage = String.join("\n", errorLines); + System.out.println("Command error output:\n" + errorMessage); + if (exitCode != 0) { + throw new IOException("Command process failed with exit code " + exitCode + ". Error message: " + errorMessage); + } + } + + return exitCode; + } + + +} diff --git a/src/main/resources/messages.properties b/src/main/resources/messages.properties index e69de29bb..2df2fa75e 100644 --- a/src/main/resources/messages.properties +++ b/src/main/resources/messages.properties @@ -0,0 +1,11 @@ +fileToPDF.fileTypesList=Microsoft Word: (DOC, DOCX, DOT, DOTX) \ +Microsoft Excel: (CSV, XLS, XLSX, XLT, XLTX, SLK, DIF) \ +Microsoft PowerPoint: (PPT, PPTX) \ +OpenDocument Formats: (ODT, OTT, ODS, OTS, ODP, OTP, ODG, OTG) \ +Plain Text: (TXT, TEXT, XML) \ +Rich Text Format: (RTF) \ +Images: (BMP, GIF, JPEG, PNG, TIF, PBM, PGM, PPM, RAS, XBM, XPM, SVG, SVM, WMF) \ +HTML: (HTML) \ +Lotus Word Pro: (LWP) \ +StarOffice formats: (SDA, SDC, SDD, SDW, STC, STD, STI, STW, SXD, SXG, SXI, SXW) \ +Other formats: (DBF, FODS, VSD, VOR, VOR3, VOR4, UOP, PCT, PS, PDF) diff --git a/src/main/resources/messages_en_US.properties b/src/main/resources/messages_en_US.properties index 278f959b8..c3a46c67f 100644 --- a/src/main/resources/messages_en_US.properties +++ b/src/main/resources/messages_en_US.properties @@ -224,19 +224,8 @@ changeMetadata.selectText.5=Add Custom Metadata Entry changeMetadata.submit=Change - -xlsToPdf.title=Excel to PDF -xlsToPdf.header=Excel to PDF -xlsToPdf.selectText.1=Select XLS or XLSX Excel sheet to convert -xlsToPdf.convert=convert - - - - - - - - +fileToPDF.credit=This service uses LibreOffice and Unoconv for file conversion. 
+fileToPDF.supportedFileTypes=Supported file types should include the below however for a full updated list of supported formats, please refer to the LibreOffice documentation diff --git a/src/main/resources/templates/compress-pdf.html b/src/main/resources/templates/compress-pdf.html index 79b67cee6..dd79f5c25 100644 --- a/src/main/resources/templates/compress-pdf.html +++ b/src/main/resources/templates/compress-pdf.html @@ -14,60 +14,30 @@

-
-
- - -
-
-
- - -
-
- - -
-
-
-
- - -
-
-
-
- - -
-
-
- - -
-
- -
-
+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+ +
diff --git a/src/main/resources/templates/convert/xlsx-to-pdf.html b/src/main/resources/templates/convert/file-to-pdf.html similarity index 63% rename from src/main/resources/templates/convert/xlsx-to-pdf.html rename to src/main/resources/templates/convert/file-to-pdf.html index 09c8be155..cb4d8ada7 100644 --- a/src/main/resources/templates/convert/xlsx-to-pdf.html +++ b/src/main/resources/templates/convert/file-to-pdf.html @@ -1,7 +1,7 @@ - + @@ -12,18 +12,23 @@
-

+

-
+
- +


- +
+

+

+

+ +
diff --git a/src/main/resources/templates/ocr-pdf.html b/src/main/resources/templates/ocr-pdf.html new file mode 100644 index 000000000..e440e5de1 --- /dev/null +++ b/src/main/resources/templates/ocr-pdf.html @@ -0,0 +1,51 @@ + + + + + + + + +
+
+
+

+
+
+
+

+ +
+
+ + +
+
+ +
+
+ + +
+
+
+
+ + +
+ +
+

+ Please read this documentation on how to use this with other languages and/or outside of Docker

+ + +
+
+
+
+
+
+ + + \ No newline at end of file