From cbfa70d8511ba75e1ecf77d0c546b268a44d5989 Mon Sep 17 00:00:00 2001
From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
Date: Sun, 19 Mar 2023 14:45:07 +0000
Subject: [PATCH] Major changes, use libre

---
 Dockerfile                                    |  51 +++++-
 HowToUseOCR.md                                |  49 ++++++
 build.gradle                                  |  12 --
 .../software/SPDF/LibreOfficeListener.java    |  94 +++++++++++
 .../SPDF/controller/CompressController.java   | 137 ++++++++--------
 .../SPDF/controller/OCRController.java        | 143 ++++++++++++++++
 .../converters/ConvertCsvController.java      |  79 ---------
 .../converters/ConvertDocController.java      |  43 -----
 .../converters/ConvertHtmlController.java     |  54 -------
 .../converters/ConvertOfficeController.java   |  82 ++++++++++
 .../converters/ConvertPPTController.java      |  79 ---------
 .../converters/ConvertTextController.java     |  63 --------
 .../converters/ConvertXlsxController.java     | 152 ------------------
 .../software/SPDF/utils/ProcessExecutor.java  |  69 ++++++++
 src/main/resources/messages.properties        |  11 ++
 src/main/resources/messages_en_US.properties  |  15 +-
 .../resources/templates/compress-pdf.html     |  78 +++------
 .../{xlsx-to-pdf.html => file-to-pdf.html}    |  15 +-
 src/main/resources/templates/ocr-pdf.html     |  51 ++++++
 19 files changed, 650 insertions(+), 627 deletions(-)
 create mode 100644 HowToUseOCR.md
 create mode 100644 src/main/java/stirling/software/SPDF/LibreOfficeListener.java
 create mode 100644 src/main/java/stirling/software/SPDF/controller/OCRController.java
 delete mode 100644 src/main/java/stirling/software/SPDF/controller/converters/ConvertCsvController.java
 delete mode 100644 src/main/java/stirling/software/SPDF/controller/converters/ConvertDocController.java
 delete mode 100644 src/main/java/stirling/software/SPDF/controller/converters/ConvertHtmlController.java
 create mode 100644 src/main/java/stirling/software/SPDF/controller/converters/ConvertOfficeController.java
 delete mode 100644 src/main/java/stirling/software/SPDF/controller/converters/ConvertPPTController.java
 delete mode 100644 src/main/java/stirling/software/SPDF/controller/converters/ConvertTextController.java
 delete mode 100644 src/main/java/stirling/software/SPDF/controller/converters/ConvertXlsxController.java
 create mode 100644 src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java
 rename src/main/resources/templates/convert/{xlsx-to-pdf.html => file-to-pdf.html} (63%)
 create mode 100644 src/main/resources/templates/ocr-pdf.html

diff --git a/Dockerfile b/Dockerfile
index c34f7e257..c6f33136d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,54 @@
+# Build jbig2enc in a separate stage
+FROM debian:bullseye-slim AS jbig2enc_builder
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        git \
+        automake \
+        autoconf \
+        libtool \
+        libleptonica-dev \
+        pkg-config \
+        ca-certificates \
+        zlib1g-dev \
+        make \
+        g++
+
+RUN git clone --depth 1 https://github.com/agl/jbig2enc && \
+    cd jbig2enc && \
+    ./autogen.sh && \
+    ./configure && \
+    make && \
+    make install
+
+# Main stage
 FROM openjdk:17-jdk-slim
+
+# Install necessary dependencies; the apt lists and pip cache are dropped in the same layer
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        libreoffice-core \
+        libreoffice-common \
+        libreoffice-writer \
+        libreoffice-calc \
+        libreoffice-impress \
+        python3-uno \
+        python3-pip \
+        unoconv \
+        ocrmypdf && \
+    pip install --no-cache-dir --user --upgrade ocrmypdf && rm -rf /var/lib/apt/lists/*
+
+# Copy the jbig2enc binary from the builder stage
+COPY --from=jbig2enc_builder /usr/local/bin/jbig2 /usr/local/bin/jbig2
+
+# Copy the application JAR file
 COPY build/libs/*.jar app.jar
+
+# Expose the application port
 EXPOSE 8080
+
+# Set environment variables
 ENV LOG_LEVEL=INFO
-ENTRYPOINT ["java","-jar","/app.jar","-Dlogging.level=${LOG_LEVEL}"]
\ No newline at end of file
+
+# Run the application via sh so ${LOG_LEVEL} is expanded at start-up; JVM options must precede -jar
+ENTRYPOINT ["sh","-c","exec java -Dlogging.level.root=${LOG_LEVEL} -jar /app.jar \"$@\"","--"]
diff --git a/HowToUseOCR.md b/HowToUseOCR.md
new file mode 100644
index 000000000..1b7d75f96
--- /dev/null
+++ b/HowToUseOCR.md
@@ -0,0 +1,49 @@
+# OCR Language Packs and Setup
+
+This document provides instructions on how to add additional language packs for the OCR tab in Stirling-PDF, both inside and outside of Docker.
+
+## How the OCR works
+Stirling-PDF uses OCRmyPDF, which in turn uses Tesseract for its text recognition.
+All credit goes to them for this awesome work! 
+
+## Language Packs
+
+Tesseract OCR supports a variety of languages. You can find additional language packs in the Tesseract GitHub repositories:
+
+- [tessdata_fast](https://github.com/tesseract-ocr/tessdata_fast): These language packs are smaller and faster to load, but may provide lower recognition accuracy.
+- [tessdata](https://github.com/tesseract-ocr/tessdata): These language packs are larger and provide better recognition accuracy, but may take longer to load.
+
+Depending on your requirements, you can choose the appropriate language pack for your use case. By default, Stirling-PDF uses the English (`eng`) pack from tessdata_fast, but this can be replaced.
+
+### Installing Language Packs
+
+1. Download the desired language pack(s) by selecting the `.traineddata` file(s) for the language(s) you need.
+2. Place the `.traineddata` files in the Tesseract tessdata directory: `/usr/share/tesseract-ocr/4.00/tessdata`
+
+#### Docker
+
+If you are using Docker, you need to expose the Tesseract tessdata directory as a volume in order to use the additional language packs. 
+#### Docker Compose
+Modify your `docker-compose.yml` file to include the following volume configuration:
+
+
+```yaml
+services:
+  your_service_name:
+    image: your_docker_image_name
+    volumes:
+      - /location/of/trainingData:/usr/share/tesseract-ocr/4.00/tessdata
+```
+
+
+#### Docker run
+Add the following to your existing `docker run` command (host directory first, container directory second):
+```bash
+-v /location/of/trainingData:/usr/share/tesseract-ocr/4.00/tessdata
+```
+
+#### Non-Docker
+If you are not using Docker, you need to install the OCR components yourself, including the `ocrmypdf` application.
+See the [OCRmyPDF installation guide](https://ocrmypdf.readthedocs.io/en/latest/installation.html) for instructions.
+
+
diff --git a/build.gradle b/build.gradle
index 155eb50cd..1a2f1bd39 100644
--- a/build.gradle
+++ b/build.gradle
@@ -21,24 +21,12 @@ dependencies {
 	implementation 'org.springframework.boot:spring-boot-starter-thymeleaf'
 	testImplementation 'org.springframework.boot:spring-boot-starter-test'
 	
-	implementation 'org.apache.xmlgraphics:batik-transcoder:1.14'
 	implementation 'org.apache.logging.log4j:log4j-core:2.20.0'
 	
 	//general PDF
     implementation 'org.apache.pdfbox:pdfbox:2.0.27'
     implementation 'com.itextpdf:itextpdf:5.5.13.3'
     
-    //xml conversions and others
-    implementation 'org.apache.poi:poi:5.2.3'
-    implementation 'org.apache.poi:poi-scratchpad:5.2.3'
-    implementation 'org.apache.poi:poi-ooxml:5.2.3'
-    implementation 'com.itextpdf.tool:xmlworker:5.5.13.3'
-    
-    //docx conversions
-    implementation('org.docx4j:docx4j:6.1.2') {
-        exclude group: 'org.slf4j', module: 'slf4j-reload4j'
-    }
-    implementation 'org.docx4j:docx4j-export-fo:11.2.9'
 
 }
 
diff --git a/src/main/java/stirling/software/SPDF/LibreOfficeListener.java b/src/main/java/stirling/software/SPDF/LibreOfficeListener.java
new file mode 100644
index 000000000..d2aff9040
--- /dev/null
+++ b/src/main/java/stirling/software/SPDF/LibreOfficeListener.java
@@ -0,0 +1,94 @@
+package stirling.software.SPDF;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.net.Socket;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+public class LibreOfficeListener {
+
+    private static final LibreOfficeListener INSTANCE = new LibreOfficeListener();
+
+    private static final long ACTIVITY_TIMEOUT = 20 * 60 * 1000; // 20 minutes
+    private static final int LISTENER_PORT = 2002;
+
+    private ExecutorService executorService;
+    private Process process;
+    private volatile long lastActivityTime; // also read by the idle-monitor thread
+
+    private LibreOfficeListener() {}
+
+    public static LibreOfficeListener getInstance() {
+        return INSTANCE;
+    }
+
+    /**
+     * Starts "unoconv --listener" if needed and waits up to 30 seconds for its port. Every
+     * call resets the idle timer; the listener is destroyed after 20 idle minutes.
+     */
+    public synchronized void start() throws IOException {
+        lastActivityTime = System.currentTimeMillis();
+        if (process != null && process.isAlive()) {
+            return;
+        }
+        if (executorService != null) {
+            executorService.shutdownNow(); // monitor of a previous, now dead, listener
+        }
+
+        final Process listener = new ProcessBuilder("unoconv", "--listener").start();
+        process = listener;
+
+        // Daemon thread so an idle monitor never keeps the JVM alive on shutdown
+        executorService = Executors.newSingleThreadExecutor(task -> {
+            Thread monitor = new Thread(task, "libreoffice-idle-monitor");
+            monitor.setDaemon(true);
+            return monitor;
+        });
+        executorService.submit(() -> {
+            try {
+                while (System.currentTimeMillis() - lastActivityTime < ACTIVITY_TIMEOUT) {
+                    Thread.sleep(5000); // Check for inactivity every 5 seconds
+                }
+                listener.destroy(); // idle for too long, tear the listener down
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt(); // stop() cancelled the monitor
+            }
+        });
+
+        // Wait for the listener to start up
+        long deadline = System.currentTimeMillis() + 30000; // Timeout after 30 seconds
+        while (System.currentTimeMillis() < deadline) {
+            if (isListenerRunning()) {
+                lastActivityTime = System.currentTimeMillis();
+                return;
+            }
+            try {
+                Thread.sleep(1000); // Check every 1 second
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt(); // keep the flag for the caller
+                return;
+            }
+        }
+    }
+
+    /** Returns true when a TCP connection to the listener port succeeds within a second. */
+    private boolean isListenerRunning() {
+        System.out.println("waiting for listener to start");
+        try (Socket socket = new Socket()) {
+            socket.connect(new InetSocketAddress("localhost", LISTENER_PORT), 1000);
+            return true;
+        } catch (IOException e) {
+            return false;
+        }
+    }
+
+    /** Cancels the idle monitor and destroys the listener process; safe to call before start(). */
+    public synchronized void stop() {
+        if (executorService != null) {
+            executorService.shutdownNow();
+        }
+        if (process != null && process.isAlive()) {
+            process.destroy();
+        }
+    }
+}
diff --git a/src/main/java/stirling/software/SPDF/controller/CompressController.java b/src/main/java/stirling/software/SPDF/controller/CompressController.java
index 9a9d45b1b..c2c40c2fc 100644
--- a/src/main/java/stirling/software/SPDF/controller/CompressController.java
+++ b/src/main/java/stirling/software/SPDF/controller/CompressController.java
@@ -31,8 +31,26 @@ import com.itextpdf.text.pdf.PdfReader;
 import com.itextpdf.text.pdf.PdfStamper;
 
 import stirling.software.SPDF.utils.PdfUtils;
+import stirling.software.SPDF.utils.ProcessExecutor;
+
+import org.springframework.http.HttpHeaders;
+import org.springframework.http.MediaType;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.multipart.MultipartFile;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
 
-//import com.spire.pdf.*;
 @Controller
 public class CompressController {
 
@@ -45,80 +63,55 @@ public class CompressController {
     }
 
     
-
     @PostMapping("/compress-pdf")
-    public ResponseEntity<byte[]> compressPDF(
-            @RequestParam("fileInput") MultipartFile pdfFile,
-            @RequestParam(value = "compressPDF", defaultValue = "false") boolean compressPDF,
-            @RequestParam(value = "compressImages", defaultValue = "false") boolean compressImages,
-            @RequestParam(value = "useLossyCompression", defaultValue = "false") boolean useLossyCompression,
-            @RequestParam(value = "resolutionPercentage", defaultValue = "50") int resolutionPercentage) {
+    public ResponseEntity<byte[]> optimizePdf(
+            @RequestParam("fileInput") MultipartFile inputFile,
+            @RequestParam("optimizeLevel") int optimizeLevel,
+            @RequestParam(name = "fastWebView", required = false) Boolean fastWebView,
+            @RequestParam(name = "jbig2Lossy", required = false) Boolean jbig2Lossy) throws IOException, InterruptedException {
 
-        ByteArrayOutputStream baosPDFBox = new ByteArrayOutputStream();
+        // ocrmypdf works on file paths, so the upload and the result are staged in temp files
+        Path tempInputFile = Files.createTempFile("input_", ".pdf");
+        Path tempOutputFile = Files.createTempFile("output_", ".pdf");
+        try {
+            inputFile.transferTo(tempInputFile.toFile());
+
+            // Command line: ocrmypdf --optimize <level> [--fast-web-view 0] [--jbig2-lossy] <in> <out>
+            List<String> command = new ArrayList<>();
+            command.add("ocrmypdf");
+            command.add("--optimize");
+            command.add(String.valueOf(optimizeLevel));
+
+            if (fastWebView != null && fastWebView) {
+                // The argument is a size threshold in megabytes (not bytes) above which the PDF
+                // is linearized; 0 linearizes every file, which is what ticking the option means
+                command.add("--fast-web-view");
+                command.add("0");
+            }
+
+            if (jbig2Lossy != null && jbig2Lossy) {
+                command.add("--jbig2-lossy");
+            }
+
+            command.add(tempInputFile.toString());
+            command.add(tempOutputFile.toString());
+
+            // NOTE(review): the exit code is not inspected here; confirm ProcessExecutor reports failures
+            int returnCode = ProcessExecutor.runCommandWithOutputHandling(command);
         
-        try (InputStream is = pdfFile.getInputStream();
-             PDDocument document = PDDocument.load(is)) {
+            // Read the optimized PDF file
+            byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
 
-            if (compressImages) {
-                for (PDPage page : document.getPages()) {
-                    PDResources resources = page.getResources();
-                    for (COSName cosName : resources.getXObjectNames()) {
-                        if (resources.isImageXObject(cosName)) {
-                            PDImageXObject image = (PDImageXObject) resources.getXObject(cosName);
-                            BufferedImage bufferedImage = image.getImage();
-                            BufferedImage resizedImage = resizeImage(bufferedImage, resolutionPercentage);
+            String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_Optimized.pdf";
+            HttpHeaders headers = new HttpHeaders();
+            headers.setContentType(MediaType.APPLICATION_PDF);
+            headers.setContentDispositionFormData("attachment", outputFilename);
+            return ResponseEntity.ok().headers(headers).body(pdfBytes);
+        } finally {
+            // Remove the temp files even when the upload, ocrmypdf or the read-back fails
+            Files.deleteIfExists(tempInputFile);
+            Files.deleteIfExists(tempOutputFile);
+        }
+    }
 
-                            if (useLossyCompression) {
-                                File tempFile = File.createTempFile("pdfbox", ".jpg");
-                                ImageIO.write(resizedImage, "jpg", tempFile);
-                                PDImageXObject newImage = PDImageXObject.createFromFile(tempFile.getAbsolutePath(), document);
-                                resources.put(cosName, newImage);
-                            } else {
-                                File tempFile = File.createTempFile("pdfbox", ".png");
-                                ImageIO.write(resizedImage, "png", tempFile);
-                                PDImageXObject newImage = PDImageXObject.createFromFile(tempFile.getAbsolutePath(), document);
-                                resources.put(cosName, newImage);
-                            }
-                        }
-                    }
-                }
-            }
-
-            document.save(baosPDFBox);
-
-        } catch (IOException e) {
-            e.printStackTrace();
-            return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
-        }
-
-        try (ByteArrayInputStream baisPDFBox = new ByteArrayInputStream(baosPDFBox.toByteArray());
-             ByteArrayOutputStream baosFinal = new ByteArrayOutputStream()) {
-
-            PdfReader reader = new PdfReader(baisPDFBox);
-            PdfStamper stamper = new PdfStamper(reader, baosFinal);
-
-            if (compressPDF) {
-                stamper.setFullCompression();
-            }
-
-            stamper.close();
-            reader.close();
-
-            return PdfUtils.boasToWebResponse(baosFinal, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_compressed.pdf");
-        } catch (IOException | DocumentException e) {
-            e.printStackTrace();
-            return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
-        }
-    }
-
-
-    private BufferedImage resizeImage(BufferedImage originalImage, int resolutionPercentage) {
-        int newWidth = originalImage.getWidth() * resolutionPercentage / 100;
-        int newHeight = originalImage.getHeight() * resolutionPercentage / 100;
-        BufferedImage resizedImage = new BufferedImage(newWidth, newHeight, originalImage.getType());
-        Graphics2D g = resizedImage.createGraphics();
-        g.drawImage(originalImage, 0, 0, newWidth, newHeight, null);
-        g.dispose();
-        return resizedImage;
-    }
 }
diff --git a/src/main/java/stirling/software/SPDF/controller/OCRController.java b/src/main/java/stirling/software/SPDF/controller/OCRController.java
new file mode 100644
index 000000000..5425b2c46
--- /dev/null
+++ b/src/main/java/stirling/software/SPDF/controller/OCRController.java
@@ -0,0 +1,143 @@
+package stirling.software.SPDF.controller;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.http.HttpHeaders;
+import org.springframework.http.HttpStatus;
+import org.springframework.http.MediaType;
+import org.springframework.http.ResponseEntity;
+import org.springframework.stereotype.Controller;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.multipart.MultipartFile;
+import org.springframework.web.servlet.ModelAndView;
+
+import stirling.software.SPDF.utils.ProcessExecutor;
+
+import java.io.FileOutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+//import com.spire.pdf.*;
+@Controller
+public class OCRController {
+
+	private static final Logger logger = LoggerFactory.getLogger(OCRController.class);
+
+	/** Renders the OCR form together with the Tesseract languages found on this host. */
+	@GetMapping("/ocr-pdf")
+	public ModelAndView ocrPdfPage() {
+		ModelAndView modelAndView = new ModelAndView("ocr-pdf");
+		modelAndView.addObject("languages", getAvailableTesseractLanguages());
+		modelAndView.addObject("currentPage", "ocr-pdf");
+		return modelAndView;
+	}
+
+	/**
+	 * Runs ocrmypdf over the uploaded PDF.
+	 *
+	 * @param inputFile         the uploaded PDF
+	 * @param selectedLanguages Tesseract language codes; joined with '+' for ocrmypdf's --language
+	 * @param sidecar           when true, the recognised text is returned too, zipped with the PDF
+	 * @return the processed PDF, or a zip holding the PDF and a .txt file when sidecar is set
+	 * @throws IOException if no language is selected or a file operation fails
+	 */
+	@PostMapping("/ocr-pdf")
+	public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile,
+			@RequestParam("languages") List<String> selectedLanguages,
+			@RequestParam(name = "sidecar", required = false) Boolean sidecar) throws IOException, InterruptedException {
+		if (selectedLanguages == null || selectedLanguages.isEmpty()) {
+			throw new IOException("Please select at least one language.");
+		}
+		boolean wantSidecar = sidecar != null && sidecar;
+
+		// ocrmypdf works on file paths, so the upload and every result are staged in temp files
+		Path tempInputFile = Files.createTempFile("input_", ".pdf");
+		Path tempOutputFile = Files.createTempFile("output_", ".pdf");
+		Path sidecarFile = Paths.get(tempOutputFile.toString().replace(".pdf", ".txt"));
+		try {
+			inputFile.transferTo(tempInputFile.toFile());
+
+			List<String> command = new ArrayList<>(Arrays.asList("ocrmypdf", "--verbose", "2",
+					"--language", String.join("+", selectedLanguages),
+					tempInputFile.toString(), tempOutputFile.toString()));
+			if (wantSidecar) {
+				command.add("--sidecar");
+				command.add(sidecarFile.toString());
+			}
+			int returnCode = ProcessExecutor.runCommandWithOutputHandling(command);
+			logger.info("ocrmypdf exited with code {}", returnCode);
+
+			String baseName = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "");
+			String outputFilename = baseName + "_OCR.pdf";
+			HttpHeaders headers = new HttpHeaders();
+			if (!wantSidecar) {
+				headers.setContentType(MediaType.APPLICATION_PDF);
+				headers.setContentDispositionFormData("attachment", outputFilename);
+				return ResponseEntity.ok().headers(headers).body(Files.readAllBytes(tempOutputFile));
+			}
+
+			// Zip the PDF and the text under download-friendly names; using the temp path as the
+			// entry name would put an absolute server path inside the archive
+			Path tempZipFile = Files.createTempFile("output_", ".zip");
+			try {
+				try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
+					addZipEntry(zipOut, outputFilename, tempOutputFile);
+					addZipEntry(zipOut, baseName + "_OCR.txt", sidecarFile);
+				}
+				headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
+				headers.setContentDispositionFormData("attachment", baseName + "_OCR.zip");
+				return ResponseEntity.ok().headers(headers).body(Files.readAllBytes(tempZipFile));
+			} finally {
+				Files.deleteIfExists(tempZipFile);
+			}
+		} finally {
+			// Runs on success and failure alike, so no temp file outlives the request
+			Files.deleteIfExists(tempInputFile);
+			Files.deleteIfExists(tempOutputFile);
+			Files.deleteIfExists(sidecarFile);
+		}
+	}
+
+	/** Copies the file at {@code source} into the archive as an entry named {@code entryName}. */
+	private static void addZipEntry(ZipOutputStream zipOut, String entryName, Path source) throws IOException {
+		zipOut.putNextEntry(new ZipEntry(entryName));
+		Files.copy(source, zipOut);
+		zipOut.closeEntry();
+	}
+
+	/**
+	 * Lists the language codes of the {@code .traineddata} files in the Tesseract data
+	 * directory, leaving out "osd"; returns an empty list when the directory cannot be listed.
+	 */
+	public List<String> getAvailableTesseractLanguages() {
+		String tessdataDir = "/usr/share/tesseract-ocr/4.00/tessdata";
+		File[] files = new File(tessdataDir).listFiles();
+		if (files == null) {
+			return Collections.emptyList();
+		}
+		return Arrays.stream(files)
+				.map(File::getName)
+				.filter(name -> name.endsWith(".traineddata"))
+				.map(name -> name.replace(".traineddata", ""))
+				.filter(lang -> !lang.equalsIgnoreCase("osd"))
+				.collect(Collectors.toList());
+	}
+}
diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertCsvController.java b/src/main/java/stirling/software/SPDF/controller/converters/ConvertCsvController.java
deleted file mode 100644
index d374c78b4..000000000
--- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertCsvController.java
+++ /dev/null
@@ -1,79 +0,0 @@
-package stirling.software.SPDF.controller.converters;
-
-import java.io.BufferedReader;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.nio.charset.StandardCharsets;
-
-import org.springframework.http.ResponseEntity;
-import org.springframework.stereotype.Controller;
-import org.springframework.ui.Model;
-import org.springframework.web.bind.annotation.GetMapping;
-import org.springframework.web.bind.annotation.PostMapping;
-import org.springframework.web.bind.annotation.RequestParam;
-import org.springframework.web.multipart.MultipartFile;
-
-import com.itextpdf.text.Document;
-import com.itextpdf.text.DocumentException;
-import com.itextpdf.text.Paragraph;
-import com.itextpdf.text.pdf.PdfPCell;
-import com.itextpdf.text.pdf.PdfPTable;
-import com.itextpdf.text.pdf.PdfWriter;
-
-import stirling.software.SPDF.utils.PdfUtils;
-
-@Controller
-public class ConvertCsvController {
-
-    
-    @GetMapping("/csv-to-pdf")
-    public String cinvertToPDF(Model model) {
-        model.addAttribute("currentPage", "xlsx-to-pdf");
-        return "convert/xlsx-to-pdf";
-    }
-
-
-
-    @PostMapping("/csv-to-pdf")
-    public ResponseEntity<byte[]> convertCsvToPdf(@RequestParam("fileInput") MultipartFile csvFile) throws IOException, DocumentException {
-        // Create PDF document
-        Document document = new Document();
-        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
-        PdfWriter.getInstance(document, outputStream);
-        document.open();
-
-        // Read CSV file
-        InputStreamReader inputStreamReader = new InputStreamReader(csvFile.getInputStream(), StandardCharsets.UTF_8);
-        BufferedReader bufferedReader = new BufferedReader(inputStreamReader);
-
-        // Create PDF table from CSV content
-        PdfPTable table = null;
-        String csvRow;
-        while ((csvRow = bufferedReader.readLine()) != null) {
-            String[] csvRowCells = csvRow.split(","); // Assuming comma as a delimiter
-
-            if (table == null) {
-                table = new PdfPTable(csvRowCells.length);
-            }
-
-            for (String cellValue : csvRowCells) {
-                PdfPCell pdfCell = new PdfPCell(new Paragraph(cellValue));
-                table.addCell(pdfCell);
-            }
-        }
-
-        if (table != null) {
-            document.add(table);
-        }
-
-        // Close BufferedReader, document, and output stream
-        bufferedReader.close();
-        document.close();
-        outputStream.close();
-
-        return PdfUtils.boasToWebResponse(outputStream, csvFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
-    }
-
-
-}
diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertDocController.java b/src/main/java/stirling/software/SPDF/controller/converters/ConvertDocController.java
deleted file mode 100644
index 10dd6c06e..000000000
--- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertDocController.java
+++ /dev/null
@@ -1,43 +0,0 @@
-package stirling.software.SPDF.controller.converters;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-
-import org.docx4j.Docx4J;
-import org.docx4j.openpackaging.exceptions.Docx4JException;
-import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
-import org.springframework.http.ResponseEntity;
-import org.springframework.stereotype.Controller;
-import org.springframework.ui.Model;
-import org.springframework.web.bind.annotation.GetMapping;
-import org.springframework.web.bind.annotation.PostMapping;
-import org.springframework.web.bind.annotation.RequestParam;
-import org.springframework.web.multipart.MultipartFile;
-
-import stirling.software.SPDF.utils.PdfUtils;
-
-@Controller
-public class ConvertDocController {
-
-    
-    @GetMapping("/docx-to-pdf")
-    public String cinvertToPDF(Model model) {
-        model.addAttribute("currentPage", "xlsx-to-pdf");
-        return "convert/xlsx-to-pdf";
-    }
-
-    @PostMapping("/docx-to-pdf")
-    public ResponseEntity<byte[]> convertDocxToPdf(@RequestParam("fileInput") MultipartFile docxFile) throws IOException, Docx4JException {
-        // Load WordprocessingMLPackage
-        WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(docxFile.getInputStream());
-
-        // Create PDF output stream
-        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
-
-        // Convert DOCX to PDF
-        Docx4J.toPDF(wordMLPackage, outputStream);
-
-        return PdfUtils.boasToWebResponse(outputStream, docxFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
-    }
-
-}
diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertHtmlController.java b/src/main/java/stirling/software/SPDF/controller/converters/ConvertHtmlController.java
deleted file mode 100644
index fe5bc34ca..000000000
--- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertHtmlController.java
+++ /dev/null
@@ -1,54 +0,0 @@
-package stirling.software.SPDF.controller.converters;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.springframework.http.ResponseEntity;
-import org.springframework.stereotype.Controller;
-import org.springframework.ui.Model;
-import org.springframework.web.bind.annotation.GetMapping;
-import org.springframework.web.bind.annotation.PostMapping;
-import org.springframework.web.bind.annotation.RequestParam;
-import org.springframework.web.multipart.MultipartFile;
-
-import com.itextpdf.text.Document;
-import com.itextpdf.text.DocumentException;
-import com.itextpdf.text.pdf.PdfWriter;
-import com.itextpdf.tool.xml.XMLWorkerHelper;
-
-import stirling.software.SPDF.utils.PdfUtils;
-
-@Controller
-public class ConvertHtmlController {
-
-    
-    @GetMapping("//html-to-pdf")
-    public String cinvertToPDF(Model model) {
-        model.addAttribute("currentPage", "xlsx-to-pdf");
-        return "convert/xlsx-to-pdf";
-    }
-
-    @PostMapping("/html-to-pdf")
-    public ResponseEntity<byte[]> convertHtmlToPdf(@RequestParam("fileInput") MultipartFile htmlFile) throws IOException, DocumentException {
-        // Create PDF document
-        Document document = new Document();
-        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
-        PdfWriter writer = PdfWriter.getInstance(document, outputStream);
-        document.open();
-
-        // Read HTML file
-        InputStream htmlInputStream = new ByteArrayInputStream(htmlFile.getBytes());
-
-        // Convert HTML content to PDF
-        XMLWorkerHelper.getInstance().parseXHtml(writer, document, htmlInputStream);
-
-        // Close document and output stream
-        document.close();
-        outputStream.close();
-
-        return PdfUtils.boasToWebResponse(outputStream, "");
-    }
-
-}
diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertOfficeController.java b/src/main/java/stirling/software/SPDF/controller/converters/ConvertOfficeController.java
new file mode 100644
index 000000000..00996dc06
--- /dev/null
+++ b/src/main/java/stirling/software/SPDF/controller/converters/ConvertOfficeController.java
@@ -0,0 +1,94 @@
+package stirling.software.SPDF.controller.converters;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.springframework.http.ResponseEntity;
+import org.springframework.stereotype.Controller;
+import org.springframework.ui.Model;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.multipart.MultipartFile;
+import org.springframework.web.servlet.ModelAndView;
+
+import stirling.software.SPDF.LibreOfficeListener;
+import stirling.software.SPDF.utils.PdfUtils;
+import stirling.software.SPDF.utils.ProcessExecutor;
+
+/**
+ * Web controller for the generic file-to-PDF page; the conversion itself is delegated to the
+ * external {@code unoconv} command.
+ */
+@Controller
+public class ConvertOfficeController {
+
+    /** Serves the upload form and marks "file-to-pdf" as the current page in the model. */
+    @GetMapping("/file-to-pdf")
+    public String convertToPdfForm(Model model) {
+        model.addAttribute("currentPage", "file-to-pdf");
+        return "convert/file-to-pdf";
+    }
+
+    /**
+     * Converts the uploaded file to PDF and returns it under the original base name with
+     * "_convertedToPDF.pdf" appended. Despite its name, this handler performs no OCR.
+     */
+    @PostMapping("/file-to-pdf")
+    public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
+        // unused but can start server instance if startup time is too long
+        // LibreOfficeListener.getInstance().start();
+        byte[] pdfByteArray = convertToPdf(inputFile);
+        // The original filename may be absent; fall back to an empty base name instead of failing
+        String originalName = inputFile.getOriginalFilename() == null ? "" : inputFile.getOriginalFilename();
+        return PdfUtils.bytesToWebResponse(pdfByteArray, originalName.replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
+    }
+
+    /**
+     * Converts {@code inputFile} to PDF by running {@code unoconv} on a temporary copy of it.
+     * Both temporary files are removed even when the conversion fails.
+     *
+     * @param inputFile the uploaded document
+     * @return the bytes of the generated PDF
+     * @throws IOException if a temporary file cannot be handled or the command exits with a non-zero code
+     * @throws InterruptedException if the thread is interrupted while the command runs
+     */
+    public byte[] convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException {
+        Path tempInputFile = null;
+        Path tempOutputFile = null;
+        try {
+            // The temporary input copy keeps the extension of the uploaded file name
+            tempInputFile = Files.createTempFile("input_", "." + getFileExtension(inputFile.getOriginalFilename()));
+            inputFile.transferTo(tempInputFile.toFile());
+            tempOutputFile = Files.createTempFile("output_", ".pdf");
+
+            List<String> command = new ArrayList<>(Arrays.asList("unoconv", "-vvv", "-f", "pdf",
+                    "-o", tempOutputFile.toString(), tempInputFile.toString()));
+            int returnCode = ProcessExecutor.runCommandWithOutputHandling(command);
+            if (returnCode != 0) {
+                throw new IOException("unoconv failed with exit code " + returnCode);
+            }
+            return Files.readAllBytes(tempOutputFile);
+        } finally {
+            if (tempInputFile != null) {
+                Files.deleteIfExists(tempInputFile);
+            }
+            if (tempOutputFile != null) {
+                Files.deleteIfExists(tempOutputFile);
+            }
+        }
+    }
+
+    /** Returns the text after the last dot of {@code fileName}, or "" if it is null or has no dot. */
+    private String getFileExtension(String fileName) {
+        int dotIndex = fileName == null ? -1 : fileName.lastIndexOf('.');
+        return dotIndex == -1 ? "" : fileName.substring(dotIndex + 1);
+    }
+}
diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertPPTController.java b/src/main/java/stirling/software/SPDF/controller/converters/ConvertPPTController.java
deleted file mode 100644
index c5818219c..000000000
--- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertPPTController.java
+++ /dev/null
@@ -1,79 +0,0 @@
-package stirling.software.SPDF.controller.converters;
-
-import java.awt.Color;
-import java.awt.Graphics2D;
-import java.awt.RenderingHints;
-import java.awt.geom.Rectangle2D;
-import java.awt.image.BufferedImage;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-
-import org.apache.poi.xslf.usermodel.XMLSlideShow;
-import org.apache.poi.xslf.usermodel.XSLFSlide;
-import org.springframework.http.ResponseEntity;
-import org.springframework.stereotype.Controller;
-import org.springframework.ui.Model;
-import org.springframework.web.bind.annotation.GetMapping;
-import org.springframework.web.bind.annotation.PostMapping;
-import org.springframework.web.bind.annotation.RequestParam;
-import org.springframework.web.multipart.MultipartFile;
-
-import com.itextpdf.text.Document;
-import com.itextpdf.text.DocumentException;
-import com.itextpdf.text.Image;
-import com.itextpdf.text.PageSize;
-import com.itextpdf.text.pdf.PdfWriter;
-
-import stirling.software.SPDF.utils.PdfUtils;
-
-@Controller
-public class ConvertPPTController {
-
-    
-    @GetMapping("/pptx-to-pdf")
-    public String cinvertToPDF(Model model) {
-        model.addAttribute("currentPage", "xlsx-to-pdf");
-        return "convert/xlsx-to-pdf";
-    }
-
-    @PostMapping("/pptx-to-pdf")
-    public ResponseEntity<byte[]> convertPptxToPdf(@RequestParam("fileInput") MultipartFile pptxFile) throws IOException, DocumentException {
-        // Read PowerPoint presentation
-        XMLSlideShow ppt = new XMLSlideShow(pptxFile.getInputStream());
-
-        // Create PDF document
-        Document pdfDocument = new Document(PageSize.A4.rotate());
-        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
-        PdfWriter.getInstance(pdfDocument, outputStream);
-        pdfDocument.open();
-
-        // Convert PowerPoint slides to images, then add them to the PDF
-        for (XSLFSlide slide : ppt.getSlides()) {
-            BufferedImage slideImage = new BufferedImage((int) Math.ceil(ppt.getPageSize().getWidth()), (int) Math.ceil(ppt.getPageSize().getHeight()), BufferedImage.TYPE_INT_RGB);
-            Graphics2D graphics = slideImage.createGraphics();
-
-            // Set graphics rendering hints for better quality
-            graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
-            graphics.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
-            graphics.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
-
-            // Draw the slide on the graphics
-            graphics.setPaint(Color.white);
-            graphics.fill(new Rectangle2D.Float(0, 0, slideImage.getWidth(), slideImage.getHeight()));
-            slide.draw(graphics);
-
-            // Add the slide image to the PDF document
-            Image image = Image.getInstance(slideImage, null);
-            image.scaleToFit(PageSize.A4.getWidth() - 72, PageSize.A4.getHeight() - 72);
-            pdfDocument.add(image);
-        }
-
-        // Close PowerPoint and PDF documents
-        ppt.close();
-        pdfDocument.close();
-        outputStream.close();
-
-        return PdfUtils.boasToWebResponse(outputStream, pptxFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
-    }
-
-}
diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertTextController.java b/src/main/java/stirling/software/SPDF/controller/converters/ConvertTextController.java
deleted file mode 100644
index 29fe1ac9b..000000000
--- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertTextController.java
+++ /dev/null
@@ -1,63 +0,0 @@
-package stirling.software.SPDF.controller.converters;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-
-import org.apache.commons.io.FilenameUtils;
-import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.springframework.http.ResponseEntity;
-import org.springframework.stereotype.Controller;
-import org.springframework.ui.Model;
-import org.springframework.web.bind.annotation.GetMapping;
-import org.springframework.web.bind.annotation.PostMapping;
-import org.springframework.web.bind.annotation.RequestParam;
-import org.springframework.web.multipart.MultipartFile;
-
-import com.itextpdf.text.Document;
-import com.itextpdf.text.DocumentException;
-import com.itextpdf.text.Paragraph;
-import com.itextpdf.text.pdf.PdfWriter;
-
-import stirling.software.SPDF.utils.PdfUtils;
-
-@Controller
-public class ConvertTextController {
-
-    
-    @GetMapping("/txt-rtf-to-pdf")
-    public String cinvertToPDF(Model model) {
-        model.addAttribute("currentPage", "xlsx-to-pdf");
-        return "convert/xlsx-to-pdf";
-    }
-
-    @PostMapping("/txt-rtf-to-pdf")
-    public ResponseEntity<byte[]> convertTxtRtfToPdf(@RequestParam("fileInput") MultipartFile txtRtfFile) throws IOException, DocumentException {
-        // Create PDF document
-        Document document = new Document();
-        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
-        PdfWriter.getInstance(document, outputStream);
-        document.open();
-
-        // Read TXT/RTF file content
-        String fileContent;
-        String fileExtension = FilenameUtils.getExtension(txtRtfFile.getOriginalFilename());
-        if (fileExtension.equalsIgnoreCase("rtf")) {
-            HWPFDocument hwpfDocument = new HWPFDocument(new POIFSFileSystem(txtRtfFile.getInputStream()));
-            fileContent = hwpfDocument.getText().toString();
-        } else {
-            fileContent = new String(txtRtfFile.getBytes(), StandardCharsets.UTF_8);
-        }
-
-        // Add content to PDF
-        document.add(new Paragraph(fileContent));
-
-        // Close document and output stream
-        document.close();
-        outputStream.close();
-
-        return PdfUtils.boasToWebResponse(outputStream, txtRtfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
-    }
-
-}
diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertXlsxController.java b/src/main/java/stirling/software/SPDF/controller/converters/ConvertXlsxController.java
deleted file mode 100644
index b6731d848..000000000
--- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertXlsxController.java
+++ /dev/null
@@ -1,152 +0,0 @@
-package stirling.software.SPDF.controller.converters;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-
-import org.apache.poi.ss.usermodel.Cell;
-import org.apache.poi.ss.usermodel.DataFormatter;
-import org.apache.poi.ss.usermodel.Row;
-import org.apache.poi.ss.usermodel.Sheet;
-import org.apache.poi.ss.usermodel.Workbook;
-import org.apache.poi.ss.usermodel.WorkbookFactory;
-import org.apache.poi.xssf.usermodel.XSSFCellStyle;
-import org.apache.poi.xssf.usermodel.XSSFColor;
-import org.apache.poi.xssf.usermodel.XSSFFont;
-import org.springframework.http.ResponseEntity;
-import org.springframework.stereotype.Controller;
-import org.springframework.ui.Model;
-import org.springframework.web.bind.annotation.GetMapping;
-import org.springframework.web.bind.annotation.PostMapping;
-import org.springframework.web.bind.annotation.RequestParam;
-import org.springframework.web.multipart.MultipartFile;
-
-import com.itextpdf.text.BaseColor;
-import com.itextpdf.text.Document;
-import com.itextpdf.text.DocumentException;
-import com.itextpdf.text.Font;
-import com.itextpdf.text.Paragraph;
-import com.itextpdf.text.pdf.PdfPCell;
-import com.itextpdf.text.pdf.PdfPTable;
-import com.itextpdf.text.pdf.PdfWriter;
-
-import stirling.software.SPDF.utils.PdfUtils;
-
-@Controller
-public class ConvertXlsxController {
-
-    
-    @GetMapping("/xlsx-to-pdf")
-    public String cinvertToPDF(Model model) {
-        model.addAttribute("currentPage", "xlsx-to-pdf");
-        return "convert/xlsx-to-pdf";
-    }
-
-    @PostMapping("/xlsx-to-pdf")
-    public ResponseEntity<byte[]> convertToPDF(@RequestParam("fileInput") MultipartFile xlsx) throws IOException, DocumentException {
-        // Load Excel file
-        Workbook workbook = WorkbookFactory.create(xlsx.getInputStream());
-
-        // Create PDF document
-        Document document = new Document();
-        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
-        PdfWriter.getInstance(document, outputStream);
-        document.open();
-
-        // Convert each sheet in Excel to a separate page in PDF
-        for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
-            Sheet sheet = workbook.getSheetAt(i);
-            int numOfColumns = sheet.getRow(0).getPhysicalNumberOfCells();
-            PdfPTable table = new PdfPTable(numOfColumns);
-
-            for (int row = 0; row < sheet.getPhysicalNumberOfRows(); row++) {
-                Row excelRow = sheet.getRow(row);
-                if (excelRow == null) {
-                    continue; // Skip this row if it's null
-                }
-                for (int cell = 0; cell < excelRow.getPhysicalNumberOfCells(); cell++) {
-                    Cell excelCell = excelRow.getCell(cell);
-
-                    // Check if the cell is null
-                    if (excelCell == null) {
-                        table.addCell(""); // Add an empty cell to the PDF table
-                        continue;
-                    }
-                    
-                    // Convert cell to string
-                    DataFormatter dataFormatter = new DataFormatter();
-                    String cellValue = dataFormatter.formatCellValue(excelCell);
-                    System.out.println("Cell Value: " + cellValue); 
-                    // Get Excel cell font
-                    Font cellFont = getFontFromExcelCell(workbook, excelCell);
-
-                    // Create PDF cell with Excel cell font
-                    PdfPCell pdfCell = new PdfPCell(new Paragraph(cellValue, cellFont));
-                    
-                    // Set cell height and width
-                    float height = sheet.getRow(row).getHeightInPoints();
-                    System.out.print(height);
-                    pdfCell.setFixedHeight(30f);
-
-
-                    // Copy cell style, borders, and background color
-                    XSSFCellStyle cellStyle = (XSSFCellStyle) excelCell.getCellStyle();
-                    if (cellStyle != null) {
-                        XSSFColor bottomBorderColor = cellStyle.getBottomBorderXSSFColor();
-                        if (bottomBorderColor != null) {
-                            pdfCell.setBorderColor(new BaseColor(bottomBorderColor.getRGB()[0] & 0xFF, bottomBorderColor.getRGB()[1] & 0xFF, bottomBorderColor.getRGB()[2] & 0xFF));
-                        }
-
-                        XSSFColor topBorderColor = cellStyle.getTopBorderXSSFColor();
-                        if (topBorderColor != null) {
-                            pdfCell.setBorderColor(new BaseColor(topBorderColor.getRGB()[0] & 0xFF, topBorderColor.getRGB()[1] & 0xFF, topBorderColor.getRGB()[2] & 0xFF));
-                        }
-
-                        XSSFColor leftBorderColor = cellStyle.getLeftBorderXSSFColor();
-                        if (leftBorderColor != null) {
-                            pdfCell.setBorderColor(new BaseColor(leftBorderColor.getRGB()[0] & 0xFF, leftBorderColor.getRGB()[1] & 0xFF, leftBorderColor.getRGB()[2] & 0xFF));
-                        }
-
-                        XSSFColor rightBorderColor = cellStyle.getRightBorderXSSFColor();
-                        if (rightBorderColor != null) {
-                            pdfCell.setBorderColor(new BaseColor(rightBorderColor.getRGB()[0] & 0xFF, rightBorderColor.getRGB()[1] & 0xFF, rightBorderColor.getRGB()[2] & 0xFF));
-                        }
-                        
-                        XSSFColor fillForegroundColor = cellStyle.getFillForegroundXSSFColor();
-                        if (fillForegroundColor != null) {
-                            pdfCell.setBackgroundColor(new BaseColor(fillForegroundColor.getRGB()[0] & 0xFF, fillForegroundColor.getRGB()[1] & 0xFF, fillForegroundColor.getRGB()[2] & 0xFF));
-                        }
-
-                    }
-
-                    table.addCell(pdfCell);
-                }
-            }
-
-            // Add sheet to PDF
-            document.add(table);
-
-            // Add page break if there are more sheets
-            if (i < workbook.getNumberOfSheets() - 1) {
-                document.newPage();
-            }
-        }
-
-        // Close document and output stream
-        document.close();
-        outputStream.flush();
-        outputStream.close();   
-
-        // Return PDF as response
-        return PdfUtils.boasToWebResponse(outputStream, xlsx.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
-    }
-
-    private Font getFontFromExcelCell(Workbook workbook, Cell excelCell) {
-        XSSFFont excelFont = ((XSSFCellStyle) excelCell.getCellStyle()).getFont();
-        Font.FontFamily fontFamily = Font.getFamily(excelFont.getFontName());
-        float fontSize = excelFont.getFontHeightInPoints();
-        int fontStyle = (excelFont.getBold() ? Font.BOLD : Font.NORMAL) | (excelFont.getItalic() ? Font.ITALIC : Font.NORMAL);
-
-        return new Font(fontFamily, fontSize, fontStyle);
-    }
-
-}
diff --git a/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java b/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java
new file mode 100644
index 000000000..e244d4a2a
--- /dev/null
+++ b/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java
@@ -0,0 +1,77 @@
+package stirling.software.SPDF.utils;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.util.Arrays;
+import java.util.List;
+import java.io.BufferedReader;
+import java.util.ArrayList;
+
+/** Helper for running external command-line tools and capturing what they print. */
+public class ProcessExecutor {
+
+    /**
+     * Runs {@code command}, reads its standard output and standard error on separate threads,
+     * echoes whatever was captured to {@code System.out}, and returns the exit code.
+     *
+     * @param command the program and its arguments, one list element each
+     * @return the exit code of the process
+     * @throws IOException if the process cannot be started, or if it exits with a non-zero code
+     *         after writing to standard error
+     * @throws InterruptedException if the calling thread is interrupted while waiting; the child
+     *         process is forcibly terminated before the exception propagates
+     */
+    public static int runCommandWithOutputHandling(List<String> command) throws IOException, InterruptedException {
+        Process process = new ProcessBuilder(command).start();
+
+        // Read the error stream and standard output stream concurrently, one thread per stream
+        List<String> errorLines = new ArrayList<>();
+        List<String> outputLines = new ArrayList<>();
+        Thread errorReaderThread = new Thread(() -> collectLines(process.getErrorStream(), errorLines));
+        Thread outputReaderThread = new Thread(() -> collectLines(process.getInputStream(), outputLines));
+        errorReaderThread.start();
+        outputReaderThread.start();
+
+        int exitCode;
+        try {
+            // Wait for the process, then for the reader threads to finish draining its output
+            exitCode = process.waitFor();
+            errorReaderThread.join();
+            outputReaderThread.join();
+        } catch (InterruptedException e) {
+            // Do not leave the external tool running with nobody waiting for it
+            process.destroyForcibly();
+            throw e;
+        }
+
+        if (!outputLines.isEmpty()) {
+            System.out.println("Command output:\n" + String.join("\n", outputLines));
+        }
+
+        if (!errorLines.isEmpty()) {
+            String errorMessage = String.join("\n", errorLines);
+            System.out.println("Command error output:\n" + errorMessage);
+            if (exitCode != 0) {
+                throw new IOException("Command process failed with exit code " + exitCode + ". Error message: " + errorMessage);
+            }
+        }
+
+        return exitCode;
+    }
+
+    /** Reads {@code stream} line by line as UTF-8 into {@code sink} until the stream ends. */
+    private static void collectLines(InputStream stream, List<String> sink) {
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                sink.add(line);
+            }
+        } catch (IOException e) {
+            // Keep whatever was read so far; the failure is only reported
+            e.printStackTrace();
+        }
+    }
+}
diff --git a/src/main/resources/messages.properties b/src/main/resources/messages.properties
index e69de29bb..2df2fa75e 100644
--- a/src/main/resources/messages.properties
+++ b/src/main/resources/messages.properties
@@ -0,0 +1,11 @@
+fileToPDF.fileTypesList=Microsoft Word: (DOC, DOCX, DOT, DOTX) <br> \
+Microsoft Excel: (CSV, XLS, XLSX, XLT, XLTX, SLK, DIF) <br> \
+Microsoft PowerPoint: (PPT, PPTX) <br> \
+OpenDocument Formats: (ODT, OTT, ODS, OTS, ODP, OTP, ODG, OTG) <br> \
+Plain Text: (TXT, TEXT, XML) <br> \
+Rich Text Format: (RTF) <br> \
+Images: (BMP, GIF, JPEG, PNG, TIF, PBM, PGM, PPM, RAS, XBM, XPM, SVG, SVM, WMF) <br> \
+HTML: (HTML) <br> \
+Lotus Word Pro: (LWP) <br> \
+StarOffice formats: (SDA, SDC, SDD, SDW, STC, STD, STI, STW, SXD, SXG, SXI, SXW) <br> \
+Other formats: (DBF, FODS, VSD, VOR, VOR3, VOR4, UOP, PCT, PS, PDF)
diff --git a/src/main/resources/messages_en_US.properties b/src/main/resources/messages_en_US.properties
index 278f959b8..c3a46c67f 100644
--- a/src/main/resources/messages_en_US.properties
+++ b/src/main/resources/messages_en_US.properties
@@ -224,19 +224,8 @@ changeMetadata.selectText.5=Add Custom Metadata Entry
 changeMetadata.submit=Change
 
 
-
-xlsToPdf.title=Excel to PDF
-xlsToPdf.header=Excel to PDF
-xlsToPdf.selectText.1=Select XLS or XLSX Excel sheet to convert
-xlsToPdf.convert=convert
-
-
-
-
-
-
-
-
+fileToPDF.credit=This service uses LibreOffice and Unoconv for file conversion.
+fileToPDF.supportedFileTypes=Supported file types should include the below; however, for a full, up-to-date list of supported formats, please refer to the LibreOffice documentation
 
 
 
diff --git a/src/main/resources/templates/compress-pdf.html b/src/main/resources/templates/compress-pdf.html
index 79b67cee6..dd79f5c25 100644
--- a/src/main/resources/templates/compress-pdf.html
+++ b/src/main/resources/templates/compress-pdf.html
@@ -14,60 +14,30 @@
                 <div class="row justify-content-center">
                     <div class="col-md-6">
                         <h2 th:text="#{compress.header}"></h2>
-                        <form method="post" enctype="multipart/form-data" th:action="@{/compress-pdf}">
-      <div class="form-group">
-        <label for="fileInput">Select a PDF file to compress:</label>
-        <input type="file" class="form-control-file" id="fileInput" name="fileInput" accept=".pdf">
-      </div>
-      <div class="form-group">
-        <div class="form-check">
-          <input class="form-check-input" type="checkbox" id="compressPDF" name="compressPDF" checked>
-          <label class="form-check-label" for="compressPDF">Compress PDF?</label>
-        </div>
-        <div class="form-group">
-          <label for="pdfCompressionLevel">PDF Compression Level:</label>
-          <select class="form-control" id="pdfCompressionLevel" name="pdfCompressionLevel">
-            <option value="0">0 (No compression)</option>
-            <option value="1">1 (Lowest compression)</option>
-            <option value="2">2</option>
-            <option value="3">3</option>
-            <option value="4">4</option>
-            <option value="5" selected>5 (Default compression)</option>
-            <option value="6">6</option>
-            <option value="7">7</option>
-            <option value="8">8</option>
-            <option value="9">9 (Maximum compression)</option>
-          </select>
-        </div>
-      </div>
-      <div class="form-group">
-        <div class="form-check">
-          <input class="form-check-input" type="checkbox" id="compressImages" name="compressImages" checked>
-          <label class="form-check-label" for="compressImages">Compress Images?</label>
-        </div>
-        </div>
-        <div class="form-group">
-          <div class="form-check">
-            <input class="form-check-input" type="checkbox" id="useLossyCompression" name="useLossyCompression">
-            <label class="form-check-label" for="useLossyCompression">Use Lossy Compression for Images?</label>
-          </div>
-          </div>
-          <div class="form-group">
-            <label for="imageCompressionLevel">Image Compression Level:</label>
-            <select class="form-control" id="imageCompressionLevel" name="imageCompressionLevel">
-              <option value="0">0 (No compression)</option>
-              <option value="10">10 (Lowest quality)</option>
-              <option value="25">25</option>
-              <option value="50" selected>50 (Default quality)</option>
-              <option value="75">75</option>
-              <option value="90">90 (High quality)</option>
-              <option value="100">100 (Best quality)</option>
-             </select> 
-             </div>
-          <div class="form-group">
-            <button type="submit" class="btn btn-primary">Compress PDF</button>
-          </div>
-        </form>
+                        <form action="#" th:action="@{/compress-pdf}" method="post" enctype="multipart/form-data">
+					        <div>
+					            <label for="fileInput">Choose a PDF file</label>
+					            <input type="file" name="fileInput" id="fileInput" accept="application/pdf" required>
+					        </div>
+					        <div>
+					            <label for="optimizeLevel">Optimization level:</label>
+					            <select name="optimizeLevel" id="optimizeLevel">
+					                <option value="0">-O0 (No optimization)</option>
+					                <option value="1" selected>-O1 (Default, lossless optimization)</option>
+					                <option value="2">-O2 (Lossy optimization)</option>
+					                <option value="3">-O3 (Lossy optimization, more aggressive)</option>
+					            </select>
+					        </div>
+					        <div>
+					            <input type="checkbox" name="fastWebView" id="fastWebView" checked>
+					            <label for="fastWebView">Enable fast web view (linearize PDF)</label>
+					        </div>
+					        <div>
+					            <input type="checkbox" name="jbig2Lossy" id="jbig2Lossy">
+					            <label for="jbig2Lossy">Enable lossy JBIG2 encoding</label>
+					        </div>
+					        <button type="submit">Optimize PDF</button>
+					    </form>
              
                         <th:block th:insert="~{fragments/common :: filelist}"></th:block>
 
diff --git a/src/main/resources/templates/convert/xlsx-to-pdf.html b/src/main/resources/templates/convert/file-to-pdf.html
similarity index 63%
rename from src/main/resources/templates/convert/xlsx-to-pdf.html
rename to src/main/resources/templates/convert/file-to-pdf.html
index 09c8be155..cb4d8ada7 100644
--- a/src/main/resources/templates/convert/xlsx-to-pdf.html
+++ b/src/main/resources/templates/convert/file-to-pdf.html
@@ -1,7 +1,7 @@
 <!DOCTYPE html>
 <html th:lang="${#locale.language}" th:lang-direction="#{language.direction}" xmlns:th="http://www.thymeleaf.org">
 
-<th:block th:insert="~{fragments/common :: head(title=#{xlsToPdf.title})}"></th:block>
+<th:block th:insert="~{fragments/common :: head(title=#{fileToPDF.title})}"></th:block>
 
 
 <body>
@@ -12,18 +12,23 @@
             <div class="container">
                 <div class="row justify-content-center">
                     <div class="col-md-6">
-                        <h2 th:text="#{xlsToPdf.header}"></h2>
+                        <h2 th:text="#{fileToPDF.header}"></h2>
 
-                        <form method="post" enctype="multipart/form-data" th:action="@{xlsx-to-pdf}">
+                        <form method="post" enctype="multipart/form-data" th:action="@{file-to-pdf}">
                             <div class="custom-file">
                                 <input type="file" class="custom-file-input" id="fileInput" name="fileInput" required> 
-                                <label class="custom-file-label" for="fileInput" th:text="#{xlsToPdf.selectText.1}"></label>
+                                <label class="custom-file-label" for="fileInput" th:text="#{filePrompt}"></label>
                             </div>
                             <br> <br>
-                            <button type="submit" class="btn btn-primary" th:text="#{imageToPDF.submit}"></button>
+                            <button type="submit" class="btn btn-primary" th:text="#{fileToPDF.submit}"></button>
 
                         </form>
                         <th:block th:insert="~{fragments/common :: filelist}"></th:block>
+                        <p class="mt-3" th:text="#{fileToPDF.credit}"></p>
+                        <p class="mt-3" th:text="#{fileToPDF.supportedFileTypes}"></p>
+                        <p th:utext="#{fileToPDF.fileTypesList}"></p>
+                        <a href="https://help.libreoffice.org/latest/en-US/text/shared/guide/supported_formats.html">https://help.libreoffice.org/latest/en-US/text/shared/guide/supported_formats.html</a>
+                        
                     </div>
                 </div>
             </div>
diff --git a/src/main/resources/templates/ocr-pdf.html b/src/main/resources/templates/ocr-pdf.html
new file mode 100644
index 000000000..e440e5de1
--- /dev/null
+++ b/src/main/resources/templates/ocr-pdf.html
@@ -0,0 +1,51 @@
+<!DOCTYPE html>
+<html th:lang="${#locale.language}" th:lang-direction="#{language.direction}" xmlns:th="http://www.thymeleaf.org">
+
+
+<th:block th:insert="~{fragments/common :: head(title=#{addImage.title})}"></th:block>
+
+
+<body>
+    <div id="page-container">
+        <div id="content-wrap">
+            <div th:insert="~{fragments/navbar.html :: navbar}"></div>
+            <br> <br>
+            <div class="container">
+                <div class="row justify-content-center">
+                    <div class="col-md-6">
+                        <h2 th:text="#{ocrPDF.header}"></h2>
+
+						 <form action="#" th:action="@{/ocr-pdf}" method="post" enctype="multipart/form-data" class="mb-3">
+				            <div class="mb-3">
+				                <label for="fileInput" class="form-label">Choose a PDF file</label>
+				                <input type="file" name="fileInput" id="fileInput" accept="application/pdf" required class="form-control">
+				            </div>
+				            <div class="mb-3">
+							    <label for="languages" class="form-label">Select languages that are to be detected within the PDF (Ones listed are the ones currently detected):</label>
+							    <div id="languages">
+							        <div th:each="language: ${languages}">
+							            <input type="checkbox" class="form-check-input" th:name="languages" th:value="${language}" th:id="${'language-' + language}" />
+							            <label class="form-check-label" th:for="${'language-' + language}" th:text="${language}"></label>
+							        </div>
+							    </div>
+							</div>
+							<div class="mb-3">
+						        <input type="checkbox" class="form-check-input" name="sidecar" id="sidecar" />
+						        <label class="form-check-label" for="sidecar">Produce text file containing OCR text alongside the OCR'ed PDF</label>
+						    </div>
+				            <button type="submit" class="btn btn-primary">Process PDF with OCR</button>
+				        </form>
+				        <p>
+				            Please read this documentation on how to use this for other languages and/or not in docker
+				        </p>
+
+                        <th:block th:insert="~{fragments/common :: filelist}"></th:block>
+                    </div>
+                </div>
+            </div>
+        </div>
+        <div th:insert="~{fragments/footer.html :: footer}"></div>
+    </div>
+</body>
+
+</html>
\ No newline at end of file