mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-03 17:52:30 +02:00
Major changes, use libre
This commit is contained in:
parent
946df5b545
commit
cbfa70d851
51
Dockerfile
51
Dockerfile
@ -1,5 +1,54 @@
|
||||
# Build jbig2enc in a separate stage
|
||||
FROM debian:bullseye-slim as jbig2enc_builder
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
git \
|
||||
automake \
|
||||
autoconf \
|
||||
libtool \
|
||||
libleptonica-dev \
|
||||
pkg-config \
|
||||
ca-certificates \
|
||||
zlib1g-dev \
|
||||
make \
|
||||
g++
|
||||
|
||||
RUN git clone https://github.com/agl/jbig2enc && \
|
||||
cd jbig2enc && \
|
||||
./autogen.sh && \
|
||||
./configure && \
|
||||
make && \
|
||||
make install
|
||||
|
||||
# Main stage
|
||||
FROM openjdk:17-jdk-slim
|
||||
|
||||
# Install necessary dependencies
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
libreoffice-core \
|
||||
libreoffice-common \
|
||||
libreoffice-writer \
|
||||
libreoffice-calc \
|
||||
libreoffice-impress \
|
||||
python3-uno \
|
||||
python3-pip \
|
||||
unoconv \
|
||||
ocrmypdf && \
|
||||
pip install --user --upgrade ocrmypdf
|
||||
|
||||
# Copy the jbig2enc binary from the builder stage
|
||||
COPY --from=jbig2enc_builder /usr/local/bin/jbig2 /usr/local/bin/jbig2
|
||||
|
||||
# Copy the application JAR file
|
||||
COPY build/libs/*.jar app.jar
|
||||
|
||||
# Expose the application port
|
||||
EXPOSE 8080
|
||||
|
||||
# Set environment variables
|
||||
ENV LOG_LEVEL=INFO
|
||||
ENTRYPOINT ["java","-jar","/app.jar","-Dlogging.level=${LOG_LEVEL}"]
|
||||
|
||||
# Run the application
|
||||
ENTRYPOINT ["java","-jar","/app.jar","-Dlogging.level=${LOG_LEVEL}"]
|
||||
|
49
HowToUseOCR.md
Normal file
49
HowToUseOCR.md
Normal file
@ -0,0 +1,49 @@
|
||||
# OCR Language Packs and Setup
|
||||
|
||||
This document provides instructions on how to add additional language packs for the OCR tab in Stirling-PDF, both inside and outside of Docker.
|
||||
|
||||
## How the OCR Works
|
||||
Stirling-PDF uses OCRmyPDF, which in turn uses Tesseract for its text recognition.
|
||||
All credit goes to them for this awesome work!
|
||||
|
||||
## Language Packs
|
||||
|
||||
Tesseract OCR supports a variety of languages. You can find additional language packs in the Tesseract GitHub repositories:
|
||||
|
||||
- [tessdata_fast](https://github.com/tesseract-ocr/tessdata_fast): These language packs are smaller and faster to load, but may provide lower recognition accuracy.
|
||||
- [tessdata](https://github.com/tesseract-ocr/tessdata): These language packs are larger and provide better recognition accuracy, but may take longer to load.
|
||||
|
||||
Depending on your requirements, you can choose the appropriate language pack for your use case. By default, Stirling-PDF uses the English (`eng`) pack from tessdata_fast, but this can be replaced.
|
||||
|
||||
### Installing Language Packs
|
||||
|
||||
1. Download the desired language pack(s) by selecting the `.traineddata` file(s) for the language(s) you need.
|
||||
2. Place the `.traineddata` files in the Tesseract tessdata directory: `/usr/share/tesseract-ocr/4.00/tessdata`
|
||||
|
||||
#### Docker
|
||||
|
||||
If you are using Docker, you need to expose the Tesseract tessdata directory as a volume in order to use the additional language packs.
|
||||
#### Docker Compose
|
||||
Modify your `docker-compose.yml` file to include the following volume configuration:
|
||||
|
||||
|
||||
```yaml
|
||||
services:
|
||||
your_service_name:
|
||||
image: your_docker_image_name
|
||||
volumes:
|
||||
- /location/of/trainingData:/usr/share/tesseract-ocr/4.00/tessdata
|
||||
```
|
||||
|
||||
|
||||
#### Docker run
|
||||
Add the following to your existing `docker run` command:
|
||||
```bash
|
||||
-v /location/of/trainingData:/usr/share/tesseract-ocr/4.00/tessdata
|
||||
```
|
||||
|
||||
#### Non-Docker
|
||||
If you are not using Docker, you need to install the OCR components, including the ocrmypdf app.
|
||||
See the [OCRmyPDF installation guide](https://ocrmypdf.readthedocs.io/en/latest/installation.html) for details.
|
||||
|
||||
|
12
build.gradle
12
build.gradle
@ -21,24 +21,12 @@ dependencies {
|
||||
implementation 'org.springframework.boot:spring-boot-starter-thymeleaf'
|
||||
testImplementation 'org.springframework.boot:spring-boot-starter-test'
|
||||
|
||||
implementation 'org.apache.xmlgraphics:batik-transcoder:1.14'
|
||||
implementation 'org.apache.logging.log4j:log4j-core:2.20.0'
|
||||
|
||||
//general PDF
|
||||
implementation 'org.apache.pdfbox:pdfbox:2.0.27'
|
||||
implementation 'com.itextpdf:itextpdf:5.5.13.3'
|
||||
|
||||
//xml conversions and others
|
||||
implementation 'org.apache.poi:poi:5.2.3'
|
||||
implementation 'org.apache.poi:poi-scratchpad:5.2.3'
|
||||
implementation 'org.apache.poi:poi-ooxml:5.2.3'
|
||||
implementation 'com.itextpdf.tool:xmlworker:5.5.13.3'
|
||||
|
||||
//docx conversions
|
||||
implementation('org.docx4j:docx4j:6.1.2') {
|
||||
exclude group: 'org.slf4j', module: 'slf4j-reload4j'
|
||||
}
|
||||
implementation 'org.docx4j:docx4j-export-fo:11.2.9'
|
||||
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,94 @@
|
||||
package stirling.software.SPDF;
|
||||
import java.io.IOException;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.net.Socket;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
|
||||
/**
 * Process-wide singleton that launches an {@code unoconv --listener} child process and
 * tears it down again after a fixed idle period.
 *
 * <p>{@link #start()} and {@link #stop()} are synchronized on the singleton, so they never
 * run concurrently with each other. Note that {@code start()} may hold the lock for up to
 * the start-up timeout while it waits for the listener port to open.
 *
 * <p>As written, the activity timestamp is only refreshed by {@code start()} itself, so the
 * idle timeout effectively measures time since the last successful start.
 */
public class LibreOfficeListener {

    private static final LibreOfficeListener INSTANCE = new LibreOfficeListener();

    /** Idle period after which the listener process is destroyed (20 minutes). */
    private static final long ACTIVITY_TIMEOUT = 20 * 60 * 1000;
    /** TCP port probed on localhost to decide whether the listener is accepting connections. */
    private static final int LISTENER_PORT = 2002;
    /** Maximum time {@link #start()} waits for the listener port to open. */
    private static final long STARTUP_TIMEOUT = 30000;
    /** Pause between two start-up probes. */
    private static final long STARTUP_POLL_INTERVAL = 1000;
    /** Pause between two idle checks of the monitor thread. */
    private static final long IDLE_POLL_INTERVAL = 5000;
    /** Connect timeout of a single start-up probe. */
    private static final int CONNECT_TIMEOUT = 1000;

    private ExecutorService executorService;
    private Process process;
    /** Written by the caller of {@code start()}, read by the monitor thread. */
    private volatile long lastActivityTime;

    private LibreOfficeListener() {}

    /**
     * Returns the single shared instance.
     *
     * @return the process-wide listener manager
     */
    public static LibreOfficeListener getInstance() {
        return INSTANCE;
    }

    /**
     * Starts the listener process if it is not already alive, then waits (up to 30 seconds)
     * until the listener port accepts connections.
     *
     * <p>If the port does not open within the timeout, or the waiting thread is interrupted,
     * the method returns without an error; on interruption the interrupt flag is restored.
     *
     * @throws IOException if the {@code unoconv} process cannot be launched
     */
    public synchronized void start() throws IOException {
        // Check if the listener is already running
        if (process != null && process.isAlive()) {
            return;
        }

        // A monitor left over from an earlier (now dead) listener must not linger.
        shutdownMonitor();

        // inheritIO() forwards the child's output to this JVM's stdout/stderr so the child
        // can never block on a full, unread pipe.
        final Process listener = new ProcessBuilder("unoconv", "--listener").inheritIO().start();
        process = listener;
        lastActivityTime = System.currentTimeMillis();

        // Background daemon thread that enforces the activity timeout.
        executorService = Executors.newSingleThreadExecutor(task -> {
            Thread monitor = new Thread(task, "libreoffice-listener-idle-monitor");
            monitor.setDaemon(true);
            return monitor;
        });
        executorService.submit(() -> monitorIdle(listener));
        // No further tasks will be submitted: let the executor terminate once the monitor ends.
        executorService.shutdown();

        waitForStartup();
    }

    /** Destroys {@code listener} once it has been idle for {@link #ACTIVITY_TIMEOUT}. */
    private void monitorIdle(Process listener) {
        while (listener.isAlive()) {
            long idleTime = System.currentTimeMillis() - lastActivityTime;
            if (idleTime >= ACTIVITY_TIMEOUT) {
                listener.destroy();
                return;
            }
            try {
                Thread.sleep(IDLE_POLL_INTERVAL);
            } catch (InterruptedException e) {
                // stop() interrupts this thread via shutdownNow(); just end the monitor.
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /** Polls the listener port once per second until it opens or the start-up timeout elapses. */
    private void waitForStartup() {
        long deadline = System.currentTimeMillis() + STARTUP_TIMEOUT;
        while (System.currentTimeMillis() < deadline) {
            if (isListenerRunning()) {
                lastActivityTime = System.currentTimeMillis();
                return;
            }
            try {
                Thread.sleep(STARTUP_POLL_INTERVAL);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /** Returns {@code true} if a TCP connection to the listener port succeeds within one second. */
    private boolean isListenerRunning() {
        System.out.println("waiting for listener to start");
        try (Socket socket = new Socket()) {
            socket.connect(new InetSocketAddress("localhost", LISTENER_PORT), CONNECT_TIMEOUT);
            return true;
        } catch (IOException e) {
            return false;
        }
    }

    /**
     * Stops the idle monitor and destroys the listener process if it is alive.
     * Safe to call when {@link #start()} was never invoked, and safe to call repeatedly.
     */
    public synchronized void stop() {
        // Stop the activity timeout monitor thread
        shutdownMonitor();

        // Stop the listener process
        if (process != null && process.isAlive()) {
            process.destroy();
        }
    }

    /** Interrupts and discards the current monitor executor, if any. */
    private void shutdownMonitor() {
        if (executorService != null) {
            executorService.shutdownNow();
            executorService = null;
        }
    }
}
|
@ -31,8 +31,26 @@ import com.itextpdf.text.pdf.PdfReader;
|
||||
import com.itextpdf.text.pdf.PdfStamper;
|
||||
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
//import com.spire.pdf.*;
|
||||
@Controller
|
||||
public class CompressController {
|
||||
|
||||
@ -45,80 +63,55 @@ public class CompressController {
|
||||
}
|
||||
|
||||
|
||||
|
||||
@PostMapping("/compress-pdf")
|
||||
public ResponseEntity<byte[]> compressPDF(
|
||||
@RequestParam("fileInput") MultipartFile pdfFile,
|
||||
@RequestParam(value = "compressPDF", defaultValue = "false") boolean compressPDF,
|
||||
@RequestParam(value = "compressImages", defaultValue = "false") boolean compressImages,
|
||||
@RequestParam(value = "useLossyCompression", defaultValue = "false") boolean useLossyCompression,
|
||||
@RequestParam(value = "resolutionPercentage", defaultValue = "50") int resolutionPercentage) {
|
||||
public ResponseEntity<byte[]> optimizePdf(
|
||||
@RequestParam("fileInput") MultipartFile inputFile,
|
||||
@RequestParam("optimizeLevel") int optimizeLevel,
|
||||
@RequestParam(name = "fastWebView", required = false) Boolean fastWebView,
|
||||
@RequestParam(name = "jbig2Lossy", required = false) Boolean jbig2Lossy) throws IOException, InterruptedException {
|
||||
|
||||
ByteArrayOutputStream baosPDFBox = new ByteArrayOutputStream();
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", ".pdf");
|
||||
inputFile.transferTo(tempInputFile.toFile());
|
||||
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
// Prepare the OCRmyPDF command
|
||||
List<String> command = new ArrayList<>();
|
||||
command.add("ocrmypdf");
|
||||
command.add("--optimize");
|
||||
command.add(String.valueOf(optimizeLevel));
|
||||
|
||||
if (fastWebView != null && fastWebView) {
|
||||
long fileSize = inputFile.getSize();
|
||||
long fastWebViewSize = (long) (fileSize * 1.25); // 25% higher than file size
|
||||
command.add("--fast-web-view");
|
||||
command.add(String.valueOf(fastWebViewSize));
|
||||
}
|
||||
|
||||
if (jbig2Lossy != null && jbig2Lossy) {
|
||||
command.add("--jbig2-lossy");
|
||||
}
|
||||
|
||||
command.add(tempInputFile.toString());
|
||||
command.add(tempOutputFile.toString());
|
||||
|
||||
int returnCode = ProcessExecutor.runCommandWithOutputHandling(command);
|
||||
|
||||
try (InputStream is = pdfFile.getInputStream();
|
||||
PDDocument document = PDDocument.load(is)) {
|
||||
// Read the optimized PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
if (compressImages) {
|
||||
for (PDPage page : document.getPages()) {
|
||||
PDResources resources = page.getResources();
|
||||
for (COSName cosName : resources.getXObjectNames()) {
|
||||
if (resources.isImageXObject(cosName)) {
|
||||
PDImageXObject image = (PDImageXObject) resources.getXObject(cosName);
|
||||
BufferedImage bufferedImage = image.getImage();
|
||||
BufferedImage resizedImage = resizeImage(bufferedImage, resolutionPercentage);
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempInputFile);
|
||||
Files.delete(tempOutputFile);
|
||||
|
||||
// Return the optimized PDF as a response
|
||||
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_Optimized.pdf";
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(MediaType.APPLICATION_PDF);
|
||||
headers.setContentDispositionFormData("attachment", outputFilename);
|
||||
return ResponseEntity.ok().headers(headers).body(pdfBytes);
|
||||
}
|
||||
|
||||
if (useLossyCompression) {
|
||||
File tempFile = File.createTempFile("pdfbox", ".jpg");
|
||||
ImageIO.write(resizedImage, "jpg", tempFile);
|
||||
PDImageXObject newImage = PDImageXObject.createFromFile(tempFile.getAbsolutePath(), document);
|
||||
resources.put(cosName, newImage);
|
||||
} else {
|
||||
File tempFile = File.createTempFile("pdfbox", ".png");
|
||||
ImageIO.write(resizedImage, "png", tempFile);
|
||||
PDImageXObject newImage = PDImageXObject.createFromFile(tempFile.getAbsolutePath(), document);
|
||||
resources.put(cosName, newImage);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
document.save(baosPDFBox);
|
||||
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
|
||||
try (ByteArrayInputStream baisPDFBox = new ByteArrayInputStream(baosPDFBox.toByteArray());
|
||||
ByteArrayOutputStream baosFinal = new ByteArrayOutputStream()) {
|
||||
|
||||
PdfReader reader = new PdfReader(baisPDFBox);
|
||||
PdfStamper stamper = new PdfStamper(reader, baosFinal);
|
||||
|
||||
if (compressPDF) {
|
||||
stamper.setFullCompression();
|
||||
}
|
||||
|
||||
stamper.close();
|
||||
reader.close();
|
||||
|
||||
return PdfUtils.boasToWebResponse(baosFinal, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_compressed.pdf");
|
||||
} catch (IOException | DocumentException e) {
|
||||
e.printStackTrace();
|
||||
return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private BufferedImage resizeImage(BufferedImage originalImage, int resolutionPercentage) {
|
||||
int newWidth = originalImage.getWidth() * resolutionPercentage / 100;
|
||||
int newHeight = originalImage.getHeight() * resolutionPercentage / 100;
|
||||
BufferedImage resizedImage = new BufferedImage(newWidth, newHeight, originalImage.getType());
|
||||
Graphics2D g = resizedImage.createGraphics();
|
||||
g.drawImage(originalImage, 0, 0, newWidth, newHeight, null);
|
||||
g.dispose();
|
||||
return resizedImage;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,143 @@
|
||||
package stirling.software.SPDF.controller;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.web.servlet.ModelAndView;
|
||||
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
//import com.spire.pdf.*;
|
||||
@Controller
|
||||
public class OCRController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(OCRController.class);
|
||||
|
||||
@GetMapping("/ocr-pdf")
|
||||
public ModelAndView ocrPdfPage() {
|
||||
ModelAndView modelAndView = new ModelAndView("ocr-pdf");
|
||||
modelAndView.addObject("languages", getAvailableTesseractLanguages());
|
||||
modelAndView.addObject("currentPage", "ocr-pdf");
|
||||
return modelAndView;
|
||||
}
|
||||
|
||||
@PostMapping("/ocr-pdf")
|
||||
public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile,
|
||||
@RequestParam("languages") List<String> selectedLanguages,
|
||||
@RequestParam(name = "sidecar", required = false) Boolean sidecar) throws IOException, InterruptedException {
|
||||
|
||||
//--output-type pdfa
|
||||
if (selectedLanguages == null || selectedLanguages.size() < 1) {
|
||||
throw new IOException("Please select at least one language.");
|
||||
}
|
||||
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", ".pdf");
|
||||
inputFile.transferTo(tempInputFile.toFile());
|
||||
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
// Run OCR Command
|
||||
String languageOption = String.join("+", selectedLanguages);
|
||||
List<String> command = new ArrayList<>(Arrays.asList("ocrmypdf","--verbose", "2", "--language", languageOption,
|
||||
tempInputFile.toString(), tempOutputFile.toString()));
|
||||
String sidecarFile = tempOutputFile.toString().replace(".pdf", ".txt");
|
||||
if (sidecar != null && sidecar) {
|
||||
command.add("--sidecar");
|
||||
command.add(sidecarFile);
|
||||
}
|
||||
int returnCode = ProcessExecutor.runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the OCR processed PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempInputFile);
|
||||
// Return the OCR processed PDF as a response
|
||||
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf";
|
||||
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
|
||||
if (sidecar != null && sidecar) {
|
||||
// Create a zip file containing both the PDF and the text file
|
||||
String outputZipFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.zip";
|
||||
Path tempZipFile = Files.createTempFile("output_", ".zip");
|
||||
|
||||
try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
|
||||
// Add PDF file to the zip
|
||||
ZipEntry pdfEntry = new ZipEntry(outputFilename);
|
||||
zipOut.putNextEntry(pdfEntry);
|
||||
Files.copy(tempOutputFile, zipOut);
|
||||
zipOut.closeEntry();
|
||||
|
||||
// Add text file to the zip
|
||||
ZipEntry txtEntry = new ZipEntry(sidecarFile);
|
||||
zipOut.putNextEntry(txtEntry);
|
||||
Files.copy(Paths.get(sidecarFile), zipOut);
|
||||
zipOut.closeEntry();
|
||||
}
|
||||
|
||||
byte[] zipBytes = Files.readAllBytes(tempZipFile);
|
||||
|
||||
// Clean up the temporary zip file
|
||||
Files.delete(tempZipFile);
|
||||
Files.delete(tempOutputFile);
|
||||
Files.delete(Paths.get(sidecarFile));
|
||||
|
||||
// Return the zip file containing both the PDF and the text file
|
||||
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
|
||||
headers.setContentDispositionFormData("attachment", outputZipFilename);
|
||||
return ResponseEntity.ok().headers(headers).body(zipBytes);
|
||||
} else {
|
||||
// Return the OCR processed PDF as a response
|
||||
Files.delete(tempOutputFile);
|
||||
headers.setContentType(MediaType.APPLICATION_PDF);
|
||||
headers.setContentDispositionFormData("attachment", outputFilename);
|
||||
return ResponseEntity.ok().headers(headers).body(pdfBytes);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public List<String> getAvailableTesseractLanguages() {
|
||||
String tessdataDir = "/usr/share/tesseract-ocr/4.00/tessdata";
|
||||
File[] files = new File(tessdataDir).listFiles();
|
||||
if (files == null) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
return Arrays.stream(files)
|
||||
.filter(file -> file.getName().endsWith(".traineddata"))
|
||||
.map(file -> file.getName().replace(".traineddata", ""))
|
||||
.filter(lang -> !lang.equalsIgnoreCase("osd"))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
}
|
@ -1,79 +0,0 @@
|
||||
package stirling.software.SPDF.controller.converters;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.ui.Model;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import com.itextpdf.text.Document;
|
||||
import com.itextpdf.text.DocumentException;
|
||||
import com.itextpdf.text.Paragraph;
|
||||
import com.itextpdf.text.pdf.PdfPCell;
|
||||
import com.itextpdf.text.pdf.PdfPTable;
|
||||
import com.itextpdf.text.pdf.PdfWriter;
|
||||
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
|
||||
@Controller
|
||||
public class ConvertCsvController {
|
||||
|
||||
|
||||
@GetMapping("/csv-to-pdf")
|
||||
public String cinvertToPDF(Model model) {
|
||||
model.addAttribute("currentPage", "xlsx-to-pdf");
|
||||
return "convert/xlsx-to-pdf";
|
||||
}
|
||||
|
||||
|
||||
|
||||
@PostMapping("/csv-to-pdf")
|
||||
public ResponseEntity<byte[]> convertCsvToPdf(@RequestParam("fileInput") MultipartFile csvFile) throws IOException, DocumentException {
|
||||
// Create PDF document
|
||||
Document document = new Document();
|
||||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
||||
PdfWriter.getInstance(document, outputStream);
|
||||
document.open();
|
||||
|
||||
// Read CSV file
|
||||
InputStreamReader inputStreamReader = new InputStreamReader(csvFile.getInputStream(), StandardCharsets.UTF_8);
|
||||
BufferedReader bufferedReader = new BufferedReader(inputStreamReader);
|
||||
|
||||
// Create PDF table from CSV content
|
||||
PdfPTable table = null;
|
||||
String csvRow;
|
||||
while ((csvRow = bufferedReader.readLine()) != null) {
|
||||
String[] csvRowCells = csvRow.split(","); // Assuming comma as a delimiter
|
||||
|
||||
if (table == null) {
|
||||
table = new PdfPTable(csvRowCells.length);
|
||||
}
|
||||
|
||||
for (String cellValue : csvRowCells) {
|
||||
PdfPCell pdfCell = new PdfPCell(new Paragraph(cellValue));
|
||||
table.addCell(pdfCell);
|
||||
}
|
||||
}
|
||||
|
||||
if (table != null) {
|
||||
document.add(table);
|
||||
}
|
||||
|
||||
// Close BufferedReader, document, and output stream
|
||||
bufferedReader.close();
|
||||
document.close();
|
||||
outputStream.close();
|
||||
|
||||
return PdfUtils.boasToWebResponse(outputStream, csvFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -1,43 +0,0 @@
|
||||
package stirling.software.SPDF.controller.converters;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.docx4j.Docx4J;
|
||||
import org.docx4j.openpackaging.exceptions.Docx4JException;
|
||||
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.ui.Model;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
|
||||
@Controller
|
||||
public class ConvertDocController {
|
||||
|
||||
|
||||
@GetMapping("/docx-to-pdf")
|
||||
public String cinvertToPDF(Model model) {
|
||||
model.addAttribute("currentPage", "xlsx-to-pdf");
|
||||
return "convert/xlsx-to-pdf";
|
||||
}
|
||||
|
||||
@PostMapping("/docx-to-pdf")
|
||||
public ResponseEntity<byte[]> convertDocxToPdf(@RequestParam("fileInput") MultipartFile docxFile) throws IOException, Docx4JException {
|
||||
// Load WordprocessingMLPackage
|
||||
WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(docxFile.getInputStream());
|
||||
|
||||
// Create PDF output stream
|
||||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
||||
|
||||
// Convert DOCX to PDF
|
||||
Docx4J.toPDF(wordMLPackage, outputStream);
|
||||
|
||||
return PdfUtils.boasToWebResponse(outputStream, docxFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
|
||||
}
|
||||
|
||||
}
|
@ -1,54 +0,0 @@
|
||||
package stirling.software.SPDF.controller.converters;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.ui.Model;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import com.itextpdf.text.Document;
|
||||
import com.itextpdf.text.DocumentException;
|
||||
import com.itextpdf.text.pdf.PdfWriter;
|
||||
import com.itextpdf.tool.xml.XMLWorkerHelper;
|
||||
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
|
||||
@Controller
|
||||
public class ConvertHtmlController {
|
||||
|
||||
|
||||
@GetMapping("//html-to-pdf")
|
||||
public String cinvertToPDF(Model model) {
|
||||
model.addAttribute("currentPage", "xlsx-to-pdf");
|
||||
return "convert/xlsx-to-pdf";
|
||||
}
|
||||
|
||||
@PostMapping("/html-to-pdf")
|
||||
public ResponseEntity<byte[]> convertHtmlToPdf(@RequestParam("fileInput") MultipartFile htmlFile) throws IOException, DocumentException {
|
||||
// Create PDF document
|
||||
Document document = new Document();
|
||||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
||||
PdfWriter writer = PdfWriter.getInstance(document, outputStream);
|
||||
document.open();
|
||||
|
||||
// Read HTML file
|
||||
InputStream htmlInputStream = new ByteArrayInputStream(htmlFile.getBytes());
|
||||
|
||||
// Convert HTML content to PDF
|
||||
XMLWorkerHelper.getInstance().parseXHtml(writer, document, htmlInputStream);
|
||||
|
||||
// Close document and output stream
|
||||
document.close();
|
||||
outputStream.close();
|
||||
|
||||
return PdfUtils.boasToWebResponse(outputStream, "");
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,82 @@
|
||||
package stirling.software.SPDF.controller.converters;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.ui.Model;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.web.servlet.ModelAndView;
|
||||
|
||||
import stirling.software.SPDF.LibreOfficeListener;
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
@Controller
|
||||
public class ConvertOfficeController {
|
||||
|
||||
|
||||
@GetMapping("/file-to-pdf")
|
||||
public String convertToPdfForm(Model model) {
|
||||
model.addAttribute("currentPage", "file-to-pdf");
|
||||
return "convert/file-to-pdf";
|
||||
}
|
||||
|
||||
@PostMapping("/file-to-pdf")
|
||||
public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
|
||||
|
||||
//unused but can start server instance if startup time is to long
|
||||
//LibreOfficeListener.getInstance().start();
|
||||
|
||||
byte[] pdfByteArray = convertToPdf(inputFile);
|
||||
return PdfUtils.bytesToWebResponse(pdfByteArray, inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
|
||||
}
|
||||
|
||||
|
||||
public byte[] convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException {
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", "." + getFileExtension(inputFile.getOriginalFilename()));
|
||||
inputFile.transferTo(tempInputFile.toFile());
|
||||
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
// Run the LibreOffice command
|
||||
List<String> command = new ArrayList<>(Arrays.asList("unoconv", "-vvv",
|
||||
"-f",
|
||||
"pdf",
|
||||
"-o",
|
||||
tempOutputFile.toString(),
|
||||
tempInputFile.toString()));
|
||||
int returnCode = ProcessExecutor.runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the converted PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
// Clean up the temporary files
|
||||
Files.delete(tempInputFile);
|
||||
Files.delete(tempOutputFile);
|
||||
|
||||
return pdfBytes;
|
||||
}
|
||||
|
||||
|
||||
|
||||
private String getFileExtension(String fileName) {
|
||||
int dotIndex = fileName.lastIndexOf('.');
|
||||
if (dotIndex == -1) {
|
||||
return "";
|
||||
}
|
||||
return fileName.substring(dotIndex + 1);
|
||||
}
|
||||
}
|
@ -1,79 +0,0 @@
|
||||
package stirling.software.SPDF.controller.converters;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.Graphics2D;
|
||||
import java.awt.RenderingHints;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||
import org.apache.poi.xslf.usermodel.XSLFSlide;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.ui.Model;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import com.itextpdf.text.Document;
|
||||
import com.itextpdf.text.DocumentException;
|
||||
import com.itextpdf.text.Image;
|
||||
import com.itextpdf.text.PageSize;
|
||||
import com.itextpdf.text.pdf.PdfWriter;
|
||||
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
|
||||
@Controller
|
||||
public class ConvertPPTController {
|
||||
|
||||
|
||||
@GetMapping("/pptx-to-pdf")
|
||||
public String cinvertToPDF(Model model) {
|
||||
model.addAttribute("currentPage", "xlsx-to-pdf");
|
||||
return "convert/xlsx-to-pdf";
|
||||
}
|
||||
|
||||
@PostMapping("/pptx-to-pdf")
|
||||
public ResponseEntity<byte[]> convertPptxToPdf(@RequestParam("fileInput") MultipartFile pptxFile) throws IOException, DocumentException {
|
||||
// Read PowerPoint presentation
|
||||
XMLSlideShow ppt = new XMLSlideShow(pptxFile.getInputStream());
|
||||
|
||||
// Create PDF document
|
||||
Document pdfDocument = new Document(PageSize.A4.rotate());
|
||||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
||||
PdfWriter.getInstance(pdfDocument, outputStream);
|
||||
pdfDocument.open();
|
||||
|
||||
// Convert PowerPoint slides to images, then add them to the PDF
|
||||
for (XSLFSlide slide : ppt.getSlides()) {
|
||||
BufferedImage slideImage = new BufferedImage((int) Math.ceil(ppt.getPageSize().getWidth()), (int) Math.ceil(ppt.getPageSize().getHeight()), BufferedImage.TYPE_INT_RGB);
|
||||
Graphics2D graphics = slideImage.createGraphics();
|
||||
|
||||
// Set graphics rendering hints for better quality
|
||||
graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
|
||||
graphics.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
|
||||
graphics.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
|
||||
|
||||
// Draw the slide on the graphics
|
||||
graphics.setPaint(Color.white);
|
||||
graphics.fill(new Rectangle2D.Float(0, 0, slideImage.getWidth(), slideImage.getHeight()));
|
||||
slide.draw(graphics);
|
||||
|
||||
// Add the slide image to the PDF document
|
||||
Image image = Image.getInstance(slideImage, null);
|
||||
image.scaleToFit(PageSize.A4.getWidth() - 72, PageSize.A4.getHeight() - 72);
|
||||
pdfDocument.add(image);
|
||||
}
|
||||
|
||||
// Close PowerPoint and PDF documents
|
||||
ppt.close();
|
||||
pdfDocument.close();
|
||||
outputStream.close();
|
||||
|
||||
return PdfUtils.boasToWebResponse(outputStream, pptxFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
|
||||
}
|
||||
|
||||
}
|
@ -1,63 +0,0 @@
|
||||
package stirling.software.SPDF.controller.converters;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.commons.io.FilenameUtils;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.ui.Model;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import com.itextpdf.text.Document;
|
||||
import com.itextpdf.text.DocumentException;
|
||||
import com.itextpdf.text.Paragraph;
|
||||
import com.itextpdf.text.pdf.PdfWriter;
|
||||
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
|
||||
@Controller
|
||||
public class ConvertTextController {
|
||||
|
||||
|
||||
@GetMapping("/txt-rtf-to-pdf")
|
||||
public String cinvertToPDF(Model model) {
|
||||
model.addAttribute("currentPage", "xlsx-to-pdf");
|
||||
return "convert/xlsx-to-pdf";
|
||||
}
|
||||
|
||||
@PostMapping("/txt-rtf-to-pdf")
|
||||
public ResponseEntity<byte[]> convertTxtRtfToPdf(@RequestParam("fileInput") MultipartFile txtRtfFile) throws IOException, DocumentException {
|
||||
// Create PDF document
|
||||
Document document = new Document();
|
||||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
||||
PdfWriter.getInstance(document, outputStream);
|
||||
document.open();
|
||||
|
||||
// Read TXT/RTF file content
|
||||
String fileContent;
|
||||
String fileExtension = FilenameUtils.getExtension(txtRtfFile.getOriginalFilename());
|
||||
if (fileExtension.equalsIgnoreCase("rtf")) {
|
||||
HWPFDocument hwpfDocument = new HWPFDocument(new POIFSFileSystem(txtRtfFile.getInputStream()));
|
||||
fileContent = hwpfDocument.getText().toString();
|
||||
} else {
|
||||
fileContent = new String(txtRtfFile.getBytes(), StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
// Add content to PDF
|
||||
document.add(new Paragraph(fileContent));
|
||||
|
||||
// Close document and output stream
|
||||
document.close();
|
||||
outputStream.close();
|
||||
|
||||
return PdfUtils.boasToWebResponse(outputStream, txtRtfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
|
||||
}
|
||||
|
||||
}
|
@ -1,152 +0,0 @@
|
||||
package stirling.software.SPDF.controller.converters;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.poi.ss.usermodel.Cell;
|
||||
import org.apache.poi.ss.usermodel.DataFormatter;
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.ss.usermodel.WorkbookFactory;
|
||||
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
|
||||
import org.apache.poi.xssf.usermodel.XSSFColor;
|
||||
import org.apache.poi.xssf.usermodel.XSSFFont;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.ui.Model;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import com.itextpdf.text.BaseColor;
|
||||
import com.itextpdf.text.Document;
|
||||
import com.itextpdf.text.DocumentException;
|
||||
import com.itextpdf.text.Font;
|
||||
import com.itextpdf.text.Paragraph;
|
||||
import com.itextpdf.text.pdf.PdfPCell;
|
||||
import com.itextpdf.text.pdf.PdfPTable;
|
||||
import com.itextpdf.text.pdf.PdfWriter;
|
||||
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
|
||||
@Controller
|
||||
public class ConvertXlsxController {
|
||||
|
||||
|
||||
@GetMapping("/xlsx-to-pdf")
|
||||
public String cinvertToPDF(Model model) {
|
||||
model.addAttribute("currentPage", "xlsx-to-pdf");
|
||||
return "convert/xlsx-to-pdf";
|
||||
}
|
||||
|
||||
@PostMapping("/xlsx-to-pdf")
|
||||
public ResponseEntity<byte[]> convertToPDF(@RequestParam("fileInput") MultipartFile xlsx) throws IOException, DocumentException {
|
||||
// Load Excel file
|
||||
Workbook workbook = WorkbookFactory.create(xlsx.getInputStream());
|
||||
|
||||
// Create PDF document
|
||||
Document document = new Document();
|
||||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
||||
PdfWriter.getInstance(document, outputStream);
|
||||
document.open();
|
||||
|
||||
// Convert each sheet in Excel to a separate page in PDF
|
||||
for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
|
||||
Sheet sheet = workbook.getSheetAt(i);
|
||||
int numOfColumns = sheet.getRow(0).getPhysicalNumberOfCells();
|
||||
PdfPTable table = new PdfPTable(numOfColumns);
|
||||
|
||||
for (int row = 0; row < sheet.getPhysicalNumberOfRows(); row++) {
|
||||
Row excelRow = sheet.getRow(row);
|
||||
if (excelRow == null) {
|
||||
continue; // Skip this row if it's null
|
||||
}
|
||||
for (int cell = 0; cell < excelRow.getPhysicalNumberOfCells(); cell++) {
|
||||
Cell excelCell = excelRow.getCell(cell);
|
||||
|
||||
// Check if the cell is null
|
||||
if (excelCell == null) {
|
||||
table.addCell(""); // Add an empty cell to the PDF table
|
||||
continue;
|
||||
}
|
||||
|
||||
// Convert cell to string
|
||||
DataFormatter dataFormatter = new DataFormatter();
|
||||
String cellValue = dataFormatter.formatCellValue(excelCell);
|
||||
System.out.println("Cell Value: " + cellValue);
|
||||
// Get Excel cell font
|
||||
Font cellFont = getFontFromExcelCell(workbook, excelCell);
|
||||
|
||||
// Create PDF cell with Excel cell font
|
||||
PdfPCell pdfCell = new PdfPCell(new Paragraph(cellValue, cellFont));
|
||||
|
||||
// Set cell height and width
|
||||
float height = sheet.getRow(row).getHeightInPoints();
|
||||
System.out.print(height);
|
||||
pdfCell.setFixedHeight(30f);
|
||||
|
||||
|
||||
// Copy cell style, borders, and background color
|
||||
XSSFCellStyle cellStyle = (XSSFCellStyle) excelCell.getCellStyle();
|
||||
if (cellStyle != null) {
|
||||
XSSFColor bottomBorderColor = cellStyle.getBottomBorderXSSFColor();
|
||||
if (bottomBorderColor != null) {
|
||||
pdfCell.setBorderColor(new BaseColor(bottomBorderColor.getRGB()[0] & 0xFF, bottomBorderColor.getRGB()[1] & 0xFF, bottomBorderColor.getRGB()[2] & 0xFF));
|
||||
}
|
||||
|
||||
XSSFColor topBorderColor = cellStyle.getTopBorderXSSFColor();
|
||||
if (topBorderColor != null) {
|
||||
pdfCell.setBorderColor(new BaseColor(topBorderColor.getRGB()[0] & 0xFF, topBorderColor.getRGB()[1] & 0xFF, topBorderColor.getRGB()[2] & 0xFF));
|
||||
}
|
||||
|
||||
XSSFColor leftBorderColor = cellStyle.getLeftBorderXSSFColor();
|
||||
if (leftBorderColor != null) {
|
||||
pdfCell.setBorderColor(new BaseColor(leftBorderColor.getRGB()[0] & 0xFF, leftBorderColor.getRGB()[1] & 0xFF, leftBorderColor.getRGB()[2] & 0xFF));
|
||||
}
|
||||
|
||||
XSSFColor rightBorderColor = cellStyle.getRightBorderXSSFColor();
|
||||
if (rightBorderColor != null) {
|
||||
pdfCell.setBorderColor(new BaseColor(rightBorderColor.getRGB()[0] & 0xFF, rightBorderColor.getRGB()[1] & 0xFF, rightBorderColor.getRGB()[2] & 0xFF));
|
||||
}
|
||||
|
||||
XSSFColor fillForegroundColor = cellStyle.getFillForegroundXSSFColor();
|
||||
if (fillForegroundColor != null) {
|
||||
pdfCell.setBackgroundColor(new BaseColor(fillForegroundColor.getRGB()[0] & 0xFF, fillForegroundColor.getRGB()[1] & 0xFF, fillForegroundColor.getRGB()[2] & 0xFF));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
table.addCell(pdfCell);
|
||||
}
|
||||
}
|
||||
|
||||
// Add sheet to PDF
|
||||
document.add(table);
|
||||
|
||||
// Add page break if there are more sheets
|
||||
if (i < workbook.getNumberOfSheets() - 1) {
|
||||
document.newPage();
|
||||
}
|
||||
}
|
||||
|
||||
// Close document and output stream
|
||||
document.close();
|
||||
outputStream.flush();
|
||||
outputStream.close();
|
||||
|
||||
// Return PDF as response
|
||||
return PdfUtils.boasToWebResponse(outputStream, xlsx.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
|
||||
}
|
||||
|
||||
private Font getFontFromExcelCell(Workbook workbook, Cell excelCell) {
|
||||
XSSFFont excelFont = ((XSSFCellStyle) excelCell.getCellStyle()).getFont();
|
||||
Font.FontFamily fontFamily = Font.getFamily(excelFont.getFontName());
|
||||
float fontSize = excelFont.getFontHeightInPoints();
|
||||
int fontStyle = (excelFont.getBold() ? Font.BOLD : Font.NORMAL) | (excelFont.getItalic() ? Font.ITALIC : Font.NORMAL);
|
||||
|
||||
return new Font(fontFamily, fontSize, fontStyle);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,69 @@
|
||||
package stirling.software.SPDF.utils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.io.BufferedReader;
|
||||
import java.util.ArrayList;
|
||||
/**
 * Helper for running external commands while draining their output streams.
 */
public class ProcessExecutor {

    /**
     * Runs the given command, collects its standard output and standard error
     * line by line (decoded as UTF-8), echoes both to {@code System.out} and
     * waits for the process to finish.
     *
     * @param command the program and its arguments, one list element per argument
     * @return the exit code of the process; always 0, because any other exit code raises an exception
     * @throws IOException          if the process cannot be started or exits with a non-zero code
     * @throws InterruptedException if the calling thread is interrupted while waiting;
     *                              the child process is asked to terminate first
     */
    public static int runCommandWithOutputHandling(List<String> command) throws IOException, InterruptedException {
        Process process = new ProcessBuilder(command).start();

        // Each list is written by exactly one reader thread and only read after join().
        List<String> errorLines = new ArrayList<>();
        List<String> outputLines = new ArrayList<>();

        // Both streams are drained concurrently so the child cannot block on a full pipe.
        Thread errorReaderThread = startLineCollector(
                new BufferedReader(new InputStreamReader(process.getErrorStream(), StandardCharsets.UTF_8)), errorLines);
        Thread outputReaderThread = startLineCollector(
                new BufferedReader(new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8)), outputLines);

        int exitCode;
        try {
            // Wait for the process to complete, then for the readers to reach end of stream.
            exitCode = process.waitFor();
            errorReaderThread.join();
            outputReaderThread.join();
        } catch (InterruptedException e) {
            // Do not leave an orphaned child process behind when the caller gives up.
            process.destroy();
            throw e;
        }

        if (!outputLines.isEmpty()) {
            System.out.println("Command output:\n" + String.join("\n", outputLines));
        }

        String errorMessage = String.join("\n", errorLines);
        if (!errorLines.isEmpty()) {
            System.out.println("Command error output:\n" + errorMessage);
        }

        // A non-zero exit code is a failure whether or not the command wrote to stderr.
        if (exitCode != 0) {
            throw new IOException("Command process failed with exit code " + exitCode + ". Error message: "
                    + (errorLines.isEmpty() ? "<no error output>" : errorMessage));
        }

        return exitCode;
    }

    /**
     * Starts a thread that reads {@code reader} to end of stream, appending every line to {@code sink},
     * and closes the reader afterwards.
     */
    private static Thread startLineCollector(BufferedReader reader, List<String> sink) {
        Thread collector = new Thread(() -> {
            try (BufferedReader in = reader) {
                String line;
                while ((line = in.readLine()) != null) {
                    sink.add(line);
                }
            } catch (IOException e) {
                // Best effort: keep whatever was read so far, but make the failure visible.
                System.err.println("Failed to read process output: " + e);
            }
        });
        collector.start();
        return collector;
    }

}
|
@ -0,0 +1,11 @@
|
||||
fileToPDF.fileTypesList=Microsoft Word: (DOC, DOCX, DOT, DOTX) \
|
||||
Microsoft Excel: (CSV, XLS, XLSX, XLT, XLTX, SLK, DIF) \
|
||||
Microsoft PowerPoint: (PPT, PPTX) \
|
||||
OpenDocument Formats: (ODT, OTT, ODS, OTS, ODP, OTP, ODG, OTG) \
|
||||
Plain Text: (TXT, TEXT, XML) \
|
||||
Rich Text Format: (RTF) \
|
||||
Images: (BMP, GIF, JPEG, PNG, TIF, PBM, PGM, PPM, RAS, XBM, XPM, SVG, SVM, WMF) \
|
||||
HTML: (HTML) \
|
||||
Lotus Word Pro: (LWP) \
|
||||
StarOffice formats: (SDA, SDC, SDD, SDW, STC, STD, STI, STW, SXD, SXG, SXI, SXW) \
|
||||
Other formats: (DBF, FODS, VSD, VOR, VOR3, VOR4, UOP, PCT, PS, PDF)
|
@ -224,19 +224,8 @@ changeMetadata.selectText.5=Add Custom Metadata Entry
|
||||
changeMetadata.submit=Change
|
||||
|
||||
|
||||
|
||||
xlsToPdf.title=Excel to PDF
|
||||
xlsToPdf.header=Excel to PDF
|
||||
xlsToPdf.selectText.1=Select XLS or XLSX Excel sheet to convert
|
||||
xlsToPdf.convert=convert
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
fileToPDF.credit=This service uses LibreOffice and Unoconv for file conversion.
|
||||
fileToPDF.supportedFileTypes=Supported file types should include those listed below; however, for a full, up-to-date list of supported formats, please refer to the LibreOffice documentation
|
||||
|
||||
|
||||
|
||||
|
@ -14,60 +14,30 @@
|
||||
<div class="row justify-content-center">
|
||||
<div class="col-md-6">
|
||||
<h2 th:text="#{compress.header}"></h2>
|
||||
<form method="post" enctype="multipart/form-data" th:action="@{/compress-pdf}">
|
||||
<div class="form-group">
|
||||
<label for="fileInput">Select a PDF file to compress:</label>
|
||||
<input type="file" class="form-control-file" id="fileInput" name="fileInput" accept=".pdf">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" id="compressPDF" name="compressPDF" checked>
|
||||
<label class="form-check-label" for="compressPDF">Compress PDF?</label>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="pdfCompressionLevel">PDF Compression Level:</label>
|
||||
<select class="form-control" id="pdfCompressionLevel" name="pdfCompressionLevel">
|
||||
<option value="0">0 (No compression)</option>
|
||||
<option value="1">1 (Lowest compression)</option>
|
||||
<option value="2">2</option>
|
||||
<option value="3">3</option>
|
||||
<option value="4">4</option>
|
||||
<option value="5" selected>5 (Default compression)</option>
|
||||
<option value="6">6</option>
|
||||
<option value="7">7</option>
|
||||
<option value="8">8</option>
|
||||
<option value="9">9 (Maximum compression)</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" id="compressImages" name="compressImages" checked>
|
||||
<label class="form-check-label" for="compressImages">Compress Images?</label>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" id="useLossyCompression" name="useLossyCompression">
|
||||
<label class="form-check-label" for="useLossyCompression">Use Lossy Compression for Images?</label>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="imageCompressionLevel">Image Compression Level:</label>
|
||||
<select class="form-control" id="imageCompressionLevel" name="imageCompressionLevel">
|
||||
<option value="0">0 (No compression)</option>
|
||||
<option value="10">10 (Lowest quality)</option>
|
||||
<option value="25">25</option>
|
||||
<option value="50" selected>50 (Default quality)</option>
|
||||
<option value="75">75</option>
|
||||
<option value="90">90 (High quality)</option>
|
||||
<option value="100">100 (Best quality)</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<button type="submit" class="btn btn-primary">Compress PDF</button>
|
||||
</div>
|
||||
</form>
|
||||
<form action="#" th:action="@{/compress-pdf}" method="post" enctype="multipart/form-data">
|
||||
<div>
|
||||
<label for="fileInput">Choose a PDF file</label>
|
||||
<input type="file" name="fileInput" id="fileInput" accept="application/pdf" required>
|
||||
</div>
|
||||
<div>
|
||||
<label for="optimizeLevel">Optimization level:</label>
|
||||
<select name="optimizeLevel" id="optimizeLevel">
|
||||
<option value="0">-O0 (No optimization)</option>
|
||||
<option value="1" selected>-O1 (Default, lossless optimization)</option>
|
||||
<option value="2">-O2 (Lossy optimization)</option>
|
||||
<option value="3">-O3 (Lossy optimization, more aggressive)</option>
|
||||
</select>
|
||||
</div>
|
||||
<div>
|
||||
<input type="checkbox" name="fastWebView" id="fastWebView" checked>
|
||||
<label for="fastWebView">Enable fast web view (linearize PDF)</label>
|
||||
</div>
|
||||
<div>
|
||||
<input type="checkbox" name="jbig2Lossy" id="jbig2Lossy">
|
||||
<label for="jbig2Lossy">Enable lossy JBIG2 encoding</label>
|
||||
</div>
|
||||
<button type="submit">Optimize PDF</button>
|
||||
</form>
|
||||
|
||||
<th:block th:insert="~{fragments/common :: filelist}"></th:block>
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
<!DOCTYPE html>
|
||||
<html th:lang="${#locale.language}" th:lang-direction="#{language.direction}" xmlns:th="http://www.thymeleaf.org">
|
||||
|
||||
<th:block th:insert="~{fragments/common :: head(title=#{xlsToPdf.title})}"></th:block>
|
||||
<th:block th:insert="~{fragments/common :: head(title=#{fileToPDF.title})}"></th:block>
|
||||
|
||||
|
||||
<body>
|
||||
@ -12,18 +12,23 @@
|
||||
<div class="container">
|
||||
<div class="row justify-content-center">
|
||||
<div class="col-md-6">
|
||||
<h2 th:text="#{xlsToPdf.header}"></h2>
|
||||
<h2 th:text="#{fileToPDF.header}"></h2>
|
||||
|
||||
<form method="post" enctype="multipart/form-data" th:action="@{xlsx-to-pdf}">
|
||||
<form method="post" enctype="multipart/form-data" th:action="@{file-to-pdf}">
|
||||
<div class="custom-file">
|
||||
<input type="file" class="custom-file-input" id="fileInput" name="fileInput" required>
|
||||
<label class="custom-file-label" for="fileInput" th:text="#{xlsToPdf.selectText.1}"></label>
|
||||
<label class="custom-file-label" for="fileInput" th:text="#{filePrompt}"></label>
|
||||
</div>
|
||||
<br> <br>
|
||||
<button type="submit" class="btn btn-primary" th:text="#{imageToPDF.submit}"></button>
|
||||
<button type="submit" class="btn btn-primary" th:text="#{fileToPDF.submit}"></button>
|
||||
|
||||
</form>
|
||||
<th:block th:insert="~{fragments/common :: filelist}"></th:block>
|
||||
<p class="mt-3" th:text="#{fileToPDF.credit}"></p>
|
||||
<p class="mt-3" th:text="#{fileToPDF.supportedFileTypes}"></p>
|
||||
<p th:utext="#{fileToPDF.fileTypesList}"></p>
|
||||
<a href="https://help.libreoffice.org/latest/en-US/text/shared/guide/supported_formats.html">https://help.libreoffice.org/latest/en-US/text/shared/guide/supported_formats.html</a>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
51
src/main/resources/templates/ocr-pdf.html
Normal file
51
src/main/resources/templates/ocr-pdf.html
Normal file
@ -0,0 +1,51 @@
|
||||
<!DOCTYPE html>
|
||||
<html th:lang="${#locale.language}" th:lang-direction="#{language.direction}" xmlns:th="http://www.thymeleaf.org">
|
||||
|
||||
|
||||
<th:block th:insert="~{fragments/common :: head(title=#{addImage.title})}"></th:block>
|
||||
|
||||
|
||||
<body>
|
||||
<div id="page-container">
|
||||
<div id="content-wrap">
|
||||
<div th:insert="~{fragments/navbar.html :: navbar}"></div>
|
||||
<br> <br>
|
||||
<div class="container">
|
||||
<div class="row justify-content-center">
|
||||
<div class="col-md-6">
|
||||
<h2 th:text="#{ocrPDF.header}"></h2>
|
||||
|
||||
<form action="#" th:action="@{/ocr-pdf}" method="post" enctype="multipart/form-data" class="mb-3">
|
||||
<div class="mb-3">
|
||||
<label for="fileInput" class="form-label">Choose a PDF file</label>
|
||||
<input type="file" name="fileInput" id="fileInput" accept="application/pdf" required class="form-control">
|
||||
</div>
|
||||
<div class="mb-3">
|
||||
<label for="languages" class="form-label">Select languages that are to be detected within the PDF (Ones listed are the ones currently detected):</label>
|
||||
<div id="languages">
|
||||
<div th:each="language: ${languages}">
|
||||
<input type="checkbox" class="form-check-input" th:name="languages" th:value="${language}" th:id="${'language-' + language}" />
|
||||
<label class="form-check-label" th:for="${'language-' + language}" th:text="${language}"></label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="mb-3">
|
||||
<input type="checkbox" class="form-check-input" name="sidecar" id="sidecar" />
|
||||
<label class="form-check-label" for="sidecar">Produce text file containing OCR text alongside the OCR'ed PDF</label>
|
||||
</div>
|
||||
<button type="submit" class="btn btn-primary">Process PDF with OCR</button>
|
||||
</form>
|
||||
<p>
|
||||
Please read the OCR documentation (HowToUseOCR.md) for instructions on adding other languages and/or running outside of Docker
|
||||
</p>
|
||||
|
||||
<th:block th:insert="~{fragments/common :: filelist}"></th:block>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div th:insert="~{fragments/footer.html :: footer}"></div>
|
||||
</div>
|
||||
</body>
|
||||
|
||||
</html>
|
Loading…
Reference in New Issue
Block a user