diff --git a/Dockerfile b/Dockerfile index 424257e1..08ef7664 100644 --- a/Dockerfile +++ b/Dockerfile @@ -30,6 +30,7 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et tini \ bash \ curl \ + qpdf \ shadow \ su-exec \ openssl \ @@ -40,7 +41,6 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et # pdftohtml poppler-utils \ # OCR MY PDF (unpaper for descew and other advanced features) - ocrmypdf \ tesseract-ocr-data-eng \ # CV py3-opencv \ diff --git a/build.gradle b/build.gradle index 676e8b16..8224cb19 100644 --- a/build.gradle +++ b/build.gradle @@ -24,7 +24,7 @@ ext { } group = "stirling.software" -version = "0.33.2" +version = "0.33.1" java { // 17 is lowest but we support and recommend 21 diff --git a/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java b/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java index 64f56045..4a6cc1e4 100644 --- a/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java +++ b/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java @@ -188,7 +188,7 @@ public class EndpointConfiguration { addEndpointToGroup("OpenCV", "extract-image-scans"); // LibreOffice - addEndpointToGroup("LibreOffice", "repair"); + addEndpointToGroup("qpdf", "repair"); addEndpointToGroup("LibreOffice", "file-to-pdf"); addEndpointToGroup("LibreOffice", "pdf-to-word"); addEndpointToGroup("LibreOffice", "pdf-to-presentation"); @@ -199,10 +199,10 @@ public class EndpointConfiguration { // Unoconv addEndpointToGroup("Unoconv", "file-to-pdf"); - // OCRmyPDF - addEndpointToGroup("OCRmyPDF", "compress-pdf"); - addEndpointToGroup("OCRmyPDF", "pdf-to-pdfa"); - addEndpointToGroup("OCRmyPDF", "ocr-pdf"); + // qpdf + addEndpointToGroup("qpdf", "compress-pdf"); + addEndpointToGroup("qpdf", "pdf-to-pdfa"); + addEndpointToGroup("qpdf", "ocr-pdf"); // Java addEndpointToGroup("Java", "merge-pdfs"); @@ -249,9 +249,9 @@ public class EndpointConfiguration 
{ addEndpointToGroup("Javascript", "adjust-contrast"); // Ghostscript dependent endpoints - addEndpointToGroup("Ghostscript", "compress-pdf"); - addEndpointToGroup("Ghostscript", "pdf-to-pdfa"); - addEndpointToGroup("Ghostscript", "repair"); + addEndpointToGroup("qpdf", "compress-pdf"); + addEndpointToGroup("qpdf", "pdf-to-pdfa"); + addEndpointToGroup("qpdf", "repair"); // Weasyprint dependent endpoints addEndpointToGroup("Weasyprint", "html-to-pdf"); diff --git a/src/main/java/stirling/software/SPDF/config/ExternalAppDepConfig.java b/src/main/java/stirling/software/SPDF/config/ExternalAppDepConfig.java index f8ab43ad..7fff2bc0 100644 --- a/src/main/java/stirling/software/SPDF/config/ExternalAppDepConfig.java +++ b/src/main/java/stirling/software/SPDF/config/ExternalAppDepConfig.java @@ -37,12 +37,11 @@ public class ExternalAppDepConfig { private final Map> commandToGroupMapping = new HashMap<>() { { - put("gs", List.of("Ghostscript")); put("soffice", List.of("LibreOffice")); - put("ocrmypdf", List.of("OCRmyPDF")); put("weasyprint", List.of("Weasyprint")); put("pdftohtml", List.of("Pdftohtml")); put("unoconv", List.of("Unoconv")); + put("qpdf", List.of("qpdf")); } }; @@ -97,9 +96,8 @@ public class ExternalAppDepConfig { public void checkDependencies() { // Check core dependencies - checkDependencyAndDisableGroup("gs"); checkDependencyAndDisableGroup("soffice"); - checkDependencyAndDisableGroup("ocrmypdf"); + checkDependencyAndDisableGroup("qpdf"); checkDependencyAndDisableGroup("weasyprint"); checkDependencyAndDisableGroup("pdftohtml"); checkDependencyAndDisableGroup("unoconv"); diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java index c437c4cb..c72ddf0f 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java +++ 
b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java @@ -1,12 +1,13 @@ package stirling.software.SPDF.controller.api.converters; -import java.io.FileOutputStream; -import java.io.OutputStream; +import java.io.File; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import org.apache.commons.io.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.http.MediaType; @@ -37,59 +38,90 @@ public class ConvertPDFToPDFA { @Operation( summary = "Convert a PDF to a PDF/A", description = - "This endpoint converts a PDF file to a PDF/A file. PDF/A is a format designed for long-term archiving of digital documents. Input:PDF Output:PDF Type:SISO") + "This endpoint converts a PDF file to a PDF/A file using LibreOffice. PDF/A is a format designed for long-term archiving of digital documents. Input:PDF Output:PDF Type:SISO") public ResponseEntity pdfToPdfA(@ModelAttribute PdfToPdfARequest request) throws Exception { MultipartFile inputFile = request.getFileInput(); String outputFormat = request.getOutputFormat(); - // Convert MultipartFile to byte[] - byte[] pdfBytes = inputFile.getBytes(); - - // Save the uploaded file to a temporary location - Path tempInputFile = Files.createTempFile("input_", ".pdf"); - try (OutputStream outputStream = new FileOutputStream(tempInputFile.toFile())) { - outputStream.write(pdfBytes); + // Validate input file type + if (!"application/pdf".equals(inputFile.getContentType())) { + logger.error("Invalid input file type: {}", inputFile.getContentType()); + throw new IllegalArgumentException("Input file must be a PDF"); } - // Prepare the output file path - Path tempOutputFile = Files.createTempFile("output_", ".pdf"); - - // Prepare the ghostscript command - List command = new ArrayList<>(); - command.add("gs"); - command.add("-dPDFA=" + ("pdfa".equals(outputFormat) ? 
"2" : "1")); - command.add("-dNOPAUSE"); - command.add("-dBATCH"); - command.add("-sColorConversionStrategy=sRGB"); - command.add("-sDEVICE=pdfwrite"); - command.add("-dPDFACompatibilityPolicy=2"); - command.add("-o"); - command.add(tempOutputFile.toString()); - command.add(tempInputFile.toString()); - - ProcessExecutorResult returnCode = - ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT) - .runCommandWithOutputHandling(command); - - if (returnCode.getRc() != 0) { - logger.info( - outputFormat + " conversion failed with return code: " + returnCode.getRc()); + // Get the original filename without extension + String originalFileName = Filenames.toSimpleFileName(inputFile.getOriginalFilename()); + if (originalFileName == null || originalFileName.trim().isEmpty()) { + originalFileName = "output.pdf"; } + String baseFileName = + originalFileName.contains(".") + ? originalFileName.substring(0, originalFileName.lastIndexOf('.')) + : originalFileName; + + Path tempInputFile = null; + Path tempOutputDir = null; + byte[] fileBytes; try { - byte[] pdfBytesOutput = Files.readAllBytes(tempOutputFile); - // Return the optimized PDF as a response - String outputFilename = - Filenames.toSimpleFileName(inputFile.getOriginalFilename()) - .replaceFirst("[.][^.]+$", "") - + "_PDFA.pdf"; + // Save uploaded file to temp location + tempInputFile = Files.createTempFile("input_", ".pdf"); + inputFile.transferTo(tempInputFile); + + // Create temp output directory + tempOutputDir = Files.createTempDirectory("output_"); + + // Determine PDF/A filter based on requested format + String pdfFilter = + "pdfa".equals(outputFormat) + ? 
"writer_pdf_Export:{'SelectPdfVersion':{'Value':'2'}}:writer_pdf_Export" + : "writer_pdf_Export:{'SelectPdfVersion':{'Value':'1'}}:writer_pdf_Export"; + + // Prepare LibreOffice command + List command = + new ArrayList<>( + Arrays.asList( + "soffice", + "--headless", + "--nologo", + "--convert-to", + "pdf:" + pdfFilter, + "--outdir", + tempOutputDir.toString(), + tempInputFile.toString())); + + ProcessExecutorResult returnCode = + ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE) + .runCommandWithOutputHandling(command); + + if (returnCode.getRc() != 0) { + logger.error("PDF/A conversion failed with return code: {}", returnCode.getRc()); + throw new RuntimeException("PDF/A conversion failed"); + } + + // Get the output file + File[] outputFiles = tempOutputDir.toFile().listFiles(); + if (outputFiles == null || outputFiles.length != 1) { + throw new RuntimeException( + "Expected exactly one output file but found " + + (outputFiles == null ? "none" : outputFiles.length)); + } + + fileBytes = FileUtils.readFileToByteArray(outputFiles[0]); + String outputFilename = baseFileName + "_PDFA.pdf"; + return WebResponseUtils.bytesToWebResponse( - pdfBytesOutput, outputFilename, MediaType.APPLICATION_PDF); + fileBytes, outputFilename, MediaType.APPLICATION_PDF); + } finally { - // Clean up the temporary files - Files.deleteIfExists(tempInputFile); - Files.deleteIfExists(tempOutputFile); + // Clean up temporary files + if (tempInputFile != null) { + Files.deleteIfExists(tempInputFile); + } + if (tempOutputDir != null) { + FileUtils.deleteDirectory(tempOutputDir.toFile()); + } } } } diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ExtractCSVController.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ExtractCSVController.java index a6415bfc..f86b7aa3 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ExtractCSVController.java +++ 
b/src/main/java/stirling/software/SPDF/controller/api/converters/ExtractCSVController.java @@ -20,7 +20,7 @@ import org.springframework.web.bind.annotation.RestController; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; -import stirling.software.SPDF.controller.api.CropController; + import stirling.software.SPDF.model.api.extract.PDFFilePage; import stirling.software.SPDF.pdf.FlexibleCSVWriter; import technology.tabula.ObjectExtractor; @@ -37,11 +37,15 @@ public class ExtractCSVController { private static final Logger logger = LoggerFactory.getLogger(ExtractCSVController.class); @PostMapping(value = "/pdf/csv", consumes = "multipart/form-data") - @Operation(summary = "Extracts a CSV document from a PDF", description = "This operation takes an input PDF file and returns CSV file of whole page. Input:PDF Output:CSV Type:SISO") + @Operation( + summary = "Extracts a CSV document from a PDF", + description = + "This operation takes an input PDF file and returns CSV file of whole page. 
Input:PDF Output:CSV Type:SISO") public ResponseEntity PdfToCsv(@ModelAttribute PDFFilePage form) throws Exception { StringWriter writer = new StringWriter(); try (PDDocument document = Loader.loadPDF(form.getFileInput().getBytes())) { - CSVFormat format = CSVFormat.EXCEL.builder().setEscape('"').setQuoteMode(QuoteMode.ALL).build(); + CSVFormat format = + CSVFormat.EXCEL.builder().setEscape('"').setQuoteMode(QuoteMode.ALL).build(); Writer csvWriter = new FlexibleCSVWriter(format); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); try (ObjectExtractor extractor = new ObjectExtractor(document)) { @@ -56,8 +60,8 @@ public class ExtractCSVController { ContentDisposition.builder("attachment") .filename( form.getFileInput() - .getOriginalFilename() - .replaceFirst("[.][^.]+$", "") + .getOriginalFilename() + .replaceFirst("[.][^.]+$", "") + "_extracted.csv") .build()); headers.setContentType(MediaType.parseMediaType("text/csv")); diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java index 3f55a4f5..723bd371 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java @@ -10,7 +10,6 @@ import java.util.List; import javax.imageio.ImageIO; -import org.apache.commons.io.FileUtils; import org.apache.pdfbox.Loader; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDDocument; @@ -53,6 +52,54 @@ public class CompressController { this.pdfDocumentFactory = pdfDocumentFactory; } + private void compressImagesInPDF(Path pdfFile, double initialScaleFactor) throws Exception { + byte[] fileBytes = Files.readAllBytes(pdfFile); + try (PDDocument doc = Loader.loadPDF(fileBytes)) { + double scaleFactor = initialScaleFactor; + + for (PDPage page : doc.getPages()) { + PDResources res = page.getResources(); + 
if (res != null && res.getXObjectNames() != null) { + for (COSName name : res.getXObjectNames()) { + PDXObject xobj = res.getXObject(name); + if (xobj instanceof PDImageXObject) { + PDImageXObject image = (PDImageXObject) xobj; + BufferedImage bufferedImage = image.getImage(); + + int newWidth = (int) (bufferedImage.getWidth() * scaleFactor); + int newHeight = (int) (bufferedImage.getHeight() * scaleFactor); + + if (newWidth == 0 || newHeight == 0) { + continue; + } + + Image scaledImage = + bufferedImage.getScaledInstance( + newWidth, newHeight, Image.SCALE_SMOOTH); + + BufferedImage scaledBufferedImage = + new BufferedImage( + newWidth, newHeight, BufferedImage.TYPE_INT_RGB); + scaledBufferedImage.getGraphics().drawImage(scaledImage, 0, 0, null); + + ByteArrayOutputStream compressedImageStream = + new ByteArrayOutputStream(); + ImageIO.write(scaledBufferedImage, "jpeg", compressedImageStream); + byte[] imageBytes = compressedImageStream.toByteArray(); + compressedImageStream.close(); + + PDImageXObject compressedImage = + PDImageXObject.createFromByteArray( + doc, imageBytes, image.getCOSObject().toString()); + res.put(name, compressedImage); + } + } + } + } + doc.save(pdfFile.toString()); + } + } + @PostMapping(consumes = "multipart/form-data", value = "/compress-pdf") @Operation( summary = "Optimize PDF file", @@ -75,209 +122,92 @@ public class CompressController { autoMode = true; } - // Save the uploaded file to a temporary location Path tempInputFile = Files.createTempFile("input_", ".pdf"); inputFile.transferTo(tempInputFile.toFile()); long inputFileSize = Files.size(tempInputFile); - // Prepare the output file path - Path tempOutputFile = null; byte[] pdfBytes; try { tempOutputFile = Files.createTempFile("output_", ".pdf"); - // Determine initial optimization level based on expected size reduction, only if in - // autoMode + if (autoMode) { double sizeReductionRatio = expectedOutputSize / (double) inputFileSize; - if (sizeReductionRatio > 0.7) { - 
optimizeLevel = 1; - } else if (sizeReductionRatio > 0.5) { - optimizeLevel = 2; - } else if (sizeReductionRatio > 0.35) { - optimizeLevel = 3; - } else { - optimizeLevel = 3; - } + optimizeLevel = determineOptimizeLevel(sizeReductionRatio); } boolean sizeMet = false; - while (!sizeMet && optimizeLevel <= 4) { - // Prepare the Ghostscript command - List command = new ArrayList<>(); - command.add("gs"); - command.add("-sDEVICE=pdfwrite"); - command.add("-dCompatibilityLevel=1.5"); + while (!sizeMet && optimizeLevel <= 9) { - switch (optimizeLevel) { - case 1: - command.add("-dPDFSETTINGS=/prepress"); - break; - case 2: - command.add("-dPDFSETTINGS=/printer"); - break; - case 3: - command.add("-dPDFSETTINGS=/ebook"); - break; - case 4: - command.add("-dPDFSETTINGS=/screen"); - break; - default: - command.add("-dPDFSETTINGS=/default"); + // Apply additional image compression for levels 6-9 + if (optimizeLevel >= 6) { + // Calculate scale factor based on optimization level + double scaleFactor = + switch (optimizeLevel) { + case 6 -> 0.9; // 90% of original size + case 7 -> 0.8; // 80% of original size + case 8 -> 0.65; // 65% of original size + case 9 -> 0.5; // 50% of original size + default -> 1.0; + }; + compressImagesInPDF(tempInputFile, scaleFactor); } - command.add("-dNOPAUSE"); - command.add("-dQUIET"); - command.add("-dBATCH"); - command.add("-sOutputFile=" + tempOutputFile.toString()); + // Run QPDF optimization + List command = new ArrayList<>(); + command.add("qpdf"); + if (request.getNormalize()) { + command.add("--normalize-content=y"); + } + if (request.getLinearize()) { + command.add("--linearize"); + } + command.add("--optimize-images"); + command.add("--recompress-flate"); + command.add("--compression-level=" + optimizeLevel); + command.add("--compress-streams=y"); + command.add("--object-streams=generate"); command.add(tempInputFile.toString()); + command.add(tempOutputFile.toString()); - ProcessExecutorResult returnCode = - 
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT) - .runCommandWithOutputHandling(command); + ProcessExecutorResult returnCode = null; + try { + returnCode = + ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF) + .runCommandWithOutputHandling(command); + } catch (Exception e) { + if (returnCode != null && returnCode.getRc() != 3) { + throw e; + } + } - // Check if file size is within expected size or not auto mode so instantly finish + // Check if file size is within expected size or not auto mode long outputFileSize = Files.size(tempOutputFile); if (outputFileSize <= expectedOutputSize || !autoMode) { sizeMet = true; } else { - // Increase optimization level for next iteration - optimizeLevel++; - if (autoMode && optimizeLevel > 4) { - logger.info("Skipping level 5 due to bad results in auto mode"); + optimizeLevel = + incrementOptimizeLevel( + optimizeLevel, outputFileSize, expectedOutputSize); + if (autoMode && optimizeLevel > 9) { + logger.info("Maximum compression level reached in auto mode"); sizeMet = true; - } else { - logger.info( - "Increasing ghostscript optimisation level to " + optimizeLevel); } } } - if (expectedOutputSize != null && autoMode) { - long outputFileSize = Files.size(tempOutputFile); - byte[] fileBytes = Files.readAllBytes(tempOutputFile); - if (outputFileSize > expectedOutputSize) { - try (PDDocument doc = Loader.loadPDF(fileBytes)) { - long previousFileSize = 0; - double scaleFactorConst = 0.9f; - double scaleFactor = 0.9f; - while (true) { - for (PDPage page : doc.getPages()) { - PDResources res = page.getResources(); - if (res != null && res.getXObjectNames() != null) { - for (COSName name : res.getXObjectNames()) { - PDXObject xobj = res.getXObject(name); - if (xobj != null && xobj instanceof PDImageXObject) { - PDImageXObject image = (PDImageXObject) xobj; - - // Get the image in BufferedImage format - BufferedImage bufferedImage = image.getImage(); - - // Calculate the new dimensions - int newWidth = - 
(int) - (bufferedImage.getWidth() - * scaleFactorConst); - int newHeight = - (int) - (bufferedImage.getHeight() - * scaleFactorConst); - - // If the new dimensions are zero, skip this iteration - if (newWidth == 0 || newHeight == 0) { - continue; - } - - // Otherwise, proceed with the scaling - Image scaledImage = - bufferedImage.getScaledInstance( - newWidth, - newHeight, - Image.SCALE_SMOOTH); - - // Convert the scaled image back to a BufferedImage - BufferedImage scaledBufferedImage = - new BufferedImage( - newWidth, - newHeight, - BufferedImage.TYPE_INT_RGB); - scaledBufferedImage - .getGraphics() - .drawImage(scaledImage, 0, 0, null); - - // Compress the scaled image - ByteArrayOutputStream compressedImageStream = - new ByteArrayOutputStream(); - ImageIO.write( - scaledBufferedImage, - "jpeg", - compressedImageStream); - byte[] imageBytes = compressedImageStream.toByteArray(); - compressedImageStream.close(); - - PDImageXObject compressedImage = - PDImageXObject.createFromByteArray( - doc, - imageBytes, - image.getCOSObject().toString()); - - // Replace the image in the resources with the - // compressed - // version - res.put(name, compressedImage); - } - } - } - } - - // save the document to tempOutputFile again - doc.save(tempOutputFile.toString()); - - long currentSize = Files.size(tempOutputFile); - // Check if the overall PDF size is still larger than expectedOutputSize - if (currentSize > expectedOutputSize) { - // Log the current file size and scaleFactor - - logger.info( - "Current file size: " - + FileUtils.byteCountToDisplaySize(currentSize)); - logger.info("Current scale factor: " + scaleFactor); - - // The file is still too large, reduce scaleFactor and try again - scaleFactor *= 0.9f; // reduce scaleFactor by 10% - // Avoid scaleFactor being too small, causing the image to shrink to - // 0 - if (scaleFactor < 0.2f || previousFileSize == currentSize) { - throw new RuntimeException( - "Could not reach the desired size without excessively degrading 
image quality, lowest size recommended is " - + FileUtils.byteCountToDisplaySize(currentSize) - + ", " - + currentSize - + " bytes"); - } - previousFileSize = currentSize; - } else { - // The file is small enough, break the loop - break; - } - } - } - } - } // Read the optimized PDF file pdfBytes = Files.readAllBytes(tempOutputFile); Path finalFile = tempOutputFile; + // Check if optimized file is larger than the original if (pdfBytes.length > inputFileSize) { - // Log the occurrence logger.warn( "Optimized file is larger than the original. Returning the original file instead."); - - // Read the original file again finalFile = tempInputFile; } - // Return the optimized PDF as a response + String outputFilename = Filenames.toSimpleFileName(inputFile.getOriginalFilename()) .replaceFirst("[.][^.]+$", "") @@ -286,10 +216,31 @@ public class CompressController { pdfDocumentFactory.load(finalFile.toFile()), outputFilename); } finally { - // Clean up the temporary files - // deleted by multipart file handler deu to transferTo? 
- // Files.deleteIfExists(tempInputFile); Files.deleteIfExists(tempOutputFile); } } + + private int determineOptimizeLevel(double sizeReductionRatio) { + if (sizeReductionRatio > 0.9) return 1; + if (sizeReductionRatio > 0.8) return 2; + if (sizeReductionRatio > 0.7) return 3; + if (sizeReductionRatio > 0.6) return 4; + if (sizeReductionRatio > 0.5) return 5; + if (sizeReductionRatio > 0.4) return 6; + if (sizeReductionRatio > 0.3) return 7; + if (sizeReductionRatio > 0.2) return 8; + return 9; + } + + private int incrementOptimizeLevel(int currentLevel, long currentSize, long targetSize) { + double currentRatio = currentSize / (double) targetSize; + logger.info("Current compression ratio: {}", String.format("%.2f", currentRatio)); + + if (currentRatio > 2.0) { + return Math.min(9, currentLevel + 3); + } else if (currentRatio > 1.5) { + return Math.min(9, currentLevel + 2); + } + return Math.min(9, currentLevel + 1); + } } diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java index 96cabb60..6089c870 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java @@ -1,19 +1,29 @@ package stirling.software.SPDF.controller.api.misc; -import java.io.ByteArrayInputStream; +import java.awt.image.BufferedImage; +import java.io.BufferedReader; import java.io.File; -import java.io.FileOutputStream; +import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStreamReader; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Comparator; import java.util.List; import java.util.stream.Collectors; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; +import javax.imageio.ImageIO; + +import 
org.apache.pdfbox.multipdf.PDFMergerUtility; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.rendering.PDFRenderer; +import org.apache.pdfbox.text.PDFTextStripper; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; @@ -23,24 +33,29 @@ import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; import org.springframework.web.multipart.MultipartFile; -import io.github.pixee.security.Filenames; -import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; +import lombok.extern.slf4j.Slf4j; import stirling.software.SPDF.model.ApplicationProperties; import stirling.software.SPDF.model.api.misc.ProcessPdfWithOcrRequest; import stirling.software.SPDF.service.CustomPDDocumentFactory; -import stirling.software.SPDF.utils.ProcessExecutor; -import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult; -import stirling.software.SPDF.utils.WebResponseUtils; @RestController @RequestMapping("/api/v1/misc") @Tag(name = "Misc", description = "Miscellaneous APIs") +@Slf4j public class OCRController { - @Autowired ApplicationProperties applicationProperties; + @Autowired private ApplicationProperties applicationProperties; + private final CustomPDDocumentFactory pdfDocumentFactory; + + @Autowired + public OCRController(CustomPDDocumentFactory pdfDocumentFactory) { + this.pdfDocumentFactory = pdfDocumentFactory; + } + + /** Gets the list of available Tesseract languages from the tessdata directory */ public List getAvailableTesseractLanguages() { String tessdataDir = applicationProperties.getSystem().getTessdataDir(); File[] files = new File(tessdataDir).listFiles(); @@ -54,196 +69,161 @@ public class OCRController { .collect(Collectors.toList()); } - private final CustomPDDocumentFactory pdfDocumentFactory; 
- - @Autowired - public OCRController(CustomPDDocumentFactory pdfDocumentFactory) { - this.pdfDocumentFactory = pdfDocumentFactory; - } - @PostMapping(consumes = "multipart/form-data", value = "/ocr-pdf") - @Operation( - summary = "Process a PDF file with OCR", - description = - "This endpoint processes a PDF file using OCR (Optical Character Recognition). Users can specify languages, sidecar, deskew, clean, cleanFinal, ocrType, ocrRenderType, and removeImagesAfter options. Input:PDF Output:PDF Type:SI-Conditional") public ResponseEntity processPdfWithOCR( @ModelAttribute ProcessPdfWithOcrRequest request) throws IOException, InterruptedException { MultipartFile inputFile = request.getFileInput(); - List selectedLanguages = request.getLanguages(); - Boolean sidecar = request.isSidecar(); - Boolean deskew = request.isDeskew(); - Boolean clean = request.isClean(); - Boolean cleanFinal = request.isCleanFinal(); + List languages = request.getLanguages(); String ocrType = request.getOcrType(); - String ocrRenderType = request.getOcrRenderType(); - Boolean removeImagesAfter = request.isRemoveImagesAfter(); - // --output-type pdfa - if (selectedLanguages == null || selectedLanguages.isEmpty()) { - throw new IOException("Please select at least one language."); - } - if (!"hocr".equals(ocrRenderType) && !"sandwich".equals(ocrRenderType)) { - throw new IOException("ocrRenderType wrong"); - } + Path tempDir = Files.createTempDirectory("ocr_process"); + Path tempInputFile = tempDir.resolve("input.pdf"); + Path tempOutputDir = tempDir.resolve("output"); + Path tempImagesDir = tempDir.resolve("images"); + Path finalOutputFile = tempDir.resolve("final_output.pdf"); - // Get available Tesseract languages - List availableLanguages = getAvailableTesseractLanguages(); - - // Validate selected languages - selectedLanguages = - selectedLanguages.stream().filter(availableLanguages::contains).toList(); - - if (selectedLanguages.isEmpty()) { - throw new IOException("None of the selected 
languages are valid."); - } - // Save the uploaded file to a temporary location - Path tempInputFile = Files.createTempFile("input_", ".pdf"); - Path tempOutputFile = Files.createTempFile("output_", ".pdf"); - Path sidecarTextPath = null; + Files.createDirectories(tempOutputDir); + Files.createDirectories(tempImagesDir); try { + // Save input file inputFile.transferTo(tempInputFile.toFile()); + PDFMergerUtility merger = new PDFMergerUtility(); + merger.setDestinationFileName(finalOutputFile.toString()); - // Run OCR Command - String languageOption = String.join("+", selectedLanguages); + try (PDDocument document = pdfDocumentFactory.load(tempInputFile.toFile())) { + PDFRenderer pdfRenderer = new PDFRenderer(document); + int pageCount = document.getNumberOfPages(); - List command = - new ArrayList<>( - Arrays.asList( - "ocrmypdf", - "--verbose", - "2", - "--output-type", - "pdf", - "--pdf-renderer", - ocrRenderType)); + for (int pageNum = 0; pageNum < pageCount; pageNum++) { + PDPage page = document.getPage(pageNum); + boolean hasText = false; - if (sidecar != null && sidecar) { - sidecarTextPath = Files.createTempFile("sidecar", ".txt"); - command.add("--sidecar"); - command.add(sidecarTextPath.toString()); - } + // Check for existing text + try (PDDocument tempDoc = new PDDocument()) { + tempDoc.addPage(page); + PDFTextStripper stripper = new PDFTextStripper(); + hasText = !stripper.getText(tempDoc).trim().isEmpty(); + } - if (deskew != null && deskew) { - command.add("--deskew"); - } - if (clean != null && clean) { - command.add("--clean"); - } - if (cleanFinal != null && cleanFinal) { - command.add("--clean-final"); - } - if (ocrType != null && !"".equals(ocrType)) { - if ("skip-text".equals(ocrType)) { - command.add("--skip-text"); - } else if ("force-ocr".equals(ocrType)) { - command.add("--force-ocr"); - } else if ("Normal".equals(ocrType)) { + boolean shouldOcr = + switch (ocrType) { + case "skip-text" -> !hasText; + case "force-ocr" -> true; + default -> 
true; + }; - } - } + Path pageOutputPath = + tempOutputDir.resolve(String.format("page_%d.pdf", pageNum)); - command.addAll( - Arrays.asList( - "--language", - languageOption, - tempInputFile.toString(), - tempOutputFile.toString())); + if (shouldOcr) { + // Convert page to image + BufferedImage image = pdfRenderer.renderImageWithDPI(pageNum, 300); + Path imagePath = + tempImagesDir.resolve(String.format("page_%d.png", pageNum)); + ImageIO.write(image, "png", imagePath.toFile()); - // Run CLI command - ProcessExecutorResult result = - ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF) - .runCommandWithOutputHandling(command); - if (result.getRc() != 0 - && result.getMessages().contains("multiprocessing/synchronize.py") - && result.getMessages() - .contains("OSError: [Errno 38] Function not implemented")) { - command.add("--jobs"); - command.add("1"); - result = - ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF) - .runCommandWithOutputHandling(command); - } + // Build OCR command + List command = new ArrayList<>(); + command.add("tesseract"); + command.add(imagePath.toString()); + command.add( + tempOutputDir + .resolve(String.format("page_%d", pageNum)) + .toString()); + command.add("-l"); + command.add(String.join("+", languages)); + command.add("pdf"); // Always output PDF - // Remove images from the OCR processed PDF if the flag is set to true - if (removeImagesAfter != null && removeImagesAfter) { - Path tempPdfWithoutImages = Files.createTempFile("output_", "_no_images.pdf"); + ProcessBuilder pb = new ProcessBuilder(command); + Process process = pb.start(); - List gsCommand = - Arrays.asList( - "gs", - "-sDEVICE=pdfwrite", - "-dFILTERIMAGE", - "-o", - tempPdfWithoutImages.toString(), - tempOutputFile.toString()); + // Capture any error output + try (BufferedReader reader = + new BufferedReader( + new InputStreamReader(process.getErrorStream()))) { + String line; + while ((line = reader.readLine()) != null) { + 
log.debug("Tesseract: {}", line); + } + } - ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT) - .runCommandWithOutputHandling(gsCommand); - tempOutputFile = tempPdfWithoutImages; - } - // Read the OCR processed PDF file - byte[] pdfBytes = pdfDocumentFactory.loadToBytes(tempOutputFile.toFile()); + int exitCode = process.waitFor(); + if (exitCode != 0) { + throw new RuntimeException( + "Tesseract failed with exit code: " + exitCode); + } - // Return the OCR processed PDF as a response - String outputFilename = - Filenames.toSimpleFileName(inputFile.getOriginalFilename()) - .replaceFirst("[.][^.]+$", "") - + "_OCR.pdf"; - - if (sidecar != null && sidecar) { - // Create a zip file containing both the PDF and the text file - String outputZipFilename = - Filenames.toSimpleFileName(inputFile.getOriginalFilename()) - .replaceFirst("[.][^.]+$", "") - + "_OCR.zip"; - Path tempZipFile = Files.createTempFile("output_", ".zip"); - - try (ZipOutputStream zipOut = - new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) { - // Add PDF file to the zip - ZipEntry pdfEntry = new ZipEntry(outputFilename); - zipOut.putNextEntry(pdfEntry); - try (ByteArrayInputStream pdfInputStream = new ByteArrayInputStream(pdfBytes)) { - byte[] buffer = new byte[1024]; - int length; - while ((length = pdfInputStream.read(buffer)) != -1) { - zipOut.write(buffer, 0, length); + // Add OCR'd PDF to merger + merger.addSource(pageOutputPath.toFile()); + } else { + // Save original page without OCR + try (PDDocument pageDoc = new PDDocument()) { + pageDoc.addPage(page); + pageDoc.save(pageOutputPath.toFile()); + merger.addSource(pageOutputPath.toFile()); } } - zipOut.closeEntry(); - - // Add text file to the zip - ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt")); - zipOut.putNextEntry(txtEntry); - Files.copy(sidecarTextPath, zipOut); - zipOut.closeEntry(); } - - byte[] zipBytes = Files.readAllBytes(tempZipFile); - - // Clean up the temporary zip file - 
Files.deleteIfExists(tempZipFile); - Files.deleteIfExists(tempOutputFile); - Files.deleteIfExists(sidecarTextPath); - - // Return the zip file containing both the PDF and the text file - return WebResponseUtils.bytesToWebResponse( - zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM); - } else { - // Return the OCR processed PDF as a response - Files.deleteIfExists(tempOutputFile); - return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename); } + + // Merge all pages into final PDF + merger.mergeDocuments(null); + + // Read the final PDF file + byte[] pdfContent = Files.readAllBytes(finalOutputFile); + String outputFilename = + inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf"; + + return ResponseEntity.ok() + .header( + "Content-Disposition", + "attachment; filename=\"" + outputFilename + "\"") + .contentType(MediaType.APPLICATION_PDF) + .body(pdfContent); + } finally { - // Clean up the temporary files - Files.deleteIfExists(tempOutputFile); - // Comment out as transferTo makes multipart handle cleanup - // Files.deleteIfExists(tempInputFile); - if (sidecarTextPath != null) { - Files.deleteIfExists(sidecarTextPath); + // Clean up temporary files + deleteDirectory(tempDir); + } + } + + private void addFileToZip(File file, String filename, ZipOutputStream zipOut) + throws IOException { + if (!file.exists()) { + log.warn("File {} does not exist, skipping", file); + return; + } + + try (FileInputStream fis = new FileInputStream(file)) { + ZipEntry zipEntry = new ZipEntry(filename); + zipOut.putNextEntry(zipEntry); + + byte[] buffer = new byte[1024]; + int length; + while ((length = fis.read(buffer)) >= 0) { + zipOut.write(buffer, 0, length); } + + zipOut.closeEntry(); + } + } + + private void deleteDirectory(Path directory) { + try { + Files.walk(directory) + .sorted(Comparator.reverseOrder()) + .forEach( + path -> { + try { + Files.delete(path); + } catch (IOException e) { + log.error("Error deleting {}: {}", path, 
e.getMessage()); + } + }); + } catch (IOException e) { + log.error("Error walking directory {}: {}", directory, e.getMessage()); } } } diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java index be0827cc..9b3f0632 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java @@ -56,14 +56,15 @@ public class RepairController { try { List command = new ArrayList<>(); - command.add("gs"); - command.add("-o"); - command.add(tempOutputFile.toString()); - command.add("-sDEVICE=pdfwrite"); + command.add("qpdf"); + command.add("--replace-input"); // Automatically fixes problems it can + command.add("--qdf"); // Linearizes and normalizes PDF structure + command.add("--object-streams=disable"); // Can help with some corruptions command.add(tempInputFile.toString()); + command.add(tempOutputFile.toString()); ProcessExecutorResult returnCode = - ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT) + ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF) .runCommandWithOutputHandling(command); // Read the optimized PDF file diff --git a/src/main/java/stirling/software/SPDF/controller/api/security/CertSignController.java b/src/main/java/stirling/software/SPDF/controller/api/security/CertSignController.java index cb5eef10..27262feb 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/security/CertSignController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/security/CertSignController.java @@ -98,10 +98,10 @@ public class CertSignController { public CreateSignature(KeyStore keystore, char[] pin) throws KeyStoreException, - UnrecoverableKeyException, - NoSuchAlgorithmException, - IOException, - CertificateException { + UnrecoverableKeyException, + NoSuchAlgorithmException, + IOException, + 
CertificateException { super(keystore, pin); ClassPathResource resource = new ClassPathResource("static/images/signature.png"); try (InputStream is = resource.getInputStream()) { @@ -160,7 +160,8 @@ public class CertSignController { extState.setNonStrokingAlphaConstant(0.5f); cs.setGraphicsStateParameters(extState); cs.transform(Matrix.getScaleInstance(0.08f, 0.08f)); - PDImageXObject img = PDImageXObject.createFromFileByExtension(logoFile, doc); + PDImageXObject img = + PDImageXObject.createFromFileByExtension(logoFile, doc); cs.drawImage(img, 100, 0); cs.restoreGraphicsState(); } @@ -208,7 +209,10 @@ public class CertSignController { } @PostMapping(consumes = "multipart/form-data", value = "/cert-sign") - @Operation(summary = "Sign PDF with a Digital Certificate", description = "This endpoint accepts a PDF file, a digital certificate and related information to sign the PDF. It then returns the digitally signed PDF file. Input:PDF Output:PDF Type:SISO") + @Operation( + summary = "Sign PDF with a Digital Certificate", + description = + "This endpoint accepts a PDF file, a digital certificate and related information to sign the PDF. It then returns the digitally signed PDF file. 
Input:PDF Output:PDF Type:SISO") public ResponseEntity signPDFWithCert(@ModelAttribute SignPDFWithCertRequest request) throws Exception { MultipartFile pdf = request.getFileInput(); @@ -238,7 +242,7 @@ public class CertSignController { PrivateKey privateKey = getPrivateKeyFromPEM(privateKeyFile.getBytes(), password); Certificate cert = (Certificate) getCertificateFromPEM(certFile.getBytes()); ks.setKeyEntry( - "alias", privateKey, password.toCharArray(), new Certificate[] { cert }); + "alias", privateKey, password.toCharArray(), new Certificate[] {cert}); break; case "PKCS12": ks = KeyStore.getInstance("PKCS12"); @@ -310,19 +314,22 @@ public class CertSignController { private PrivateKey getPrivateKeyFromPEM(byte[] pemBytes, String password) throws IOException, OperatorCreationException, PKCSException { - try (PEMParser pemParser = new PEMParser(new InputStreamReader(new ByteArrayInputStream(pemBytes)))) { + try (PEMParser pemParser = + new PEMParser(new InputStreamReader(new ByteArrayInputStream(pemBytes)))) { Object pemObject = pemParser.readObject(); JcaPEMKeyConverter converter = new JcaPEMKeyConverter().setProvider("BC"); PrivateKeyInfo pkInfo; if (pemObject instanceof PKCS8EncryptedPrivateKeyInfo) { - InputDecryptorProvider decProv = new JceOpenSSLPKCS8DecryptorProviderBuilder() - .build(password.toCharArray()); + InputDecryptorProvider decProv = + new JceOpenSSLPKCS8DecryptorProviderBuilder().build(password.toCharArray()); pkInfo = ((PKCS8EncryptedPrivateKeyInfo) pemObject).decryptPrivateKeyInfo(decProv); } else if (pemObject instanceof PEMEncryptedKeyPair) { - PEMDecryptorProvider decProv = new JcePEMDecryptorProviderBuilder().build(password.toCharArray()); - pkInfo = ((PEMEncryptedKeyPair) pemObject) - .decryptKeyPair(decProv) - .getPrivateKeyInfo(); + PEMDecryptorProvider decProv = + new JcePEMDecryptorProviderBuilder().build(password.toCharArray()); + pkInfo = + ((PEMEncryptedKeyPair) pemObject) + .decryptKeyPair(decProv) + .getPrivateKeyInfo(); } else { 
pkInfo = ((PEMKeyPair) pemObject).getPrivateKeyInfo(); } diff --git a/src/main/java/stirling/software/SPDF/controller/web/HomeWebController.java b/src/main/java/stirling/software/SPDF/controller/web/HomeWebController.java index 5ade5668..41dca2fd 100644 --- a/src/main/java/stirling/software/SPDF/controller/web/HomeWebController.java +++ b/src/main/java/stirling/software/SPDF/controller/web/HomeWebController.java @@ -55,6 +55,11 @@ public class HomeWebController { return "licenses"; } + @GetMapping("/releases") + public String getReleaseNotes(Model model) { + return "releases"; + } + @GetMapping("/") public String home(Model model) { model.addAttribute("currentPage", "home"); diff --git a/src/main/java/stirling/software/SPDF/model/api/misc/OptimizePdfRequest.java b/src/main/java/stirling/software/SPDF/model/api/misc/OptimizePdfRequest.java index 96a787f3..3cd7f7be 100644 --- a/src/main/java/stirling/software/SPDF/model/api/misc/OptimizePdfRequest.java +++ b/src/main/java/stirling/software/SPDF/model/api/misc/OptimizePdfRequest.java @@ -18,4 +18,15 @@ public class OptimizePdfRequest extends PDFFile { @Schema(description = "The expected output size, e.g. '100MB', '25KB', etc.") private String expectedOutputSize; + + @Schema( + description = "Whether to linearize the PDF for faster web viewing. Default is false.", + defaultValue = "false") + private Boolean linearize = false; + + @Schema( + description = + "Whether to normalize the PDF content for better compatibility. 
Default is true.", + defaultValue = "true") + private Boolean normalize = true; } diff --git a/src/main/java/stirling/software/SPDF/model/api/misc/ProcessPdfWithOcrRequest.java b/src/main/java/stirling/software/SPDF/model/api/misc/ProcessPdfWithOcrRequest.java index 7d3de3e6..e26741c9 100644 --- a/src/main/java/stirling/software/SPDF/model/api/misc/ProcessPdfWithOcrRequest.java +++ b/src/main/java/stirling/software/SPDF/model/api/misc/ProcessPdfWithOcrRequest.java @@ -15,18 +15,6 @@ public class ProcessPdfWithOcrRequest extends PDFFile { @Schema(description = "List of languages to use in OCR processing") private List languages; - @Schema(description = "Include OCR text in a sidecar text file if set to true") - private boolean sidecar; - - @Schema(description = "Deskew the input file if set to true") - private boolean deskew; - - @Schema(description = "Clean the input file if set to true") - private boolean clean; - - @Schema(description = "Clean the final output if set to true") - private boolean cleanFinal; - @Schema( description = "Specify the OCR type, e.g., 'skip-text', 'force-ocr', or 'Normal'", allowableValues = {"skip-text", "force-ocr", "Normal"}) @@ -37,7 +25,4 @@ public class ProcessPdfWithOcrRequest extends PDFFile { allowableValues = {"hocr", "sandwich"}, defaultValue = "hocr") private String ocrRenderType = "hocr"; - - @Schema(description = "Remove images from the output PDF if set to true") - private boolean removeImagesAfter; } diff --git a/src/main/java/stirling/software/SPDF/service/MetricsAggregatorService.java b/src/main/java/stirling/software/SPDF/service/MetricsAggregatorService.java index 084480c1..9cfacdd1 100644 --- a/src/main/java/stirling/software/SPDF/service/MetricsAggregatorService.java +++ b/src/main/java/stirling/software/SPDF/service/MetricsAggregatorService.java @@ -34,17 +34,15 @@ public class MetricsAggregatorService { counter -> { String method = counter.getId().getTag("method"); String uri = counter.getId().getTag("uri"); - + // 
Skip if either method or uri is null if (method == null || uri == null) { return; } - - String key = String.format( - "http_requests_%s_%s", - method, - uri.replace("/", "_") - ); + + String key = + String.format( + "http_requests_%s_%s", method, uri.replace("/", "_")); double currentCount = counter.count(); double lastCount = lastSentMetrics.getOrDefault(key, 0.0); diff --git a/src/main/java/stirling/software/SPDF/service/PostHogService.java b/src/main/java/stirling/software/SPDF/service/PostHogService.java index 5046d6cd..a2d3d485 100644 --- a/src/main/java/stirling/software/SPDF/service/PostHogService.java +++ b/src/main/java/stirling/software/SPDF/service/PostHogService.java @@ -31,7 +31,7 @@ public class PostHogService { private final ApplicationProperties applicationProperties; private final UserServiceInterface userService; private final Environment env; - + @Autowired public PostHogService( PostHog postHog, @@ -71,16 +71,16 @@ public class PostHogService { Map metrics = new HashMap<>(); try { - //Application version - metrics.put("app_version", appVersion); - String deploymentType = "JAR"; // default - if ("true".equalsIgnoreCase(env.getProperty("BROWSER_OPEN"))) { - deploymentType = "EXE"; - } else if (isRunningInDocker()) { - deploymentType = "DOCKER"; - } - metrics.put("deployment_type", deploymentType); - + // Application version + metrics.put("app_version", appVersion); + String deploymentType = "JAR"; // default + if ("true".equalsIgnoreCase(env.getProperty("BROWSER_OPEN"))) { + deploymentType = "EXE"; + } else if (isRunningInDocker()) { + deploymentType = "DOCKER"; + } + metrics.put("deployment_type", deploymentType); + // System info metrics.put("os_name", System.getProperty("os.name")); metrics.put("os_version", System.getProperty("os.version")); diff --git a/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java b/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java index 0947714f..a3d877c0 100644 --- 
a/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java +++ b/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java @@ -29,12 +29,13 @@ public class ProcessExecutor { public enum Processes { LIBRE_OFFICE, PDFTOHTML, - OCR_MY_PDF, PYTHON_OPENCV, - GHOSTSCRIPT, WEASYPRINT, INSTALL_APP, - CALIBRE + CALIBRE, + IMAGEMAGICK, + TESSERACT, + QPDF } private static final Map instances = new ConcurrentHashMap<>(); @@ -59,21 +60,11 @@ public class ProcessExecutor { .getProcessExecutor() .getSessionLimit() .getPdfToHtmlSessionLimit(); - case OCR_MY_PDF -> - applicationProperties - .getProcessExecutor() - .getSessionLimit() - .getOcrMyPdfSessionLimit(); case PYTHON_OPENCV -> applicationProperties .getProcessExecutor() .getSessionLimit() .getPythonOpenCvSessionLimit(); - case GHOSTSCRIPT -> - applicationProperties - .getProcessExecutor() - .getSessionLimit() - .getGhostScriptSessionLimit(); case WEASYPRINT -> applicationProperties .getProcessExecutor() @@ -84,7 +75,7 @@ public class ProcessExecutor { .getProcessExecutor() .getSessionLimit() .getInstallAppSessionLimit(); - case CALIBRE -> + case CALIBRE, IMAGEMAGICK, TESSERACT, QPDF -> applicationProperties .getProcessExecutor() .getSessionLimit() @@ -103,21 +94,11 @@ public class ProcessExecutor { .getProcessExecutor() .getTimeoutMinutes() .getPdfToHtmlTimeoutMinutes(); - case OCR_MY_PDF -> - applicationProperties - .getProcessExecutor() - .getTimeoutMinutes() - .getOcrMyPdfTimeoutMinutes(); case PYTHON_OPENCV -> applicationProperties .getProcessExecutor() .getTimeoutMinutes() .getPythonOpenCvTimeoutMinutes(); - case GHOSTSCRIPT -> - applicationProperties - .getProcessExecutor() - .getTimeoutMinutes() - .getGhostScriptTimeoutMinutes(); case WEASYPRINT -> applicationProperties .getProcessExecutor() @@ -128,7 +109,7 @@ public class ProcessExecutor { .getProcessExecutor() .getTimeoutMinutes() .getInstallAppTimeoutMinutes(); - case CALIBRE -> + case CALIBRE, IMAGEMAGICK, TESSERACT, QPDF -> applicationProperties 
.getProcessExecutor() .getTimeoutMinutes() diff --git a/src/main/resources/messages_en_GB.properties b/src/main/resources/messages_en_GB.properties index 15c3a153..1d6d7ed3 100644 --- a/src/main/resources/messages_en_GB.properties +++ b/src/main/resources/messages_en_GB.properties @@ -1260,3 +1260,10 @@ splitByChapters.desc.2=Bookmark Level: Choose the level of bookmarks to use for splitByChapters.desc.3=Include Metadata: If checked, the original PDF's metadata will be included in each split PDF. splitByChapters.desc.4=Allow Duplicates: If checked, allows multiple bookmarks on the same page to create separate PDFs. splitByChapters.submit=Split PDF + + +#release notes +releases.title=Release Notes +releases.header=Release Notes +releases.current.version=Current Release +releases.note=All release notes are only available in english diff --git a/src/main/resources/templates/misc/compress-pdf.html b/src/main/resources/templates/misc/compress-pdf.html index 99dda30d..e22d1ef4 100644 --- a/src/main/resources/templates/misc/compress-pdf.html +++ b/src/main/resources/templates/misc/compress-pdf.html @@ -29,9 +29,14 @@ diff --git a/src/main/resources/templates/misc/ocr-pdf.html b/src/main/resources/templates/misc/ocr-pdf.html index 9b8a8793..d37c1c2a 100644 --- a/src/main/resources/templates/misc/ocr-pdf.html +++ b/src/main/resources/templates/misc/ocr-pdf.html @@ -62,26 +62,6 @@
-
- - -
-
- - -
-
- - -
-
- - -
-
- - -