From 65f943863933e9bb48ce45f75bc6a071a3bd3c33 Mon Sep 17 00:00:00 2001 From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com.> Date: Mon, 27 May 2024 17:53:18 +0100 Subject: [PATCH] deletion changes --- .github/workflows/build.yml | 8 +- .../converters/ConvertOfficeController.java | 48 ++-- .../api/converters/ConvertPDFToPDFA.java | 4 +- .../api/misc/CompressController.java | 28 +- .../api/misc/ExtractImageScansController.java | 15 +- .../controller/api/misc/OCRController.java | 241 +++++++++--------- .../controller/api/misc/RepairController.java | 40 +-- .../software/SPDF/utils/FileToPdf.java | 4 +- .../software/SPDF/utils/PDFToFile.java | 11 +- 9 files changed, 198 insertions(+), 201 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f4d37599..de3b3569 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -3,14 +3,8 @@ name: "Build repo" on: push: branches: ["main"] - paths-ignore: - - ".github/**" - - "**/*.md" pull_request: branches: ["main"] - paths-ignore: - - ".github/**" - - "**/*.md" jobs: build: @@ -36,7 +30,7 @@ jobs: - uses: gradle/actions/setup-gradle@v3 with: - gradle-version: 7.6 + gradle-version: 8.7 - name: Build with Gradle run: ./gradlew build --no-build-cache diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java index c6740b5f..5e6b3dfd 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java @@ -3,7 +3,6 @@ package stirling.software.SPDF.controller.api.converters; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.StandardCopyOption; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -41,34 +40,35 @@ public class ConvertOfficeController { // Save the uploaded file to a temporary location Path tempInputFile = Files.createTempFile("input_", "." + FilenameUtils.getExtension(originalFilename)); - Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING); + inputFile.transferTo(tempInputFile); // Prepare the output file path Path tempOutputFile = Files.createTempFile("output_", ".pdf"); - // Run the LibreOffice command - List command = - new ArrayList<>( - Arrays.asList( - "unoconv", - "-vvv", - "-f", - "pdf", - "-o", - tempOutputFile.toString(), - tempInputFile.toString())); - ProcessExecutorResult returnCode = - ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE) - .runCommandWithOutputHandling(command); + try { + // Run the LibreOffice command + List command = + new ArrayList<>( + Arrays.asList( + "unoconv", + "-vvv", + "-f", + "pdf", + "-o", + tempOutputFile.toString(), + tempInputFile.toString())); + ProcessExecutorResult returnCode = + ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE) + .runCommandWithOutputHandling(command); - // Read the converted PDF file - byte[] pdfBytes = Files.readAllBytes(tempOutputFile); - - // Clean up the temporary files - Files.delete(tempInputFile); - Files.delete(tempOutputFile); - - return pdfBytes; + // Read the converted PDF file + byte[] pdfBytes = Files.readAllBytes(tempOutputFile); + return pdfBytes; + } finally { + // Clean up the temporary files + if (tempInputFile != null) Files.deleteIfExists(tempInputFile); + Files.deleteIfExists(tempOutputFile); + } } private boolean isValidFileExtension(String fileExtension) { diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java index aec4e347..e3578826 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java @@ -61,8 +61,8 @@ public class ConvertPDFToPDFA { byte[] pdfBytes = Files.readAllBytes(tempOutputFile); // Clean up the temporary files - Files.delete(tempInputFile); - Files.delete(tempOutputFile); + Files.deleteIfExists(tempInputFile); + Files.deleteIfExists(tempOutputFile); // Return the optimized PDF as a response String outputFilename = diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java index 9e3d6a99..12dd625c 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java @@ -136,10 +136,10 @@ public class CompressController { // Increase optimization level for next iteration optimizeLevel++; if (autoMode && optimizeLevel > 4) { - System.out.println("Skipping level 5 due to bad results in auto mode"); + logger.info("Skipping level 5 due to bad results in auto mode"); sizeMet = true; } else { - System.out.println( + logger.info( "Increasing ghostscript optimisation level to " + optimizeLevel); } } @@ -230,10 +230,10 @@ public class CompressController { if (currentSize > expectedOutputSize) { // Log the current file size and scaleFactor - System.out.println( + logger.info( "Current file size: " + FileUtils.byteCountToDisplaySize(currentSize)); - System.out.println("Current scale factor: " + scaleFactor); + logger.info("Current scale factor: " + scaleFactor); // The file is still too large, reduce scaleFactor and try again scaleFactor *= 0.9f; // reduce scaleFactor by 10% @@ -256,7 +256,6 @@ public class CompressController { } } } - // Read the optimized PDF file pdfBytes = Files.readAllBytes(tempOutputFile); @@ -269,17 +268,18 @@ public class CompressController { // Read the original file again pdfBytes = Files.readAllBytes(tempInputFile); } + // Return the optimized PDF as a response + String outputFilename = + Filenames.toSimpleFileName(inputFile.getOriginalFilename()) + .replaceFirst("[.][^.]+$", "") + + "_Optimized.pdf"; + return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename); + } finally { // Clean up the temporary files - Files.delete(tempInputFile); - Files.delete(tempOutputFile); + // deleted by multipart file handler deu to transferTo? + // Files.deleteIfExists(tempInputFile); + Files.deleteIfExists(tempOutputFile); } - - // Return the optimized PDF as a response - String outputFilename = - Filenames.toSimpleFileName(inputFile.getOriginalFilename()) - .replaceFirst("[.][^.]+$", "") - + "_Optimized.pdf"; - return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename); } } diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImageScansController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImageScansController.java index 9ed00dd8..4a6acaee 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImageScansController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImageScansController.java @@ -5,7 +5,6 @@ import java.io.FileOutputStream; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.StandardCopyOption; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -103,10 +102,7 @@ public class ExtractImageScansController { } } else { tempInputFile = Files.createTempFile("input_", "." + extension); - Files.copy( - form.getFileInput().getInputStream(), - tempInputFile, - StandardCopyOption.REPLACE_EXISTING); + form.getFileInput().transferTo(tempInputFile); // Add input file path to images list images.add(tempInputFile.toString()); } @@ -176,11 +172,14 @@ public class ExtractImageScansController { byte[] zipBytes = Files.readAllBytes(tempZipFile); // Clean up the temporary zip file - Files.delete(tempZipFile); + Files.deleteIfExists(tempZipFile); return WebResponseUtils.bytesToWebResponse( zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM); - } else { + } if (processedImageBytes.size() == 0) { + throw new IllegalArgumentException("No images detected"); + }else { + // Return the processed image as a response byte[] imageBytes = processedImageBytes.get(0); return WebResponseUtils.bytesToWebResponse( @@ -201,7 +200,7 @@ public class ExtractImageScansController { if (tempZipFile != null && Files.exists(tempZipFile)) { try { - Files.delete(tempZipFile); + Files.deleteIfExists(tempZipFile); } catch (IOException e) { logger.error("Failed to delete temporary zip file: " + tempZipFile, e); } diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java index 07947587..bb8a18cd 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java @@ -5,7 +5,6 @@ import java.io.FileOutputStream; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.StandardCopyOption; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -91,139 +90,145 @@ public class OCRController { } // Save the uploaded file to a temporary location Path tempInputFile = Files.createTempFile("input_", ".pdf"); - Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING); - - // Prepare the output file path Path tempOutputFile = Files.createTempFile("output_", ".pdf"); - - // Prepare the output file path Path sidecarTextPath = null; - // Run OCR Command - String languageOption = String.join("+", selectedLanguages); + try { + inputFile.transferTo(tempInputFile.toFile()); - List command = - new ArrayList<>( - Arrays.asList( - "ocrmypdf", - "--verbose", - "2", - "--output-type", - "pdf", - "--pdf-renderer", - ocrRenderType)); + // Run OCR Command + String languageOption = String.join("+", selectedLanguages); - if (sidecar != null && sidecar) { - sidecarTextPath = Files.createTempFile("sidecar", ".txt"); - command.add("--sidecar"); - command.add(sidecarTextPath.toString()); - } - - if (deskew != null && deskew) { - command.add("--deskew"); - } - if (clean != null && clean) { - command.add("--clean"); - } - if (cleanFinal != null && cleanFinal) { - command.add("--clean-final"); - } - if (ocrType != null && !"".equals(ocrType)) { - if ("skip-text".equals(ocrType)) { - command.add("--skip-text"); - } else if ("force-ocr".equals(ocrType)) { - command.add("--force-ocr"); - } else if ("Normal".equals(ocrType)) { + List command = + new ArrayList<>( + Arrays.asList( + "ocrmypdf", + "--verbose", + "2", + "--output-type", + "pdf", + "--pdf-renderer", + ocrRenderType)); + if (sidecar != null && sidecar) { + sidecarTextPath = Files.createTempFile("sidecar", ".txt"); + command.add("--sidecar"); + command.add(sidecarTextPath.toString()); } - } - command.addAll( - Arrays.asList( - "--language", - languageOption, - tempInputFile.toString(), - tempOutputFile.toString())); + if (deskew != null && deskew) { + command.add("--deskew"); + } + if (clean != null && clean) { + command.add("--clean"); + } + if (cleanFinal != null && cleanFinal) { + command.add("--clean-final"); + } + if (ocrType != null && !"".equals(ocrType)) { + if ("skip-text".equals(ocrType)) { + command.add("--skip-text"); + } else if ("force-ocr".equals(ocrType)) { + command.add("--force-ocr"); + } else if ("Normal".equals(ocrType)) { - // Run CLI command - ProcessExecutorResult result = - ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF) - .runCommandWithOutputHandling(command); - if (result.getRc() != 0 - && result.getMessages().contains("multiprocessing/synchronize.py") - && result.getMessages().contains("OSError: [Errno 38] Function not implemented")) { - command.add("--jobs"); - command.add("1"); - result = + } + } + + command.addAll( + Arrays.asList( + "--language", + languageOption, + tempInputFile.toString(), + tempOutputFile.toString())); + + // Run CLI command + ProcessExecutorResult result = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF) .runCommandWithOutputHandling(command); - } - - // Remove images from the OCR processed PDF if the flag is set to true - if (removeImagesAfter != null && removeImagesAfter) { - Path tempPdfWithoutImages = Files.createTempFile("output_", "_no_images.pdf"); - - List gsCommand = - Arrays.asList( - "gs", - "-sDEVICE=pdfwrite", - "-dFILTERIMAGE", - "-o", - tempPdfWithoutImages.toString(), - tempOutputFile.toString()); - - ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT) - .runCommandWithOutputHandling(gsCommand); - tempOutputFile = tempPdfWithoutImages; - } - // Read the OCR processed PDF file - byte[] pdfBytes = Files.readAllBytes(tempOutputFile); - // Clean up the temporary files - Files.delete(tempInputFile); - - // Return the OCR processed PDF as a response - String outputFilename = - Filenames.toSimpleFileName(inputFile.getOriginalFilename()) - .replaceFirst("[.][^.]+$", "") - + "_OCR.pdf"; - - if (sidecar != null && sidecar) { - // Create a zip file containing both the PDF and the text file - String outputZipFilename = - Filenames.toSimpleFileName(inputFile.getOriginalFilename()) - .replaceFirst("[.][^.]+$", "") - + "_OCR.zip"; - Path tempZipFile = Files.createTempFile("output_", ".zip"); - - try (ZipOutputStream zipOut = - new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) { - // Add PDF file to the zip - ZipEntry pdfEntry = new ZipEntry(outputFilename); - zipOut.putNextEntry(pdfEntry); - Files.copy(tempOutputFile, zipOut); - zipOut.closeEntry(); - - // Add text file to the zip - ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt")); - zipOut.putNextEntry(txtEntry); - Files.copy(sidecarTextPath, zipOut); - zipOut.closeEntry(); + if (result.getRc() != 0 + && result.getMessages().contains("multiprocessing/synchronize.py") + && result.getMessages() + .contains("OSError: [Errno 38] Function not implemented")) { + command.add("--jobs"); + command.add("1"); + result = + ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF) + .runCommandWithOutputHandling(command); } - byte[] zipBytes = Files.readAllBytes(tempZipFile); + // Remove images from the OCR processed PDF if the flag is set to true + if (removeImagesAfter != null && removeImagesAfter) { + Path tempPdfWithoutImages = Files.createTempFile("output_", "_no_images.pdf"); - // Clean up the temporary zip file - Files.delete(tempZipFile); - Files.delete(tempOutputFile); - Files.delete(sidecarTextPath); + List gsCommand = + Arrays.asList( + "gs", + "-sDEVICE=pdfwrite", + "-dFILTERIMAGE", + "-o", + tempPdfWithoutImages.toString(), + tempOutputFile.toString()); + + ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT) + .runCommandWithOutputHandling(gsCommand); + tempOutputFile = tempPdfWithoutImages; + } + // Read the OCR processed PDF file + byte[] pdfBytes = Files.readAllBytes(tempOutputFile); - // Return the zip file containing both the PDF and the text file - return WebResponseUtils.bytesToWebResponse( - zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM); - } else { // Return the OCR processed PDF as a response - Files.delete(tempOutputFile); - return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename); + String outputFilename = + Filenames.toSimpleFileName(inputFile.getOriginalFilename()) + .replaceFirst("[.][^.]+$", "") + + "_OCR.pdf"; + + if (sidecar != null && sidecar) { + // Create a zip file containing both the PDF and the text file + String outputZipFilename = + Filenames.toSimpleFileName(inputFile.getOriginalFilename()) + .replaceFirst("[.][^.]+$", "") + + "_OCR.zip"; + Path tempZipFile = Files.createTempFile("output_", ".zip"); + + try (ZipOutputStream zipOut = + new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) { + // Add PDF file to the zip + ZipEntry pdfEntry = new ZipEntry(outputFilename); + zipOut.putNextEntry(pdfEntry); + Files.copy(tempOutputFile, zipOut); + zipOut.closeEntry(); + + // Add text file to the zip + ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt")); + zipOut.putNextEntry(txtEntry); + Files.copy(sidecarTextPath, zipOut); + zipOut.closeEntry(); + } + + byte[] zipBytes = Files.readAllBytes(tempZipFile); + + // Clean up the temporary zip file + Files.deleteIfExists(tempZipFile); + Files.deleteIfExists(tempOutputFile); + Files.deleteIfExists(sidecarTextPath); + + // Return the zip file containing both the PDF and the text file + return WebResponseUtils.bytesToWebResponse( + zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM); + } else { + // Return the OCR processed PDF as a response + Files.deleteIfExists(tempOutputFile); + return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename); + } + } finally { + // Clean up the temporary files + Files.deleteIfExists(tempOutputFile); + // Comment out as transferTo makes multipart handle cleanup + // Files.deleteIfExists(tempInputFile); + if (sidecarTextPath != null) { + Files.deleteIfExists(sidecarTextPath); + } } } } diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java index 85d4560c..812a8cae 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java @@ -41,34 +41,36 @@ public class RepairController { MultipartFile inputFile = request.getFileInput(); // Save the uploaded file to a temporary location Path tempInputFile = Files.createTempFile("input_", ".pdf"); - inputFile.transferTo(tempInputFile.toFile()); - - // Prepare the output file path Path tempOutputFile = Files.createTempFile("output_", ".pdf"); + byte[] pdfBytes = null; + inputFile.transferTo(tempInputFile.toFile()); + try { - List command = new ArrayList<>(); - command.add("gs"); - command.add("-o"); - command.add(tempOutputFile.toString()); - command.add("-sDEVICE=pdfwrite"); - command.add(tempInputFile.toString()); + List command = new ArrayList<>(); + command.add("gs"); + command.add("-o"); + command.add(tempOutputFile.toString()); + command.add("-sDEVICE=pdfwrite"); + command.add(tempInputFile.toString()); - ProcessExecutorResult returnCode = - ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT) - .runCommandWithOutputHandling(command); - - // Read the optimized PDF file - byte[] pdfBytes = Files.readAllBytes(tempOutputFile); - - // Clean up the temporary files - Files.delete(tempInputFile); - Files.delete(tempOutputFile); + ProcessExecutorResult returnCode = + ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT) + .runCommandWithOutputHandling(command); + // Read the optimized PDF file + pdfBytes = Files.readAllBytes(tempOutputFile); + // Return the optimized PDF as a response String outputFilename = Filenames.toSimpleFileName(inputFile.getOriginalFilename()) .replaceFirst("[.][^.]+$", "") + "_repaired.pdf"; return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename); + } finally { + // Clean up the temporary files + Files.deleteIfExists(tempInputFile); + Files.deleteIfExists(tempOutputFile); + } + } } diff --git a/src/main/java/stirling/software/SPDF/utils/FileToPdf.java b/src/main/java/stirling/software/SPDF/utils/FileToPdf.java index da408a23..2a891c08 100644 --- a/src/main/java/stirling/software/SPDF/utils/FileToPdf.java +++ b/src/main/java/stirling/software/SPDF/utils/FileToPdf.java @@ -79,8 +79,8 @@ public class FileToPdf { } finally { // Clean up temporary files - Files.delete(tempOutputFile); - Files.delete(tempInputFile); + Files.deleteIfExists(tempOutputFile); + Files.deleteIfExists(tempInputFile); } return pdfBytes; diff --git a/src/main/java/stirling/software/SPDF/utils/PDFToFile.java b/src/main/java/stirling/software/SPDF/utils/PDFToFile.java index 38579aa8..f5297a10 100644 --- a/src/main/java/stirling/software/SPDF/utils/PDFToFile.java +++ b/src/main/java/stirling/software/SPDF/utils/PDFToFile.java @@ -6,7 +6,6 @@ import java.io.FileInputStream; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.StandardCopyOption; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -47,8 +46,7 @@ public class PDFToFile { try { // Save the uploaded file to a temporary location tempInputFile = Files.createTempFile("input_", ".pdf"); - Files.copy( - inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING); + inputFile.transferTo(tempInputFile); // Prepare the output directory tempOutputDir = Files.createTempDirectory("output_"); @@ -85,7 +83,7 @@ public class PDFToFile { } finally { // Clean up the temporary files - if (tempInputFile != null) Files.delete(tempInputFile); + if (tempInputFile != null) Files.deleteIfExists(tempInputFile); if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile()); } @@ -127,8 +125,7 @@ public class PDFToFile { try { // Save the uploaded file to a temporary location tempInputFile = Files.createTempFile("input_", ".pdf"); - Files.copy( - inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING); + inputFile.transferTo(tempInputFile); // Prepare the output directory tempOutputDir = Files.createTempDirectory("output_"); @@ -180,7 +177,7 @@ public class PDFToFile { } finally { // Clean up the temporary files - if (tempInputFile != null) Files.delete(tempInputFile); + Files.deleteIfExists(tempInputFile); if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile()); } System.out.println("fileBytes=" + fileBytes.length);