feat: centralise temp-file management & cleanup across app/docker (#3797)

# Description of Changes

Introduces `TempFileManager`, a registry, and a scheduled cleanup service; aligns all Docker images and runtime scripts to use a dedicated `/tmp/stirling-pdf` directory; updates controllers, utilities, and tests to use the new API; adds a configurable `system.tempFileManagement` section.

Closes #(issue_number)

---

## Checklist

### General

- [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable)
- [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed)
- [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details.

---------

Co-authored-by: a <a>
2026-04-22 23:08:53 +02:00 · 2025-06-25 18:32:28 +01:00
parent 32aa568196
commit bc9c127819
36 changed files with 2167 additions and 213 deletions
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertEmlToPDF.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertEmlToPDF.java
@@ -24,6 +24,7 @@ import stirling.software.common.configuration.RuntimePathConfig;
 import stirling.software.common.model.api.converters.EmlToPdfRequest;
 import stirling.software.common.service.CustomPDFDocumentFactory;
 import stirling.software.common.util.EmlToPdf;
+import stirling.software.common.util.TempFileManager;
 import stirling.software.common.util.WebResponseUtils;

@RestController
@@ -35,6 +36,7 @@ public class ConvertEmlToPDF {

    private final CustomPDFDocumentFactory pdfDocumentFactory;
    private final RuntimePathConfig runtimePathConfig;
+    private final TempFileManager tempFileManager;

    @PostMapping(consumes = "multipart/form-data", value = "/eml/pdf")
    @Operation(
@@ -102,7 +104,8 @@ public class ConvertEmlToPDF {
                                fileBytes,
                                originalFilename,
                                false,
-                                pdfDocumentFactory);
+                                pdfDocumentFactory,
+                                tempFileManager);

                if (pdfBytes == null || pdfBytes.length == 0) {
                    log.error("PDF conversion failed - empty output for {}", originalFilename);
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java
@@ -18,6 +18,7 @@ import stirling.software.common.model.ApplicationProperties;
 import stirling.software.common.model.api.converters.HTMLToPdfRequest;
 import stirling.software.common.service.CustomPDFDocumentFactory;
 import stirling.software.common.util.FileToPdf;
+import stirling.software.common.util.TempFileManager;
 import stirling.software.common.util.WebResponseUtils;

@RestController
@@ -32,6 +33,8 @@ public class ConvertHtmlToPDF {

    private final RuntimePathConfig runtimePathConfig;

+    private final TempFileManager tempFileManager;
+
    @PostMapping(consumes = "multipart/form-data", value = "/html/pdf")
    @Operation(
            summary = "Convert an HTML or ZIP (containing HTML and CSS) to PDF",
@@ -62,7 +65,8 @@ public class ConvertHtmlToPDF {
                        request,
                        fileInput.getBytes(),
                        originalFilename,
-                        disableSanitize);
+                        disableSanitize,
+                        tempFileManager);

        pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes);

--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java
@@ -28,6 +28,7 @@ import stirling.software.common.model.ApplicationProperties;
 import stirling.software.common.model.api.GeneralFile;
 import stirling.software.common.service.CustomPDFDocumentFactory;
 import stirling.software.common.util.FileToPdf;
+import stirling.software.common.util.TempFileManager;
 import stirling.software.common.util.WebResponseUtils;

@RestController
@@ -41,6 +42,8 @@ public class ConvertMarkdownToPdf {
    private final ApplicationProperties applicationProperties;
    private final RuntimePathConfig runtimePathConfig;

+    private final TempFileManager tempFileManager;
+
    @PostMapping(consumes = "multipart/form-data", value = "/markdown/pdf")
    @Operation(
            summary = "Convert a Markdown file to PDF",
@@ -82,7 +85,8 @@ public class ConvertMarkdownToPdf {
                        null,
                        htmlContent.getBytes(),
                        "converted.html",
-                        disableSanitize);
+                        disableSanitize,
+                        tempFileManager);
        pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes);
        String outputFilename =
                originalFilename.replaceFirst("[.][^.]+$", "")
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java
@@ -2,7 +2,6 @@ package stirling.software.SPDF.controller.api.misc;

 import java.awt.image.BufferedImage;
 import java.io.*;
-import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.*;
 import java.util.zip.ZipEntry;
@@ -23,7 +22,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
 import org.springframework.web.bind.annotation.RestController;
 import org.springframework.web.multipart.MultipartFile;

-import io.github.pixee.security.BoundedLineReader;
 import io.github.pixee.security.Filenames;
 import io.swagger.v3.oas.annotations.Operation;
 import io.swagger.v3.oas.annotations.tags.Tag;
@@ -34,6 +32,9 @@ import lombok.extern.slf4j.Slf4j;
 import stirling.software.SPDF.model.api.misc.ProcessPdfWithOcrRequest;
 import stirling.software.common.model.ApplicationProperties;
 import stirling.software.common.service.CustomPDFDocumentFactory;
+import stirling.software.common.util.ProcessExecutor;
+import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
+import stirling.software.common.util.TempFileManager;

@RestController
@RequestMapping("/api/v1/misc")
@@ -43,8 +44,8 @@ import stirling.software.common.service.CustomPDFDocumentFactory;
 public class OCRController {

    private final ApplicationProperties applicationProperties;
-
    private final CustomPDFDocumentFactory pdfDocumentFactory;
+    private final TempFileManager tempFileManager;

    /** Gets the list of available Tesseract languages from the tessdata directory */
    public List<String> getAvailableTesseractLanguages() {
@@ -73,93 +74,117 @@ public class OCRController {
        MultipartFile inputFile = request.getFileInput();
        List<String> languages = request.getLanguages();
        String ocrType = request.getOcrType();
-        Path tempDir = Files.createTempDirectory("ocr_process");
-        Path tempInputFile = tempDir.resolve("input.pdf");
-        Path tempOutputDir = tempDir.resolve("output");
-        Path tempImagesDir = tempDir.resolve("images");
-        Path finalOutputFile = tempDir.resolve("final_output.pdf");
-        Files.createDirectories(tempOutputDir);
-        Files.createDirectories(tempImagesDir);
-        Process process = null;
+
+        // Create a temp directory using TempFileManager directly
+        Path tempDirPath = tempFileManager.createTempDirectory();
+        File tempDir = tempDirPath.toFile();
+
        try {
+            File tempInputFile = new File(tempDir, "input.pdf");
+            File tempOutputDir = new File(tempDir, "output");
+            File tempImagesDir = new File(tempDir, "images");
+            File finalOutputFile = new File(tempDir, "final_output.pdf");
+
+            // Create directories
+            tempOutputDir.mkdirs();
+            tempImagesDir.mkdirs();
+
            // Save input file
-            inputFile.transferTo(tempInputFile.toFile());
+            inputFile.transferTo(tempInputFile);
+
            PDFMergerUtility merger = new PDFMergerUtility();
            merger.setDestinationFileName(finalOutputFile.toString());
-            try (PDDocument document = pdfDocumentFactory.load(tempInputFile.toFile())) {
+
+            try (PDDocument document = pdfDocumentFactory.load(tempInputFile)) {
                PDFRenderer pdfRenderer = new PDFRenderer(document);
                int pageCount = document.getNumberOfPages();
+
                for (int pageNum = 0; pageNum < pageCount; pageNum++) {
                    PDPage page = document.getPage(pageNum);
                    boolean hasText = false;
+
                    // Check for existing text
                    try (PDDocument tempDoc = new PDDocument()) {
                        tempDoc.addPage(page);
                        PDFTextStripper stripper = new PDFTextStripper();
                        hasText = !stripper.getText(tempDoc).trim().isEmpty();
                    }
+
                    boolean shouldOcr =
                            switch (ocrType) {
                                case "skip-text" -> !hasText;
                                case "force-ocr" -> true;
                                default -> true;
                            };
-                    Path pageOutputPath =
-                            tempOutputDir.resolve(String.format("page_%d.pdf", pageNum));
+
+                    File pageOutputPath =
+                            new File(tempOutputDir, String.format("page_%d.pdf", pageNum));
+
                    if (shouldOcr) {
                        // Convert page to image
                        BufferedImage image = pdfRenderer.renderImageWithDPI(pageNum, 300);
-                        Path imagePath =
-                                tempImagesDir.resolve(String.format("page_%d.png", pageNum));
-                        ImageIO.write(image, "png", imagePath.toFile());
+                        File imagePath =
+                                new File(tempImagesDir, String.format("page_%d.png", pageNum));
+                        ImageIO.write(image, "png", imagePath);
+
                        // Build OCR command
                        List<String> command = new ArrayList<>();
                        command.add("tesseract");
                        command.add(imagePath.toString());
                        command.add(
-                                tempOutputDir
-                                        .resolve(String.format("page_%d", pageNum))
+                                new File(tempOutputDir, String.format("page_%d", pageNum))
                                        .toString());
                        command.add("-l");
                        command.add(String.join("+", languages));
                        // Always output PDF
                        command.add("pdf");
-                        ProcessBuilder pb = new ProcessBuilder(command);
-                        process = pb.start();
-                        // Capture any error output
-                        try (BufferedReader reader =
-                                new BufferedReader(
-                                        new InputStreamReader(process.getErrorStream()))) {
-                            String line;
-                            while ((line = BoundedLineReader.readLine(reader, 5_000_000)) != null) {
-                                log.debug("Tesseract: {}", line);
+
+                        // Use ProcessExecutor to run tesseract command
+                        try {
+                            ProcessExecutorResult result =
+                                    ProcessExecutor.getInstance(ProcessExecutor.Processes.TESSERACT)
+                                            .runCommandWithOutputHandling(command);
+
+                            log.debug(
+                                    "Tesseract OCR completed for page {} with exit code {}",
+                                    pageNum,
+                                    result.getRc());
+
+                            // Add OCR'd PDF to merger
+                            merger.addSource(pageOutputPath);
+                        } catch (IOException | InterruptedException e) {
+                            log.error(
+                                    "Error processing page {} with tesseract: {}",
+                                    pageNum,
+                                    e.getMessage());
+                            // If OCR fails, fall back to the original page
+                            try (PDDocument pageDoc = new PDDocument()) {
+                                pageDoc.addPage(page);
+                                pageDoc.save(pageOutputPath);
+                                merger.addSource(pageOutputPath);
                            }
                        }
-                        int exitCode = process.waitFor();
-                        if (exitCode != 0) {
-                            throw new RuntimeException(
-                                    "Tesseract failed with exit code: " + exitCode);
-                        }
-                        // Add OCR'd PDF to merger
-                        merger.addSource(pageOutputPath.toFile());
                    } else {
                        // Save original page without OCR
                        try (PDDocument pageDoc = new PDDocument()) {
                            pageDoc.addPage(page);
-                            pageDoc.save(pageOutputPath.toFile());
-                            merger.addSource(pageOutputPath.toFile());
+                            pageDoc.save(pageOutputPath);
+                            merger.addSource(pageOutputPath);
                        }
                    }
                }
            }
+
            // Merge all pages into final PDF
            merger.mergeDocuments(null);
+
            // Read the final PDF file
-            byte[] pdfContent = Files.readAllBytes(finalOutputFile);
+            byte[] pdfContent = java.nio.file.Files.readAllBytes(finalOutputFile.toPath());
            String outputFilename =
                    Filenames.toSimpleFileName(inputFile.getOriginalFilename())
                                    .replaceFirst("[.][^.]+$", "")
                            + "_OCR.pdf";
+
            return ResponseEntity.ok()
                    .header(
                            "Content-Disposition",
@@ -167,11 +192,8 @@ public class OCRController {
                    .contentType(MediaType.APPLICATION_PDF)
                    .body(pdfContent);
        } finally {
-            if (process != null) {
-                process.destroy();
-            }
-            // Clean up temporary files
-            deleteDirectory(tempDir);
+            // Clean up the temp directory and all its contents
+            tempFileManager.deleteTempDirectory(tempDirPath);
        }
    }

@@ -192,21 +214,4 @@ public class OCRController {
            zipOut.closeEntry();
        }
    }
-
-    private void deleteDirectory(Path directory) {
-        try {
-            Files.walk(directory)
-                    .sorted(Comparator.reverseOrder())
-                    .forEach(
-                            path -> {
-                                try {
-                                    Files.delete(path);
-                                } catch (IOException e) {
-                                    log.error("Error deleting {}: {}", path, e.getMessage());
-                                }
-                            });
-        } catch (IOException e) {
-            log.error("Error walking directory {}: {}", directory, e.getMessage());
-        }
-    }
 }
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java
@@ -1,8 +1,6 @@
 package stirling.software.SPDF.controller.api.misc;

 import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.List;

@@ -23,6 +21,8 @@ import stirling.software.common.model.api.PDFFile;
 import stirling.software.common.service.CustomPDFDocumentFactory;
 import stirling.software.common.util.ProcessExecutor;
 import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
+import stirling.software.common.util.TempFile;
+import stirling.software.common.util.TempFileManager;
 import stirling.software.common.util.WebResponseUtils;

@RestController
@@ -32,6 +32,7 @@ import stirling.software.common.util.WebResponseUtils;
 public class RepairController {

    private final CustomPDFDocumentFactory pdfDocumentFactory;
+    private final TempFileManager tempFileManager;

    @PostMapping(consumes = "multipart/form-data", value = "/repair")
    @Operation(
@@ -43,25 +44,25 @@ public class RepairController {
    public ResponseEntity<byte[]> repairPdf(@ModelAttribute PDFFile file)
            throws IOException, InterruptedException {
        MultipartFile inputFile = file.getFileInput();
-        // Save the uploaded file to a temporary location
-        Path tempInputFile = Files.createTempFile("input_", ".pdf");
-        byte[] pdfBytes = null;
-        inputFile.transferTo(tempInputFile.toFile());
-        try {
+
+        // Use TempFile with try-with-resources for automatic cleanup
+        try (TempFile tempFile = new TempFile(tempFileManager, ".pdf")) {
+            // Save the uploaded file to the temporary location
+            inputFile.transferTo(tempFile.getFile());

            List<String> command = new ArrayList<>();
            command.add("qpdf");
            command.add("--replace-input"); // Automatically fixes problems it can
            command.add("--qdf"); // Linearizes and normalizes PDF structure
            command.add("--object-streams=disable"); // Can help with some corruptions
-            command.add(tempInputFile.toString());
+            command.add(tempFile.getFile().getAbsolutePath());

            ProcessExecutorResult returnCode =
                    ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
                            .runCommandWithOutputHandling(command);

            // Read the optimized PDF file
-            pdfBytes = pdfDocumentFactory.loadToBytes(tempInputFile.toFile());
+            byte[] pdfBytes = pdfDocumentFactory.loadToBytes(tempFile.getFile());

            // Return the optimized PDF as a response
            String outputFilename =
@@ -69,9 +70,6 @@ public class RepairController {
                                    .replaceFirst("[.][^.]+$", "")
                            + "_repaired.pdf";
            return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
-        } finally {
-            // Clean up the temporary files
-            Files.deleteIfExists(tempInputFile);
        }
    }
 }
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/StampController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/StampController.java
@@ -6,7 +6,6 @@ import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.nio.file.Files;
 import java.util.List;

 import javax.imageio.ImageIO;
@@ -40,6 +39,8 @@ import lombok.RequiredArgsConstructor;

 import stirling.software.SPDF.model.api.misc.AddStampRequest;
 import stirling.software.common.service.CustomPDFDocumentFactory;
+import stirling.software.common.util.TempFile;
+import stirling.software.common.util.TempFileManager;
 import stirling.software.common.util.WebResponseUtils;

@RestController
@@ -49,6 +50,7 @@ import stirling.software.common.util.WebResponseUtils;
 public class StampController {

    private final CustomPDFDocumentFactory pdfDocumentFactory;
+    private final TempFileManager tempFileManager;

    @PostMapping(consumes = "multipart/form-data", value = "/add-stamp")
    @Operation(
@@ -188,14 +190,14 @@ public class StampController {
        if (!"".equals(resourceDir)) {
            ClassPathResource classPathResource = new ClassPathResource(resourceDir);
            String fileExtension = resourceDir.substring(resourceDir.lastIndexOf("."));
-            File tempFile = Files.createTempFile("NotoSansFont", fileExtension).toFile();
-            try (InputStream is = classPathResource.getInputStream();
-                    FileOutputStream os = new FileOutputStream(tempFile)) {
-                IOUtils.copy(is, os);
-                font = PDType0Font.load(document, tempFile);
-            } finally {
-                if (tempFile != null) {
-                    Files.deleteIfExists(tempFile.toPath());
+
+            // Use TempFile with try-with-resources for automatic cleanup
+            try (TempFile tempFileWrapper = new TempFile(tempFileManager, fileExtension)) {
+                File tempFile = tempFileWrapper.getFile();
+                try (InputStream is = classPathResource.getInputStream();
+                        FileOutputStream os = new FileOutputStream(tempFile)) {
+                    IOUtils.copy(is, os);
+                    font = PDType0Font.load(document, tempFile);
                }
            }
        }