mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2026-04-22 23:08:53 +02:00
feat:centralise temp-file management & cleanup across app/docker (#3797)
# Description of Changes Introduces TempFileManager, registry, and scheduled cleanup service; aligns all Docker images and runtime scripts to use a dedicated /tmp/stirling-pdf directory; updates controllers, utilities, and tests to use the new API; adds configurable system.tempFileManagement section. Closes #(issue_number) --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details. --------- Co-authored-by: a <a>
This commit is contained in:
@@ -24,6 +24,7 @@ import stirling.software.common.configuration.RuntimePathConfig;
|
||||
import stirling.software.common.model.api.converters.EmlToPdfRequest;
|
||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
import stirling.software.common.util.EmlToPdf;
|
||||
import stirling.software.common.util.TempFileManager;
|
||||
import stirling.software.common.util.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@@ -35,6 +36,7 @@ public class ConvertEmlToPDF {
|
||||
|
||||
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||
private final RuntimePathConfig runtimePathConfig;
|
||||
private final TempFileManager tempFileManager;
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/eml/pdf")
|
||||
@Operation(
|
||||
@@ -102,7 +104,8 @@ public class ConvertEmlToPDF {
|
||||
fileBytes,
|
||||
originalFilename,
|
||||
false,
|
||||
pdfDocumentFactory);
|
||||
pdfDocumentFactory,
|
||||
tempFileManager);
|
||||
|
||||
if (pdfBytes == null || pdfBytes.length == 0) {
|
||||
log.error("PDF conversion failed - empty output for {}", originalFilename);
|
||||
|
||||
@@ -18,6 +18,7 @@ import stirling.software.common.model.ApplicationProperties;
|
||||
import stirling.software.common.model.api.converters.HTMLToPdfRequest;
|
||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
import stirling.software.common.util.FileToPdf;
|
||||
import stirling.software.common.util.TempFileManager;
|
||||
import stirling.software.common.util.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@@ -32,6 +33,8 @@ public class ConvertHtmlToPDF {
|
||||
|
||||
private final RuntimePathConfig runtimePathConfig;
|
||||
|
||||
private final TempFileManager tempFileManager;
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/html/pdf")
|
||||
@Operation(
|
||||
summary = "Convert an HTML or ZIP (containing HTML and CSS) to PDF",
|
||||
@@ -62,7 +65,8 @@ public class ConvertHtmlToPDF {
|
||||
request,
|
||||
fileInput.getBytes(),
|
||||
originalFilename,
|
||||
disableSanitize);
|
||||
disableSanitize,
|
||||
tempFileManager);
|
||||
|
||||
pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes);
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@ import stirling.software.common.model.ApplicationProperties;
|
||||
import stirling.software.common.model.api.GeneralFile;
|
||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
import stirling.software.common.util.FileToPdf;
|
||||
import stirling.software.common.util.TempFileManager;
|
||||
import stirling.software.common.util.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@@ -41,6 +42,8 @@ public class ConvertMarkdownToPdf {
|
||||
private final ApplicationProperties applicationProperties;
|
||||
private final RuntimePathConfig runtimePathConfig;
|
||||
|
||||
private final TempFileManager tempFileManager;
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/markdown/pdf")
|
||||
@Operation(
|
||||
summary = "Convert a Markdown file to PDF",
|
||||
@@ -82,7 +85,8 @@ public class ConvertMarkdownToPdf {
|
||||
null,
|
||||
htmlContent.getBytes(),
|
||||
"converted.html",
|
||||
disableSanitize);
|
||||
disableSanitize,
|
||||
tempFileManager);
|
||||
pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes);
|
||||
String outputFilename =
|
||||
originalFilename.replaceFirst("[.][^.]+$", "")
|
||||
|
||||
@@ -2,7 +2,6 @@ package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.*;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.*;
|
||||
import java.util.zip.ZipEntry;
|
||||
@@ -23,7 +22,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.github.pixee.security.BoundedLineReader;
|
||||
import io.github.pixee.security.Filenames;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
@@ -34,6 +32,9 @@ import lombok.extern.slf4j.Slf4j;
|
||||
import stirling.software.SPDF.model.api.misc.ProcessPdfWithOcrRequest;
|
||||
import stirling.software.common.model.ApplicationProperties;
|
||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
import stirling.software.common.util.ProcessExecutor;
|
||||
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
|
||||
import stirling.software.common.util.TempFileManager;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/misc")
|
||||
@@ -43,8 +44,8 @@ import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
public class OCRController {
|
||||
|
||||
private final ApplicationProperties applicationProperties;
|
||||
|
||||
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||
private final TempFileManager tempFileManager;
|
||||
|
||||
/** Gets the list of available Tesseract languages from the tessdata directory */
|
||||
public List<String> getAvailableTesseractLanguages() {
|
||||
@@ -73,93 +74,117 @@ public class OCRController {
|
||||
MultipartFile inputFile = request.getFileInput();
|
||||
List<String> languages = request.getLanguages();
|
||||
String ocrType = request.getOcrType();
|
||||
Path tempDir = Files.createTempDirectory("ocr_process");
|
||||
Path tempInputFile = tempDir.resolve("input.pdf");
|
||||
Path tempOutputDir = tempDir.resolve("output");
|
||||
Path tempImagesDir = tempDir.resolve("images");
|
||||
Path finalOutputFile = tempDir.resolve("final_output.pdf");
|
||||
Files.createDirectories(tempOutputDir);
|
||||
Files.createDirectories(tempImagesDir);
|
||||
Process process = null;
|
||||
|
||||
// Create a temp directory using TempFileManager directly
|
||||
Path tempDirPath = tempFileManager.createTempDirectory();
|
||||
File tempDir = tempDirPath.toFile();
|
||||
|
||||
try {
|
||||
File tempInputFile = new File(tempDir, "input.pdf");
|
||||
File tempOutputDir = new File(tempDir, "output");
|
||||
File tempImagesDir = new File(tempDir, "images");
|
||||
File finalOutputFile = new File(tempDir, "final_output.pdf");
|
||||
|
||||
// Create directories
|
||||
tempOutputDir.mkdirs();
|
||||
tempImagesDir.mkdirs();
|
||||
|
||||
// Save input file
|
||||
inputFile.transferTo(tempInputFile.toFile());
|
||||
inputFile.transferTo(tempInputFile);
|
||||
|
||||
PDFMergerUtility merger = new PDFMergerUtility();
|
||||
merger.setDestinationFileName(finalOutputFile.toString());
|
||||
try (PDDocument document = pdfDocumentFactory.load(tempInputFile.toFile())) {
|
||||
|
||||
try (PDDocument document = pdfDocumentFactory.load(tempInputFile)) {
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
int pageCount = document.getNumberOfPages();
|
||||
|
||||
for (int pageNum = 0; pageNum < pageCount; pageNum++) {
|
||||
PDPage page = document.getPage(pageNum);
|
||||
boolean hasText = false;
|
||||
|
||||
// Check for existing text
|
||||
try (PDDocument tempDoc = new PDDocument()) {
|
||||
tempDoc.addPage(page);
|
||||
PDFTextStripper stripper = new PDFTextStripper();
|
||||
hasText = !stripper.getText(tempDoc).trim().isEmpty();
|
||||
}
|
||||
|
||||
boolean shouldOcr =
|
||||
switch (ocrType) {
|
||||
case "skip-text" -> !hasText;
|
||||
case "force-ocr" -> true;
|
||||
default -> true;
|
||||
};
|
||||
Path pageOutputPath =
|
||||
tempOutputDir.resolve(String.format("page_%d.pdf", pageNum));
|
||||
|
||||
File pageOutputPath =
|
||||
new File(tempOutputDir, String.format("page_%d.pdf", pageNum));
|
||||
|
||||
if (shouldOcr) {
|
||||
// Convert page to image
|
||||
BufferedImage image = pdfRenderer.renderImageWithDPI(pageNum, 300);
|
||||
Path imagePath =
|
||||
tempImagesDir.resolve(String.format("page_%d.png", pageNum));
|
||||
ImageIO.write(image, "png", imagePath.toFile());
|
||||
File imagePath =
|
||||
new File(tempImagesDir, String.format("page_%d.png", pageNum));
|
||||
ImageIO.write(image, "png", imagePath);
|
||||
|
||||
// Build OCR command
|
||||
List<String> command = new ArrayList<>();
|
||||
command.add("tesseract");
|
||||
command.add(imagePath.toString());
|
||||
command.add(
|
||||
tempOutputDir
|
||||
.resolve(String.format("page_%d", pageNum))
|
||||
new File(tempOutputDir, String.format("page_%d", pageNum))
|
||||
.toString());
|
||||
command.add("-l");
|
||||
command.add(String.join("+", languages));
|
||||
// Always output PDF
|
||||
command.add("pdf");
|
||||
ProcessBuilder pb = new ProcessBuilder(command);
|
||||
process = pb.start();
|
||||
// Capture any error output
|
||||
try (BufferedReader reader =
|
||||
new BufferedReader(
|
||||
new InputStreamReader(process.getErrorStream()))) {
|
||||
String line;
|
||||
while ((line = BoundedLineReader.readLine(reader, 5_000_000)) != null) {
|
||||
log.debug("Tesseract: {}", line);
|
||||
|
||||
// Use ProcessExecutor to run tesseract command
|
||||
try {
|
||||
ProcessExecutorResult result =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.TESSERACT)
|
||||
.runCommandWithOutputHandling(command);
|
||||
|
||||
log.debug(
|
||||
"Tesseract OCR completed for page {} with exit code {}",
|
||||
pageNum,
|
||||
result.getRc());
|
||||
|
||||
// Add OCR'd PDF to merger
|
||||
merger.addSource(pageOutputPath);
|
||||
} catch (IOException | InterruptedException e) {
|
||||
log.error(
|
||||
"Error processing page {} with tesseract: {}",
|
||||
pageNum,
|
||||
e.getMessage());
|
||||
// If OCR fails, fall back to the original page
|
||||
try (PDDocument pageDoc = new PDDocument()) {
|
||||
pageDoc.addPage(page);
|
||||
pageDoc.save(pageOutputPath);
|
||||
merger.addSource(pageOutputPath);
|
||||
}
|
||||
}
|
||||
int exitCode = process.waitFor();
|
||||
if (exitCode != 0) {
|
||||
throw new RuntimeException(
|
||||
"Tesseract failed with exit code: " + exitCode);
|
||||
}
|
||||
// Add OCR'd PDF to merger
|
||||
merger.addSource(pageOutputPath.toFile());
|
||||
} else {
|
||||
// Save original page without OCR
|
||||
try (PDDocument pageDoc = new PDDocument()) {
|
||||
pageDoc.addPage(page);
|
||||
pageDoc.save(pageOutputPath.toFile());
|
||||
merger.addSource(pageOutputPath.toFile());
|
||||
pageDoc.save(pageOutputPath);
|
||||
merger.addSource(pageOutputPath);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Merge all pages into final PDF
|
||||
merger.mergeDocuments(null);
|
||||
|
||||
// Read the final PDF file
|
||||
byte[] pdfContent = Files.readAllBytes(finalOutputFile);
|
||||
byte[] pdfContent = java.nio.file.Files.readAllBytes(finalOutputFile.toPath());
|
||||
String outputFilename =
|
||||
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
|
||||
.replaceFirst("[.][^.]+$", "")
|
||||
+ "_OCR.pdf";
|
||||
|
||||
return ResponseEntity.ok()
|
||||
.header(
|
||||
"Content-Disposition",
|
||||
@@ -167,11 +192,8 @@ public class OCRController {
|
||||
.contentType(MediaType.APPLICATION_PDF)
|
||||
.body(pdfContent);
|
||||
} finally {
|
||||
if (process != null) {
|
||||
process.destroy();
|
||||
}
|
||||
// Clean up temporary files
|
||||
deleteDirectory(tempDir);
|
||||
// Clean up the temp directory and all its contents
|
||||
tempFileManager.deleteTempDirectory(tempDirPath);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -192,21 +214,4 @@ public class OCRController {
|
||||
zipOut.closeEntry();
|
||||
}
|
||||
}
|
||||
|
||||
private void deleteDirectory(Path directory) {
|
||||
try {
|
||||
Files.walk(directory)
|
||||
.sorted(Comparator.reverseOrder())
|
||||
.forEach(
|
||||
path -> {
|
||||
try {
|
||||
Files.delete(path);
|
||||
} catch (IOException e) {
|
||||
log.error("Error deleting {}: {}", path, e.getMessage());
|
||||
}
|
||||
});
|
||||
} catch (IOException e) {
|
||||
log.error("Error walking directory {}: {}", directory, e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@@ -23,6 +21,8 @@ import stirling.software.common.model.api.PDFFile;
|
||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
import stirling.software.common.util.ProcessExecutor;
|
||||
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
|
||||
import stirling.software.common.util.TempFile;
|
||||
import stirling.software.common.util.TempFileManager;
|
||||
import stirling.software.common.util.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@@ -32,6 +32,7 @@ import stirling.software.common.util.WebResponseUtils;
|
||||
public class RepairController {
|
||||
|
||||
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||
private final TempFileManager tempFileManager;
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/repair")
|
||||
@Operation(
|
||||
@@ -43,25 +44,25 @@ public class RepairController {
|
||||
public ResponseEntity<byte[]> repairPdf(@ModelAttribute PDFFile file)
|
||||
throws IOException, InterruptedException {
|
||||
MultipartFile inputFile = file.getFileInput();
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile = Files.createTempFile("input_", ".pdf");
|
||||
byte[] pdfBytes = null;
|
||||
inputFile.transferTo(tempInputFile.toFile());
|
||||
try {
|
||||
|
||||
// Use TempFile with try-with-resources for automatic cleanup
|
||||
try (TempFile tempFile = new TempFile(tempFileManager, ".pdf")) {
|
||||
// Save the uploaded file to the temporary location
|
||||
inputFile.transferTo(tempFile.getFile());
|
||||
|
||||
List<String> command = new ArrayList<>();
|
||||
command.add("qpdf");
|
||||
command.add("--replace-input"); // Automatically fixes problems it can
|
||||
command.add("--qdf"); // Linearizes and normalizes PDF structure
|
||||
command.add("--object-streams=disable"); // Can help with some corruptions
|
||||
command.add(tempInputFile.toString());
|
||||
command.add(tempFile.getFile().getAbsolutePath());
|
||||
|
||||
ProcessExecutorResult returnCode =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
|
||||
.runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the optimized PDF file
|
||||
pdfBytes = pdfDocumentFactory.loadToBytes(tempInputFile.toFile());
|
||||
byte[] pdfBytes = pdfDocumentFactory.loadToBytes(tempFile.getFile());
|
||||
|
||||
// Return the optimized PDF as a response
|
||||
String outputFilename =
|
||||
@@ -69,9 +70,6 @@ public class RepairController {
|
||||
.replaceFirst("[.][^.]+$", "")
|
||||
+ "_repaired.pdf";
|
||||
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
|
||||
} finally {
|
||||
// Clean up the temporary files
|
||||
Files.deleteIfExists(tempInputFile);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,7 +6,6 @@ import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.util.List;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
@@ -40,6 +39,8 @@ import lombok.RequiredArgsConstructor;
|
||||
|
||||
import stirling.software.SPDF.model.api.misc.AddStampRequest;
|
||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
import stirling.software.common.util.TempFile;
|
||||
import stirling.software.common.util.TempFileManager;
|
||||
import stirling.software.common.util.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@@ -49,6 +50,7 @@ import stirling.software.common.util.WebResponseUtils;
|
||||
public class StampController {
|
||||
|
||||
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||
private final TempFileManager tempFileManager;
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/add-stamp")
|
||||
@Operation(
|
||||
@@ -188,14 +190,14 @@ public class StampController {
|
||||
if (!"".equals(resourceDir)) {
|
||||
ClassPathResource classPathResource = new ClassPathResource(resourceDir);
|
||||
String fileExtension = resourceDir.substring(resourceDir.lastIndexOf("."));
|
||||
File tempFile = Files.createTempFile("NotoSansFont", fileExtension).toFile();
|
||||
try (InputStream is = classPathResource.getInputStream();
|
||||
FileOutputStream os = new FileOutputStream(tempFile)) {
|
||||
IOUtils.copy(is, os);
|
||||
font = PDType0Font.load(document, tempFile);
|
||||
} finally {
|
||||
if (tempFile != null) {
|
||||
Files.deleteIfExists(tempFile.toPath());
|
||||
|
||||
// Use TempFile with try-with-resources for automatic cleanup
|
||||
try (TempFile tempFileWrapper = new TempFile(tempFileManager, fileExtension)) {
|
||||
File tempFile = tempFileWrapper.getFile();
|
||||
try (InputStream is = classPathResource.getInputStream();
|
||||
FileOutputStream os = new FileOutputStream(tempFile)) {
|
||||
IOUtils.copy(is, os);
|
||||
font = PDType0Font.load(document, tempFile);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user