diff --git a/app/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java b/app/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java index d106a2729..a7f158539 100644 --- a/app/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java +++ b/app/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java @@ -41,7 +41,7 @@ public class CustomPDFDocumentFactory { // Memory thresholds and limits - private static final long SMALL_FILE_THRESHOLD = 10 * 1024 * 1024; // 10 MB + public static final long SMALL_FILE_THRESHOLD = 10 * 1024 * 1024; // 10 MB // Files smaller than this threshold are loaded entirely in memory for better performance. // These files use IOUtils.createMemoryOnlyStreamCache() which keeps all document data in RAM. // No temp files are created for document data, reducing I/O operations but consuming more diff --git a/app/proprietary/src/main/java/stirling/software/SPDF/service/PdfJsonConversionService.java b/app/proprietary/src/main/java/stirling/software/SPDF/service/PdfJsonConversionService.java index e194e3b5f..dfeb095f3 100644 --- a/app/proprietary/src/main/java/stirling/software/SPDF/service/PdfJsonConversionService.java +++ b/app/proprietary/src/main/java/stirling/software/SPDF/service/PdfJsonConversionService.java @@ -309,6 +309,13 @@ public class PdfJsonConversionService { byte[] cachedPdfBytes = null; + // Pre-read file bytes before loading PDDocument, since loading may delete the file + // (small files get loaded into memory and original is deleted) + // This is needed for lazy image caching where we need the bytes later + if (Files.size(workingPath) <= CustomPDFDocumentFactory.SMALL_FILE_THRESHOLD) { + cachedPdfBytes = Files.readAllBytes(workingPath); + } + try (PDDocument document = pdfDocumentFactory.load(workingPath, true)) { int totalPages = document.getNumberOfPages(); // Only use lazy images for real async jobs where client can access the cache