From b661eb8ff03e2808dd8a9979c2a7b18db1d9f0a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Sz=C3=BCcs?= <127139797+balazs-szucs@users.noreply.github.com> Date: Sat, 4 Oct 2025 11:49:39 +0200 Subject: [PATCH] refactor: Replace manual resource handling with try-with-resources (#4593) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description of Changes Explanation of try-with-resources: > Many resources in Java need be closed after they have been used. If they are not, the garbage collector cannot reclaim the resources' memory, and they are still considered to be in use by the operating system. Such resources are considered to be leaked, which can lead to performance issues. > > Java 7 introduced the try-with-resources statement, which guarantees that the resource in question will be closed. > > try (InputStream input = Files.newInputStream(path)) { > // "input" will be closed after the execution of this block > } > > This syntax is safer than the traditional method using try, catch, and finally and hence should be preferred. 
https://rules.sonarsource.com/java/tag/java8/RSPEC-2093/ --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details. 
--------- Signed-off-by: Balázs Szücs --- .../software/common/util/PDFToFile.java | 71 ++++----- .../misc/ColorSpaceConversionStrategy.java | 34 ++--- .../software/common/util/PDFToFileTest.java | 17 ++- .../ReplaceAndInvertColorFactory.java | 8 +- .../api/converters/ConvertPDFToHtml.java | 8 +- .../api/converters/ConvertPDFToOffice.java | 10 +- .../controller/api/misc/OCRController.java | 141 ++++++++---------- .../api/converters/ConvertPDFToMarkdown.java | 8 +- 8 files changed, 145 insertions(+), 152 deletions(-) diff --git a/app/common/src/main/java/stirling/software/common/util/PDFToFile.java b/app/common/src/main/java/stirling/software/common/util/PDFToFile.java index 6b3d61b11..32f2cc874 100644 --- a/app/common/src/main/java/stirling/software/common/util/PDFToFile.java +++ b/app/common/src/main/java/stirling/software/common/util/PDFToFile.java @@ -25,15 +25,19 @@ import com.vladsch.flexmark.util.data.MutableDataSet; import io.github.pixee.security.Filenames; -import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult; @Slf4j -@NoArgsConstructor public class PDFToFile { + private final TempFileManager tempFileManager; + + public PDFToFile(TempFileManager tempFileManager) { + this.tempFileManager = tempFileManager; + } + public ResponseEntity processPdfToMarkdown(MultipartFile inputFile) throws IOException, InterruptedException { if (!MediaType.APPLICATION_PDF_VALUE.equals(inputFile.getContentType())) { @@ -71,15 +75,12 @@ public class PDFToFile { pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.')); } - Path tempInputFile = null; - Path tempOutputDir = null; byte[] fileBytes; String fileName; - try { - tempInputFile = Files.createTempFile("input_", ".pdf"); - inputFile.transferTo(tempInputFile); - tempOutputDir = Files.createTempDirectory("output_"); + try (TempFile tempInputFile = new TempFile(tempFileManager, ".pdf"); + TempDirectory 
tempOutputDir = new TempDirectory(tempFileManager)) { + inputFile.transferTo(tempInputFile.getFile()); List command = new ArrayList<>( @@ -88,14 +89,16 @@ public class PDFToFile { "-s", "-noframes", "-c", - tempInputFile.toString(), + tempInputFile.getAbsolutePath(), pdfBaseName)); ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PDFTOHTML) - .runCommandWithOutputHandling(command, tempOutputDir.toFile()); + .runCommandWithOutputHandling( + command, tempOutputDir.getPath().toFile()); // Process HTML files to Markdown - File[] outputFiles = Objects.requireNonNull(tempOutputDir.toFile().listFiles()); + File[] outputFiles = + Objects.requireNonNull(tempOutputDir.getPath().toFile().listFiles()); List markdownFiles = new ArrayList<>(); // Convert HTML files to Markdown @@ -105,7 +108,7 @@ public class PDFToFile { String markdown = htmlToMarkdownConverter.convert(html); String mdFileName = outputFile.getName().replace(".html", ".md"); - File mdFile = new File(tempOutputDir.toFile(), mdFileName); + File mdFile = new File(tempOutputDir.getPath().toFile(), mdFileName); Files.writeString(mdFile.toPath(), markdown); markdownFiles.add(mdFile); } @@ -142,10 +145,6 @@ public class PDFToFile { fileBytes = byteArrayOutputStream.toByteArray(); } - - } finally { - if (tempInputFile != null) Files.deleteIfExists(tempInputFile); - if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile()); } return WebResponseUtils.bytesToWebResponse( fileBytes, fileName, MediaType.APPLICATION_OCTET_STREAM); @@ -164,18 +163,17 @@ public class PDFToFile { pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.')); } - Path tempInputFile = null; - Path tempOutputDir = null; byte[] fileBytes; String fileName; - try { - // Save the uploaded file to a temporary location - tempInputFile = Files.createTempFile("input_", ".pdf"); - inputFile.transferTo(tempInputFile); + try (TempFile inputFileTemp = new 
TempFile(tempFileManager, ".pdf"); + TempDirectory outputDirTemp = new TempDirectory(tempFileManager)) { - // Prepare the output directory - tempOutputDir = Files.createTempDirectory("output_"); + Path tempInputFile = inputFileTemp.getPath(); + Path tempOutputDir = outputDirTemp.getPath(); + + // Save the uploaded file to a temporary location + inputFile.transferTo(tempInputFile); // Run the pdftohtml command with complex output List command = @@ -208,11 +206,6 @@ public class PDFToFile { log.error("Exception writing zip", e); } fileBytes = byteArrayOutputStream.toByteArray(); - - } finally { - // Clean up the temporary files - if (tempInputFile != null) Files.deleteIfExists(tempInputFile); - if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile()); } return WebResponseUtils.bytesToWebResponse( @@ -245,18 +238,17 @@ public class PDFToFile { return new ResponseEntity<>(HttpStatus.BAD_REQUEST); } - Path tempInputFile = null; - Path tempOutputDir = null; byte[] fileBytes; String fileName; - try { - // Save the uploaded file to a temporary location - tempInputFile = Files.createTempFile("input_", ".pdf"); - inputFile.transferTo(tempInputFile); + try (TempFile inputFileTemp = new TempFile(tempFileManager, ".pdf"); + TempDirectory outputDirTemp = new TempDirectory(tempFileManager)) { - // Prepare the output directory - tempOutputDir = Files.createTempDirectory("output_"); + Path tempInputFile = inputFileTemp.getPath(); + Path tempOutputDir = outputDirTemp.getPath(); + + // Save the uploaded file to a temporary location + inputFile.transferTo(tempInputFile); // Run the LibreOffice command List command = @@ -308,11 +300,6 @@ public class PDFToFile { fileBytes = byteArrayOutputStream.toByteArray(); } - - } finally { - // Clean up the temporary files - Files.deleteIfExists(tempInputFile); - if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile()); } return WebResponseUtils.bytesToWebResponse( fileBytes, fileName, 
MediaType.APPLICATION_OCTET_STREAM); diff --git a/app/common/src/main/java/stirling/software/common/util/misc/ColorSpaceConversionStrategy.java b/app/common/src/main/java/stirling/software/common/util/misc/ColorSpaceConversionStrategy.java index c784bbed6..ca4970b71 100644 --- a/app/common/src/main/java/stirling/software/common/util/misc/ColorSpaceConversionStrategy.java +++ b/app/common/src/main/java/stirling/software/common/util/misc/ColorSpaceConversionStrategy.java @@ -15,22 +15,29 @@ import lombok.extern.slf4j.Slf4j; import stirling.software.common.model.api.misc.ReplaceAndInvert; import stirling.software.common.util.ProcessExecutor; import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult; +import stirling.software.common.util.TempFile; +import stirling.software.common.util.TempFileManager; @Slf4j public class ColorSpaceConversionStrategy extends ReplaceAndInvertColorStrategy { - public ColorSpaceConversionStrategy(MultipartFile file, ReplaceAndInvert replaceAndInvert) { + private final TempFileManager tempFileManager; + + public ColorSpaceConversionStrategy( + MultipartFile file, + ReplaceAndInvert replaceAndInvert, + TempFileManager tempFileManager) { super(file, replaceAndInvert); + this.tempFileManager = tempFileManager; } @Override public InputStreamResource replace() throws IOException { - Path tempInputFile = null; - Path tempOutputFile = null; + try (TempFile tempInput = new TempFile(tempFileManager, ".pdf"); + TempFile tempOutput = new TempFile(tempFileManager, ".pdf")) { - try { - tempInputFile = Files.createTempFile("colorspace_input_", ".pdf"); - tempOutputFile = Files.createTempFile("colorspace_output_", ".pdf"); + Path tempInputFile = tempInput.getPath(); + Path tempOutputFile = tempOutput.getPath(); Files.write(tempInputFile, getFileInput().getBytes()); @@ -74,21 +81,6 @@ public class ColorSpaceConversionStrategy extends ReplaceAndInvertColorStrategy log.warn("CMYK color space conversion failed", e); throw new IOException( 
"Failed to convert PDF to CMYK color space: " + e.getMessage(), e); - } finally { - if (tempInputFile != null) { - try { - Files.deleteIfExists(tempInputFile); - } catch (IOException e) { - log.warn("Failed to delete temporary input file: {}", tempInputFile, e); - } - } - if (tempOutputFile != null) { - try { - Files.deleteIfExists(tempOutputFile); - } catch (IOException e) { - log.warn("Failed to delete temporary output file: {}", tempOutputFile, e); - } - } } } } diff --git a/app/common/src/test/java/stirling/software/common/util/PDFToFileTest.java b/app/common/src/test/java/stirling/software/common/util/PDFToFileTest.java index 9a178a400..2ebb58c0d 100644 --- a/app/common/src/test/java/stirling/software/common/util/PDFToFileTest.java +++ b/app/common/src/test/java/stirling/software/common/util/PDFToFileTest.java @@ -5,7 +5,9 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.ArgumentMatchers.argThat; +import static org.mockito.Mockito.lenient; import static org.mockito.Mockito.mockStatic; import static org.mockito.Mockito.when; @@ -47,10 +49,21 @@ class PDFToFileTest { @Mock private ProcessExecutor mockProcessExecutor; @Mock private ProcessExecutorResult mockExecutorResult; + @Mock private TempFileManager mockTempFileManager; @BeforeEach - void setUp() { - pdfToFile = new PDFToFile(); + void setUp() throws IOException { + // Mock the TempFileManager to return real temp files + lenient() + .when(mockTempFileManager.createTempFile(anyString())) + .thenAnswer( + invocation -> + Files.createTempFile("test", invocation.getArgument(0)).toFile()); + lenient() + .when(mockTempFileManager.createTempDirectory()) + .thenAnswer(invocation -> Files.createTempDirectory("test")); + + pdfToFile = new 
PDFToFile(mockTempFileManager); } @Test diff --git a/app/core/src/main/java/stirling/software/SPDF/Factories/ReplaceAndInvertColorFactory.java b/app/core/src/main/java/stirling/software/SPDF/Factories/ReplaceAndInvertColorFactory.java index e53850ff8..6697beb79 100644 --- a/app/core/src/main/java/stirling/software/SPDF/Factories/ReplaceAndInvertColorFactory.java +++ b/app/core/src/main/java/stirling/software/SPDF/Factories/ReplaceAndInvertColorFactory.java @@ -3,16 +3,22 @@ package stirling.software.SPDF.Factories; import org.springframework.stereotype.Component; import org.springframework.web.multipart.MultipartFile; +import lombok.RequiredArgsConstructor; + import stirling.software.common.model.api.misc.HighContrastColorCombination; import stirling.software.common.model.api.misc.ReplaceAndInvert; +import stirling.software.common.util.TempFileManager; import stirling.software.common.util.misc.ColorSpaceConversionStrategy; import stirling.software.common.util.misc.CustomColorReplaceStrategy; import stirling.software.common.util.misc.InvertFullColorStrategy; import stirling.software.common.util.misc.ReplaceAndInvertColorStrategy; @Component +@RequiredArgsConstructor public class ReplaceAndInvertColorFactory { + private final TempFileManager tempFileManager; + public ReplaceAndInvertColorStrategy replaceAndInvert( MultipartFile file, ReplaceAndInvert replaceAndInvertOption, @@ -30,7 +36,7 @@ public class ReplaceAndInvertColorFactory { highContrastColorCombination); case FULL_INVERSION -> new InvertFullColorStrategy(file, replaceAndInvertOption); case COLOR_SPACE_CONVERSION -> - new ColorSpaceConversionStrategy(file, replaceAndInvertOption); + new ColorSpaceConversionStrategy(file, replaceAndInvertOption, tempFileManager); }; } } diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToHtml.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToHtml.java index ed8f30458..76414ca57 100644 --- 
a/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToHtml.java +++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToHtml.java @@ -11,14 +11,20 @@ import org.springframework.web.multipart.MultipartFile; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; +import lombok.RequiredArgsConstructor; + import stirling.software.common.model.api.PDFFile; import stirling.software.common.util.PDFToFile; +import stirling.software.common.util.TempFileManager; @RestController @Tag(name = "Convert", description = "Convert APIs") @RequestMapping("/api/v1/convert") +@RequiredArgsConstructor public class ConvertPDFToHtml { + private final TempFileManager tempFileManager; + @PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/pdf/html") @Operation( summary = "Convert PDF to HTML", @@ -26,7 +32,7 @@ public class ConvertPDFToHtml { "This endpoint converts a PDF file to HTML format. 
Input:PDF Output:HTML Type:SISO") public ResponseEntity processPdfToHTML(@ModelAttribute PDFFile file) throws Exception { MultipartFile inputFile = file.getFileInput(); - PDFToFile pdfToFile = new PDFToFile(); + PDFToFile pdfToFile = new PDFToFile(tempFileManager); return pdfToFile.processPdfToHtml(inputFile); } } diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java index ac72fb926..d9538de58 100644 --- a/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java +++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java @@ -24,6 +24,7 @@ import stirling.software.common.model.api.PDFFile; import stirling.software.common.service.CustomPDFDocumentFactory; import stirling.software.common.util.GeneralUtils; import stirling.software.common.util.PDFToFile; +import stirling.software.common.util.TempFileManager; import stirling.software.common.util.WebResponseUtils; @RestController @@ -33,6 +34,7 @@ import stirling.software.common.util.WebResponseUtils; public class ConvertPDFToOffice { private final CustomPDFDocumentFactory pdfDocumentFactory; + private final TempFileManager tempFileManager; @PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/pdf/presentation") @Operation( @@ -45,7 +47,7 @@ public class ConvertPDFToOffice { throws IOException, InterruptedException { MultipartFile inputFile = request.getFileInput(); String outputFormat = request.getOutputFormat(); - PDFToFile pdfToFile = new PDFToFile(); + PDFToFile pdfToFile = new PDFToFile(tempFileManager); return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "impress_pdf_import"); } @@ -70,7 +72,7 @@ public class ConvertPDFToOffice { MediaType.TEXT_PLAIN); } } else { - PDFToFile pdfToFile = new PDFToFile(); + PDFToFile pdfToFile = new 
PDFToFile(tempFileManager); return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import"); } } @@ -85,7 +87,7 @@ public class ConvertPDFToOffice { throws IOException, InterruptedException { MultipartFile inputFile = request.getFileInput(); String outputFormat = request.getOutputFormat(); - PDFToFile pdfToFile = new PDFToFile(); + PDFToFile pdfToFile = new PDFToFile(tempFileManager); return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import"); } @@ -98,7 +100,7 @@ public class ConvertPDFToOffice { public ResponseEntity processPdfToXML(@ModelAttribute PDFFile file) throws Exception { MultipartFile inputFile = file.getFileInput(); - PDFToFile pdfToFile = new PDFToFile(); + PDFToFile pdfToFile = new PDFToFile(tempFileManager); return pdfToFile.processPdfToOfficeFormat(inputFile, "xml", "writer_pdf_import"); } } diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java index 0338a76c4..79b65ca33 100644 --- a/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java +++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java @@ -116,101 +116,82 @@ public class OCRController { // Use try-with-resources for proper temp file management try (TempFile tempInputFile = new TempFile(tempFileManager, ".pdf"); - TempFile tempOutputFile = new TempFile(tempFileManager, ".pdf")) { + TempFile tempOutputFile = new TempFile(tempFileManager, ".pdf"); + TempFile sidecarTextFile = sidecar ? 
new TempFile(tempFileManager, ".txt") : null) { inputFile.transferTo(tempInputFile.getFile()); - TempFile sidecarTextFile = null; + // Use OCRmyPDF if available (no fallback - error if it fails) + if (isOcrMyPdfEnabled()) { + processWithOcrMyPdf( + selectedLanguages, + sidecar, + deskew, + clean, + cleanFinal, + ocrType, + ocrRenderType, + removeImagesAfter, + tempInputFile.getPath(), + tempOutputFile.getPath(), + sidecarTextFile != null ? sidecarTextFile.getPath() : null); + log.info("OCRmyPDF processing completed successfully"); + } + // Use Tesseract only if OCRmyPDF is not available + else if (isTesseractEnabled()) { + processWithTesseract( + selectedLanguages, + ocrType, + tempInputFile.getPath(), + tempOutputFile.getPath()); + log.info("Tesseract processing completed successfully"); + } else { + throw ExceptionUtils.createOcrToolsUnavailableException(); + } - try { - // Use OCRmyPDF if available (no fallback - error if it fails) - if (isOcrMyPdfEnabled()) { - if (sidecar) { - sidecarTextFile = new TempFile(tempFileManager, ".txt"); - } + // Read the processed PDF file + byte[] pdfBytes = Files.readAllBytes(tempOutputFile.getPath()); - processWithOcrMyPdf( - selectedLanguages, - sidecar, - deskew, - clean, - cleanFinal, - ocrType, - ocrRenderType, - removeImagesAfter, - tempInputFile.getPath(), - tempOutputFile.getPath(), - sidecarTextFile != null ? 
sidecarTextFile.getPath() : null); - log.info("OCRmyPDF processing completed successfully"); - } - // Use Tesseract only if OCRmyPDF is not available - else if (isTesseractEnabled()) { - processWithTesseract( - selectedLanguages, - ocrType, - tempInputFile.getPath(), - tempOutputFile.getPath()); - log.info("Tesseract processing completed successfully"); - } else { - throw ExceptionUtils.createOcrToolsUnavailableException(); - } + // Return the OCR processed PDF as a response + String outputFilename = + GeneralUtils.removeExtension( + Filenames.toSimpleFileName(inputFile.getOriginalFilename())) + + "_OCR.pdf"; - // Read the processed PDF file - byte[] pdfBytes = Files.readAllBytes(tempOutputFile.getPath()); - - // Return the OCR processed PDF as a response - String outputFilename = + if (sidecar && sidecarTextFile != null) { + // Create a zip file containing both the PDF and the text file + String outputZipFilename = GeneralUtils.removeExtension( Filenames.toSimpleFileName(inputFile.getOriginalFilename())) - + "_OCR.pdf"; + + "_OCR.zip"; - if (sidecar && sidecarTextFile != null) { - // Create a zip file containing both the PDF and the text file - String outputZipFilename = - GeneralUtils.removeExtension( - Filenames.toSimpleFileName( - inputFile.getOriginalFilename())) - + "_OCR.zip"; + try (TempFile tempZipFile = new TempFile(tempFileManager, ".zip"); + ZipOutputStream zipOut = + new ZipOutputStream(Files.newOutputStream(tempZipFile.getPath()))) { - try (TempFile tempZipFile = new TempFile(tempFileManager, ".zip"); - ZipOutputStream zipOut = - new ZipOutputStream( - Files.newOutputStream(tempZipFile.getPath()))) { + // Add PDF file to the zip + ZipEntry pdfEntry = new ZipEntry(outputFilename); + zipOut.putNextEntry(pdfEntry); + zipOut.write(pdfBytes); + zipOut.closeEntry(); - // Add PDF file to the zip - ZipEntry pdfEntry = new ZipEntry(outputFilename); - zipOut.putNextEntry(pdfEntry); - zipOut.write(pdfBytes); - zipOut.closeEntry(); + // Add text file to the zip + 
ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt")); + zipOut.putNextEntry(txtEntry); + Files.copy(sidecarTextFile.getPath(), zipOut); + zipOut.closeEntry(); - // Add text file to the zip - ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt")); - zipOut.putNextEntry(txtEntry); - Files.copy(sidecarTextFile.getPath(), zipOut); - zipOut.closeEntry(); + zipOut.finish(); - zipOut.finish(); + byte[] zipBytes = Files.readAllBytes(tempZipFile.getPath()); - byte[] zipBytes = Files.readAllBytes(tempZipFile.getPath()); - - // Return the zip file containing both the PDF and the text file - return WebResponseUtils.bytesToWebResponse( - zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM); - } - } else { - // Return the OCR processed PDF as a response - return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename); - } - - } finally { - // Clean up sidecar temp file if created - if (sidecarTextFile != null) { - try { - sidecarTextFile.close(); - } catch (Exception e) { - log.warn("Failed to close sidecar temp file", e); - } + // Return the zip file containing both the PDF and the text file + return WebResponseUtils.bytesToWebResponse( + zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM); } + } else { + // Return the OCR processed PDF as a response + return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename); } } } diff --git a/app/core/src/main/java/stirling/software/SPDF/model/api/converters/ConvertPDFToMarkdown.java b/app/core/src/main/java/stirling/software/SPDF/model/api/converters/ConvertPDFToMarkdown.java index d5e4aa57a..c7a6bd7af 100644 --- a/app/core/src/main/java/stirling/software/SPDF/model/api/converters/ConvertPDFToMarkdown.java +++ b/app/core/src/main/java/stirling/software/SPDF/model/api/converters/ConvertPDFToMarkdown.java @@ -11,14 +11,20 @@ import org.springframework.web.multipart.MultipartFile; import io.swagger.v3.oas.annotations.Operation; import 
io.swagger.v3.oas.annotations.tags.Tag; +import lombok.RequiredArgsConstructor; + import stirling.software.common.model.api.PDFFile; import stirling.software.common.util.PDFToFile; +import stirling.software.common.util.TempFileManager; @RestController @Tag(name = "Convert", description = "Convert APIs") @RequestMapping("/api/v1/convert") +@RequiredArgsConstructor public class ConvertPDFToMarkdown { + private final TempFileManager tempFileManager; + @PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/pdf/markdown") @Operation( summary = "Convert PDF to Markdown", @@ -27,7 +33,7 @@ public class ConvertPDFToMarkdown { public ResponseEntity processPdfToMarkdown(@ModelAttribute PDFFile file) throws Exception { MultipartFile inputFile = file.getFileInput(); - PDFToFile pdfToFile = new PDFToFile(); + PDFToFile pdfToFile = new PDFToFile(tempFileManager); return pdfToFile.processPdfToMarkdown(inputFile); } }