refactor: Replace manual resource handling with try-with-resources (#4593)

# Description of Changes


Explanation of try-with-resources:

> Many resources in Java need to be closed after they have been used. If
they are not, the garbage collector cannot reclaim the resources'
memory, and they are still considered to be in use by the operating
system. Such resources are considered to be leaked, which can lead to
performance issues.
> 
> Java 7 introduced the try-with-resources statement, which guarantees
that the resource in question will be closed.
> 
> ```java
> try (InputStream input = Files.newInputStream(path)) {
>   // "input" will be closed after the execution of this block
> }
> ```
> 
> This syntax is safer than the traditional method using try, catch, and
finally and hence should be preferred.

https://rules.sonarsource.com/java/tag/java8/RSPEC-2093/

<!--
Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)
-->

---

## Checklist

### General

- [ ] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.

---------

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
Balázs Szücs 2025-10-04 11:49:39 +02:00 committed by GitHub
parent 575854952a
commit b661eb8ff0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 145 additions and 152 deletions

View File

@ -25,15 +25,19 @@ import com.vladsch.flexmark.util.data.MutableDataSet;
import io.github.pixee.security.Filenames;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
@Slf4j
@NoArgsConstructor
public class PDFToFile {
private final TempFileManager tempFileManager;
public PDFToFile(TempFileManager tempFileManager) {
this.tempFileManager = tempFileManager;
}
public ResponseEntity<byte[]> processPdfToMarkdown(MultipartFile inputFile)
throws IOException, InterruptedException {
if (!MediaType.APPLICATION_PDF_VALUE.equals(inputFile.getContentType())) {
@ -71,15 +75,12 @@ public class PDFToFile {
pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.'));
}
Path tempInputFile = null;
Path tempOutputDir = null;
byte[] fileBytes;
String fileName;
try {
tempInputFile = Files.createTempFile("input_", ".pdf");
inputFile.transferTo(tempInputFile);
tempOutputDir = Files.createTempDirectory("output_");
try (TempFile tempInputFile = new TempFile(tempFileManager, ".pdf");
TempDirectory tempOutputDir = new TempDirectory(tempFileManager)) {
inputFile.transferTo(tempInputFile.getFile());
List<String> command =
new ArrayList<>(
@ -88,14 +89,16 @@ public class PDFToFile {
"-s",
"-noframes",
"-c",
tempInputFile.toString(),
tempInputFile.getAbsolutePath(),
pdfBaseName));
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.PDFTOHTML)
.runCommandWithOutputHandling(command, tempOutputDir.toFile());
.runCommandWithOutputHandling(
command, tempOutputDir.getPath().toFile());
// Process HTML files to Markdown
File[] outputFiles = Objects.requireNonNull(tempOutputDir.toFile().listFiles());
File[] outputFiles =
Objects.requireNonNull(tempOutputDir.getPath().toFile().listFiles());
List<File> markdownFiles = new ArrayList<>();
// Convert HTML files to Markdown
@ -105,7 +108,7 @@ public class PDFToFile {
String markdown = htmlToMarkdownConverter.convert(html);
String mdFileName = outputFile.getName().replace(".html", ".md");
File mdFile = new File(tempOutputDir.toFile(), mdFileName);
File mdFile = new File(tempOutputDir.getPath().toFile(), mdFileName);
Files.writeString(mdFile.toPath(), markdown);
markdownFiles.add(mdFile);
}
@ -142,10 +145,6 @@ public class PDFToFile {
fileBytes = byteArrayOutputStream.toByteArray();
}
} finally {
if (tempInputFile != null) Files.deleteIfExists(tempInputFile);
if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile());
}
return WebResponseUtils.bytesToWebResponse(
fileBytes, fileName, MediaType.APPLICATION_OCTET_STREAM);
@ -164,18 +163,17 @@ public class PDFToFile {
pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.'));
}
Path tempInputFile = null;
Path tempOutputDir = null;
byte[] fileBytes;
String fileName;
try {
// Save the uploaded file to a temporary location
tempInputFile = Files.createTempFile("input_", ".pdf");
inputFile.transferTo(tempInputFile);
try (TempFile inputFileTemp = new TempFile(tempFileManager, ".pdf");
TempDirectory outputDirTemp = new TempDirectory(tempFileManager)) {
// Prepare the output directory
tempOutputDir = Files.createTempDirectory("output_");
Path tempInputFile = inputFileTemp.getPath();
Path tempOutputDir = outputDirTemp.getPath();
// Save the uploaded file to a temporary location
inputFile.transferTo(tempInputFile);
// Run the pdftohtml command with complex output
List<String> command =
@ -208,11 +206,6 @@ public class PDFToFile {
log.error("Exception writing zip", e);
}
fileBytes = byteArrayOutputStream.toByteArray();
} finally {
// Clean up the temporary files
if (tempInputFile != null) Files.deleteIfExists(tempInputFile);
if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile());
}
return WebResponseUtils.bytesToWebResponse(
@ -245,18 +238,17 @@ public class PDFToFile {
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
}
Path tempInputFile = null;
Path tempOutputDir = null;
byte[] fileBytes;
String fileName;
try {
// Save the uploaded file to a temporary location
tempInputFile = Files.createTempFile("input_", ".pdf");
inputFile.transferTo(tempInputFile);
try (TempFile inputFileTemp = new TempFile(tempFileManager, ".pdf");
TempDirectory outputDirTemp = new TempDirectory(tempFileManager)) {
// Prepare the output directory
tempOutputDir = Files.createTempDirectory("output_");
Path tempInputFile = inputFileTemp.getPath();
Path tempOutputDir = outputDirTemp.getPath();
// Save the uploaded file to a temporary location
inputFile.transferTo(tempInputFile);
// Run the LibreOffice command
List<String> command =
@ -308,11 +300,6 @@ public class PDFToFile {
fileBytes = byteArrayOutputStream.toByteArray();
}
} finally {
// Clean up the temporary files
Files.deleteIfExists(tempInputFile);
if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile());
}
return WebResponseUtils.bytesToWebResponse(
fileBytes, fileName, MediaType.APPLICATION_OCTET_STREAM);

View File

@ -15,22 +15,29 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.api.misc.ReplaceAndInvert;
import stirling.software.common.util.ProcessExecutor;
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
import stirling.software.common.util.TempFile;
import stirling.software.common.util.TempFileManager;
@Slf4j
public class ColorSpaceConversionStrategy extends ReplaceAndInvertColorStrategy {
public ColorSpaceConversionStrategy(MultipartFile file, ReplaceAndInvert replaceAndInvert) {
private final TempFileManager tempFileManager;
public ColorSpaceConversionStrategy(
MultipartFile file,
ReplaceAndInvert replaceAndInvert,
TempFileManager tempFileManager) {
super(file, replaceAndInvert);
this.tempFileManager = tempFileManager;
}
@Override
public InputStreamResource replace() throws IOException {
Path tempInputFile = null;
Path tempOutputFile = null;
try (TempFile tempInput = new TempFile(tempFileManager, ".pdf");
TempFile tempOutput = new TempFile(tempFileManager, ".pdf")) {
try {
tempInputFile = Files.createTempFile("colorspace_input_", ".pdf");
tempOutputFile = Files.createTempFile("colorspace_output_", ".pdf");
Path tempInputFile = tempInput.getPath();
Path tempOutputFile = tempOutput.getPath();
Files.write(tempInputFile, getFileInput().getBytes());
@ -74,21 +81,6 @@ public class ColorSpaceConversionStrategy extends ReplaceAndInvertColorStrategy
log.warn("CMYK color space conversion failed", e);
throw new IOException(
"Failed to convert PDF to CMYK color space: " + e.getMessage(), e);
} finally {
if (tempInputFile != null) {
try {
Files.deleteIfExists(tempInputFile);
} catch (IOException e) {
log.warn("Failed to delete temporary input file: {}", tempInputFile, e);
}
}
if (tempOutputFile != null) {
try {
Files.deleteIfExists(tempOutputFile);
} catch (IOException e) {
log.warn("Failed to delete temporary output file: {}", tempOutputFile, e);
}
}
}
}
}

View File

@ -5,7 +5,9 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyList;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.ArgumentMatchers.argThat;
import static org.mockito.Mockito.lenient;
import static org.mockito.Mockito.mockStatic;
import static org.mockito.Mockito.when;
@ -47,10 +49,21 @@ class PDFToFileTest {
@Mock private ProcessExecutor mockProcessExecutor;
@Mock private ProcessExecutorResult mockExecutorResult;
@Mock private TempFileManager mockTempFileManager;
@BeforeEach
void setUp() {
pdfToFile = new PDFToFile();
void setUp() throws IOException {
// Mock the TempFileManager to return real temp files
lenient()
.when(mockTempFileManager.createTempFile(anyString()))
.thenAnswer(
invocation ->
Files.createTempFile("test", invocation.getArgument(0)).toFile());
lenient()
.when(mockTempFileManager.createTempDirectory())
.thenAnswer(invocation -> Files.createTempDirectory("test"));
pdfToFile = new PDFToFile(mockTempFileManager);
}
@Test

View File

@ -3,16 +3,22 @@ package stirling.software.SPDF.Factories;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;
import lombok.RequiredArgsConstructor;
import stirling.software.common.model.api.misc.HighContrastColorCombination;
import stirling.software.common.model.api.misc.ReplaceAndInvert;
import stirling.software.common.util.TempFileManager;
import stirling.software.common.util.misc.ColorSpaceConversionStrategy;
import stirling.software.common.util.misc.CustomColorReplaceStrategy;
import stirling.software.common.util.misc.InvertFullColorStrategy;
import stirling.software.common.util.misc.ReplaceAndInvertColorStrategy;
@Component
@RequiredArgsConstructor
public class ReplaceAndInvertColorFactory {
private final TempFileManager tempFileManager;
public ReplaceAndInvertColorStrategy replaceAndInvert(
MultipartFile file,
ReplaceAndInvert replaceAndInvertOption,
@ -30,7 +36,7 @@ public class ReplaceAndInvertColorFactory {
highContrastColorCombination);
case FULL_INVERSION -> new InvertFullColorStrategy(file, replaceAndInvertOption);
case COLOR_SPACE_CONVERSION ->
new ColorSpaceConversionStrategy(file, replaceAndInvertOption);
new ColorSpaceConversionStrategy(file, replaceAndInvertOption, tempFileManager);
};
}
}

View File

@ -11,14 +11,20 @@ import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import stirling.software.common.model.api.PDFFile;
import stirling.software.common.util.PDFToFile;
import stirling.software.common.util.TempFileManager;
@RestController
@Tag(name = "Convert", description = "Convert APIs")
@RequestMapping("/api/v1/convert")
@RequiredArgsConstructor
public class ConvertPDFToHtml {
private final TempFileManager tempFileManager;
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/pdf/html")
@Operation(
summary = "Convert PDF to HTML",
@ -26,7 +32,7 @@ public class ConvertPDFToHtml {
"This endpoint converts a PDF file to HTML format. Input:PDF Output:HTML Type:SISO")
public ResponseEntity<byte[]> processPdfToHTML(@ModelAttribute PDFFile file) throws Exception {
MultipartFile inputFile = file.getFileInput();
PDFToFile pdfToFile = new PDFToFile();
PDFToFile pdfToFile = new PDFToFile(tempFileManager);
return pdfToFile.processPdfToHtml(inputFile);
}
}

View File

@ -24,6 +24,7 @@ import stirling.software.common.model.api.PDFFile;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.PDFToFile;
import stirling.software.common.util.TempFileManager;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -33,6 +34,7 @@ import stirling.software.common.util.WebResponseUtils;
public class ConvertPDFToOffice {
private final CustomPDFDocumentFactory pdfDocumentFactory;
private final TempFileManager tempFileManager;
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/pdf/presentation")
@Operation(
@ -45,7 +47,7 @@ public class ConvertPDFToOffice {
throws IOException, InterruptedException {
MultipartFile inputFile = request.getFileInput();
String outputFormat = request.getOutputFormat();
PDFToFile pdfToFile = new PDFToFile();
PDFToFile pdfToFile = new PDFToFile(tempFileManager);
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "impress_pdf_import");
}
@ -70,7 +72,7 @@ public class ConvertPDFToOffice {
MediaType.TEXT_PLAIN);
}
} else {
PDFToFile pdfToFile = new PDFToFile();
PDFToFile pdfToFile = new PDFToFile(tempFileManager);
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
}
}
@ -85,7 +87,7 @@ public class ConvertPDFToOffice {
throws IOException, InterruptedException {
MultipartFile inputFile = request.getFileInput();
String outputFormat = request.getOutputFormat();
PDFToFile pdfToFile = new PDFToFile();
PDFToFile pdfToFile = new PDFToFile(tempFileManager);
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
}
@ -98,7 +100,7 @@ public class ConvertPDFToOffice {
public ResponseEntity<byte[]> processPdfToXML(@ModelAttribute PDFFile file) throws Exception {
MultipartFile inputFile = file.getFileInput();
PDFToFile pdfToFile = new PDFToFile();
PDFToFile pdfToFile = new PDFToFile(tempFileManager);
return pdfToFile.processPdfToOfficeFormat(inputFile, "xml", "writer_pdf_import");
}
}

View File

@ -116,101 +116,82 @@ public class OCRController {
// Use try-with-resources for proper temp file management
try (TempFile tempInputFile = new TempFile(tempFileManager, ".pdf");
TempFile tempOutputFile = new TempFile(tempFileManager, ".pdf")) {
TempFile tempOutputFile = new TempFile(tempFileManager, ".pdf");
TempFile sidecarTextFile = sidecar ? new TempFile(tempFileManager, ".txt") : null) {
inputFile.transferTo(tempInputFile.getFile());
TempFile sidecarTextFile = null;
// Use OCRmyPDF if available (no fallback - error if it fails)
if (isOcrMyPdfEnabled()) {
processWithOcrMyPdf(
selectedLanguages,
sidecar,
deskew,
clean,
cleanFinal,
ocrType,
ocrRenderType,
removeImagesAfter,
tempInputFile.getPath(),
tempOutputFile.getPath(),
sidecarTextFile != null ? sidecarTextFile.getPath() : null);
log.info("OCRmyPDF processing completed successfully");
}
// Use Tesseract only if OCRmyPDF is not available
else if (isTesseractEnabled()) {
processWithTesseract(
selectedLanguages,
ocrType,
tempInputFile.getPath(),
tempOutputFile.getPath());
log.info("Tesseract processing completed successfully");
} else {
throw ExceptionUtils.createOcrToolsUnavailableException();
}
try {
// Use OCRmyPDF if available (no fallback - error if it fails)
if (isOcrMyPdfEnabled()) {
if (sidecar) {
sidecarTextFile = new TempFile(tempFileManager, ".txt");
}
// Read the processed PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile.getPath());
processWithOcrMyPdf(
selectedLanguages,
sidecar,
deskew,
clean,
cleanFinal,
ocrType,
ocrRenderType,
removeImagesAfter,
tempInputFile.getPath(),
tempOutputFile.getPath(),
sidecarTextFile != null ? sidecarTextFile.getPath() : null);
log.info("OCRmyPDF processing completed successfully");
}
// Use Tesseract only if OCRmyPDF is not available
else if (isTesseractEnabled()) {
processWithTesseract(
selectedLanguages,
ocrType,
tempInputFile.getPath(),
tempOutputFile.getPath());
log.info("Tesseract processing completed successfully");
} else {
throw ExceptionUtils.createOcrToolsUnavailableException();
}
// Return the OCR processed PDF as a response
String outputFilename =
GeneralUtils.removeExtension(
Filenames.toSimpleFileName(inputFile.getOriginalFilename()))
+ "_OCR.pdf";
// Read the processed PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile.getPath());
// Return the OCR processed PDF as a response
String outputFilename =
if (sidecar && sidecarTextFile != null) {
// Create a zip file containing both the PDF and the text file
String outputZipFilename =
GeneralUtils.removeExtension(
Filenames.toSimpleFileName(inputFile.getOriginalFilename()))
+ "_OCR.pdf";
+ "_OCR.zip";
if (sidecar && sidecarTextFile != null) {
// Create a zip file containing both the PDF and the text file
String outputZipFilename =
GeneralUtils.removeExtension(
Filenames.toSimpleFileName(
inputFile.getOriginalFilename()))
+ "_OCR.zip";
try (TempFile tempZipFile = new TempFile(tempFileManager, ".zip");
ZipOutputStream zipOut =
new ZipOutputStream(Files.newOutputStream(tempZipFile.getPath()))) {
try (TempFile tempZipFile = new TempFile(tempFileManager, ".zip");
ZipOutputStream zipOut =
new ZipOutputStream(
Files.newOutputStream(tempZipFile.getPath()))) {
// Add PDF file to the zip
ZipEntry pdfEntry = new ZipEntry(outputFilename);
zipOut.putNextEntry(pdfEntry);
zipOut.write(pdfBytes);
zipOut.closeEntry();
// Add PDF file to the zip
ZipEntry pdfEntry = new ZipEntry(outputFilename);
zipOut.putNextEntry(pdfEntry);
zipOut.write(pdfBytes);
zipOut.closeEntry();
// Add text file to the zip
ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt"));
zipOut.putNextEntry(txtEntry);
Files.copy(sidecarTextFile.getPath(), zipOut);
zipOut.closeEntry();
// Add text file to the zip
ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt"));
zipOut.putNextEntry(txtEntry);
Files.copy(sidecarTextFile.getPath(), zipOut);
zipOut.closeEntry();
zipOut.finish();
zipOut.finish();
byte[] zipBytes = Files.readAllBytes(tempZipFile.getPath());
byte[] zipBytes = Files.readAllBytes(tempZipFile.getPath());
// Return the zip file containing both the PDF and the text file
return WebResponseUtils.bytesToWebResponse(
zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
}
} else {
// Return the OCR processed PDF as a response
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
}
} finally {
// Clean up sidecar temp file if created
if (sidecarTextFile != null) {
try {
sidecarTextFile.close();
} catch (Exception e) {
log.warn("Failed to close sidecar temp file", e);
}
// Return the zip file containing both the PDF and the text file
return WebResponseUtils.bytesToWebResponse(
zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
}
} else {
// Return the OCR processed PDF as a response
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
}
}
}

View File

@ -11,14 +11,20 @@ import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import stirling.software.common.model.api.PDFFile;
import stirling.software.common.util.PDFToFile;
import stirling.software.common.util.TempFileManager;
@RestController
@Tag(name = "Convert", description = "Convert APIs")
@RequestMapping("/api/v1/convert")
@RequiredArgsConstructor
public class ConvertPDFToMarkdown {
private final TempFileManager tempFileManager;
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/pdf/markdown")
@Operation(
summary = "Convert PDF to Markdown",
@ -27,7 +33,7 @@ public class ConvertPDFToMarkdown {
public ResponseEntity<byte[]> processPdfToMarkdown(@ModelAttribute PDFFile file)
throws Exception {
MultipartFile inputFile = file.getFileInput();
PDFToFile pdfToFile = new PDFToFile();
PDFToFile pdfToFile = new PDFToFile(tempFileManager);
return pdfToFile.processPdfToMarkdown(inputFile);
}
}