mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-11-16 01:21:16 +01:00
refactor: Replace manual resource handling with try-with-resources (#4593)
# Description of Changes
Explanation of try-with-resources:
> Many resources in Java need to be closed after they have been used. If
they are not, the garbage collector cannot reclaim the resources'
memory, and they are still considered to be in use by the operating
system. Such resources are considered to be leaked, which can lead to
performance issues.
>
> Java 7 introduced the try-with-resources statement, which guarantees
that the resource in question will be closed.
>
> try (InputStream input = Files.newInputStream(path)) {
> // "input" will be closed after the execution of this block
> }
>
> This syntax is safer than the traditional approach using try, catch, and
finally, and hence should be preferred.
https://rules.sonarsource.com/java/tag/java8/RSPEC-2093/
<!--
Please provide a summary of the changes, including:
- What was changed
- Why the change was made
- Any challenges encountered
Closes #(issue_number)
-->
---
## Checklist
### General
- [ ] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings
### Documentation
- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)
### UI Changes (if applicable)
- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)
### Testing (if applicable)
- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.
---------
Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
parent
575854952a
commit
b661eb8ff0
@ -25,15 +25,19 @@ import com.vladsch.flexmark.util.data.MutableDataSet;
|
|||||||
|
|
||||||
import io.github.pixee.security.Filenames;
|
import io.github.pixee.security.Filenames;
|
||||||
|
|
||||||
import lombok.NoArgsConstructor;
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
|
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@NoArgsConstructor
|
|
||||||
public class PDFToFile {
|
public class PDFToFile {
|
||||||
|
|
||||||
|
private final TempFileManager tempFileManager;
|
||||||
|
|
||||||
|
public PDFToFile(TempFileManager tempFileManager) {
|
||||||
|
this.tempFileManager = tempFileManager;
|
||||||
|
}
|
||||||
|
|
||||||
public ResponseEntity<byte[]> processPdfToMarkdown(MultipartFile inputFile)
|
public ResponseEntity<byte[]> processPdfToMarkdown(MultipartFile inputFile)
|
||||||
throws IOException, InterruptedException {
|
throws IOException, InterruptedException {
|
||||||
if (!MediaType.APPLICATION_PDF_VALUE.equals(inputFile.getContentType())) {
|
if (!MediaType.APPLICATION_PDF_VALUE.equals(inputFile.getContentType())) {
|
||||||
@ -71,15 +75,12 @@ public class PDFToFile {
|
|||||||
pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.'));
|
pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.'));
|
||||||
}
|
}
|
||||||
|
|
||||||
Path tempInputFile = null;
|
|
||||||
Path tempOutputDir = null;
|
|
||||||
byte[] fileBytes;
|
byte[] fileBytes;
|
||||||
String fileName;
|
String fileName;
|
||||||
|
|
||||||
try {
|
try (TempFile tempInputFile = new TempFile(tempFileManager, ".pdf");
|
||||||
tempInputFile = Files.createTempFile("input_", ".pdf");
|
TempDirectory tempOutputDir = new TempDirectory(tempFileManager)) {
|
||||||
inputFile.transferTo(tempInputFile);
|
inputFile.transferTo(tempInputFile.getFile());
|
||||||
tempOutputDir = Files.createTempDirectory("output_");
|
|
||||||
|
|
||||||
List<String> command =
|
List<String> command =
|
||||||
new ArrayList<>(
|
new ArrayList<>(
|
||||||
@ -88,14 +89,16 @@ public class PDFToFile {
|
|||||||
"-s",
|
"-s",
|
||||||
"-noframes",
|
"-noframes",
|
||||||
"-c",
|
"-c",
|
||||||
tempInputFile.toString(),
|
tempInputFile.getAbsolutePath(),
|
||||||
pdfBaseName));
|
pdfBaseName));
|
||||||
|
|
||||||
ProcessExecutorResult returnCode =
|
ProcessExecutorResult returnCode =
|
||||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.PDFTOHTML)
|
ProcessExecutor.getInstance(ProcessExecutor.Processes.PDFTOHTML)
|
||||||
.runCommandWithOutputHandling(command, tempOutputDir.toFile());
|
.runCommandWithOutputHandling(
|
||||||
|
command, tempOutputDir.getPath().toFile());
|
||||||
// Process HTML files to Markdown
|
// Process HTML files to Markdown
|
||||||
File[] outputFiles = Objects.requireNonNull(tempOutputDir.toFile().listFiles());
|
File[] outputFiles =
|
||||||
|
Objects.requireNonNull(tempOutputDir.getPath().toFile().listFiles());
|
||||||
List<File> markdownFiles = new ArrayList<>();
|
List<File> markdownFiles = new ArrayList<>();
|
||||||
|
|
||||||
// Convert HTML files to Markdown
|
// Convert HTML files to Markdown
|
||||||
@ -105,7 +108,7 @@ public class PDFToFile {
|
|||||||
String markdown = htmlToMarkdownConverter.convert(html);
|
String markdown = htmlToMarkdownConverter.convert(html);
|
||||||
|
|
||||||
String mdFileName = outputFile.getName().replace(".html", ".md");
|
String mdFileName = outputFile.getName().replace(".html", ".md");
|
||||||
File mdFile = new File(tempOutputDir.toFile(), mdFileName);
|
File mdFile = new File(tempOutputDir.getPath().toFile(), mdFileName);
|
||||||
Files.writeString(mdFile.toPath(), markdown);
|
Files.writeString(mdFile.toPath(), markdown);
|
||||||
markdownFiles.add(mdFile);
|
markdownFiles.add(mdFile);
|
||||||
}
|
}
|
||||||
@ -142,10 +145,6 @@ public class PDFToFile {
|
|||||||
|
|
||||||
fileBytes = byteArrayOutputStream.toByteArray();
|
fileBytes = byteArrayOutputStream.toByteArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
} finally {
|
|
||||||
if (tempInputFile != null) Files.deleteIfExists(tempInputFile);
|
|
||||||
if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile());
|
|
||||||
}
|
}
|
||||||
return WebResponseUtils.bytesToWebResponse(
|
return WebResponseUtils.bytesToWebResponse(
|
||||||
fileBytes, fileName, MediaType.APPLICATION_OCTET_STREAM);
|
fileBytes, fileName, MediaType.APPLICATION_OCTET_STREAM);
|
||||||
@ -164,18 +163,17 @@ public class PDFToFile {
|
|||||||
pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.'));
|
pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.'));
|
||||||
}
|
}
|
||||||
|
|
||||||
Path tempInputFile = null;
|
|
||||||
Path tempOutputDir = null;
|
|
||||||
byte[] fileBytes;
|
byte[] fileBytes;
|
||||||
String fileName;
|
String fileName;
|
||||||
|
|
||||||
try {
|
try (TempFile inputFileTemp = new TempFile(tempFileManager, ".pdf");
|
||||||
// Save the uploaded file to a temporary location
|
TempDirectory outputDirTemp = new TempDirectory(tempFileManager)) {
|
||||||
tempInputFile = Files.createTempFile("input_", ".pdf");
|
|
||||||
inputFile.transferTo(tempInputFile);
|
|
||||||
|
|
||||||
// Prepare the output directory
|
Path tempInputFile = inputFileTemp.getPath();
|
||||||
tempOutputDir = Files.createTempDirectory("output_");
|
Path tempOutputDir = outputDirTemp.getPath();
|
||||||
|
|
||||||
|
// Save the uploaded file to a temporary location
|
||||||
|
inputFile.transferTo(tempInputFile);
|
||||||
|
|
||||||
// Run the pdftohtml command with complex output
|
// Run the pdftohtml command with complex output
|
||||||
List<String> command =
|
List<String> command =
|
||||||
@ -208,11 +206,6 @@ public class PDFToFile {
|
|||||||
log.error("Exception writing zip", e);
|
log.error("Exception writing zip", e);
|
||||||
}
|
}
|
||||||
fileBytes = byteArrayOutputStream.toByteArray();
|
fileBytes = byteArrayOutputStream.toByteArray();
|
||||||
|
|
||||||
} finally {
|
|
||||||
// Clean up the temporary files
|
|
||||||
if (tempInputFile != null) Files.deleteIfExists(tempInputFile);
|
|
||||||
if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return WebResponseUtils.bytesToWebResponse(
|
return WebResponseUtils.bytesToWebResponse(
|
||||||
@ -245,18 +238,17 @@ public class PDFToFile {
|
|||||||
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
|
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
|
||||||
}
|
}
|
||||||
|
|
||||||
Path tempInputFile = null;
|
|
||||||
Path tempOutputDir = null;
|
|
||||||
byte[] fileBytes;
|
byte[] fileBytes;
|
||||||
String fileName;
|
String fileName;
|
||||||
|
|
||||||
try {
|
try (TempFile inputFileTemp = new TempFile(tempFileManager, ".pdf");
|
||||||
// Save the uploaded file to a temporary location
|
TempDirectory outputDirTemp = new TempDirectory(tempFileManager)) {
|
||||||
tempInputFile = Files.createTempFile("input_", ".pdf");
|
|
||||||
inputFile.transferTo(tempInputFile);
|
|
||||||
|
|
||||||
// Prepare the output directory
|
Path tempInputFile = inputFileTemp.getPath();
|
||||||
tempOutputDir = Files.createTempDirectory("output_");
|
Path tempOutputDir = outputDirTemp.getPath();
|
||||||
|
|
||||||
|
// Save the uploaded file to a temporary location
|
||||||
|
inputFile.transferTo(tempInputFile);
|
||||||
|
|
||||||
// Run the LibreOffice command
|
// Run the LibreOffice command
|
||||||
List<String> command =
|
List<String> command =
|
||||||
@ -308,11 +300,6 @@ public class PDFToFile {
|
|||||||
|
|
||||||
fileBytes = byteArrayOutputStream.toByteArray();
|
fileBytes = byteArrayOutputStream.toByteArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
} finally {
|
|
||||||
// Clean up the temporary files
|
|
||||||
Files.deleteIfExists(tempInputFile);
|
|
||||||
if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile());
|
|
||||||
}
|
}
|
||||||
return WebResponseUtils.bytesToWebResponse(
|
return WebResponseUtils.bytesToWebResponse(
|
||||||
fileBytes, fileName, MediaType.APPLICATION_OCTET_STREAM);
|
fileBytes, fileName, MediaType.APPLICATION_OCTET_STREAM);
|
||||||
|
|||||||
@ -15,22 +15,29 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
import stirling.software.common.model.api.misc.ReplaceAndInvert;
|
import stirling.software.common.model.api.misc.ReplaceAndInvert;
|
||||||
import stirling.software.common.util.ProcessExecutor;
|
import stirling.software.common.util.ProcessExecutor;
|
||||||
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
|
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
|
||||||
|
import stirling.software.common.util.TempFile;
|
||||||
|
import stirling.software.common.util.TempFileManager;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
public class ColorSpaceConversionStrategy extends ReplaceAndInvertColorStrategy {
|
public class ColorSpaceConversionStrategy extends ReplaceAndInvertColorStrategy {
|
||||||
|
|
||||||
public ColorSpaceConversionStrategy(MultipartFile file, ReplaceAndInvert replaceAndInvert) {
|
private final TempFileManager tempFileManager;
|
||||||
|
|
||||||
|
public ColorSpaceConversionStrategy(
|
||||||
|
MultipartFile file,
|
||||||
|
ReplaceAndInvert replaceAndInvert,
|
||||||
|
TempFileManager tempFileManager) {
|
||||||
super(file, replaceAndInvert);
|
super(file, replaceAndInvert);
|
||||||
|
this.tempFileManager = tempFileManager;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public InputStreamResource replace() throws IOException {
|
public InputStreamResource replace() throws IOException {
|
||||||
Path tempInputFile = null;
|
try (TempFile tempInput = new TempFile(tempFileManager, ".pdf");
|
||||||
Path tempOutputFile = null;
|
TempFile tempOutput = new TempFile(tempFileManager, ".pdf")) {
|
||||||
|
|
||||||
try {
|
Path tempInputFile = tempInput.getPath();
|
||||||
tempInputFile = Files.createTempFile("colorspace_input_", ".pdf");
|
Path tempOutputFile = tempOutput.getPath();
|
||||||
tempOutputFile = Files.createTempFile("colorspace_output_", ".pdf");
|
|
||||||
|
|
||||||
Files.write(tempInputFile, getFileInput().getBytes());
|
Files.write(tempInputFile, getFileInput().getBytes());
|
||||||
|
|
||||||
@ -74,21 +81,6 @@ public class ColorSpaceConversionStrategy extends ReplaceAndInvertColorStrategy
|
|||||||
log.warn("CMYK color space conversion failed", e);
|
log.warn("CMYK color space conversion failed", e);
|
||||||
throw new IOException(
|
throw new IOException(
|
||||||
"Failed to convert PDF to CMYK color space: " + e.getMessage(), e);
|
"Failed to convert PDF to CMYK color space: " + e.getMessage(), e);
|
||||||
} finally {
|
|
||||||
if (tempInputFile != null) {
|
|
||||||
try {
|
|
||||||
Files.deleteIfExists(tempInputFile);
|
|
||||||
} catch (IOException e) {
|
|
||||||
log.warn("Failed to delete temporary input file: {}", tempInputFile, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (tempOutputFile != null) {
|
|
||||||
try {
|
|
||||||
Files.deleteIfExists(tempOutputFile);
|
|
||||||
} catch (IOException e) {
|
|
||||||
log.warn("Failed to delete temporary output file: {}", tempOutputFile, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -5,7 +5,9 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
|
|||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
import static org.mockito.ArgumentMatchers.any;
|
import static org.mockito.ArgumentMatchers.any;
|
||||||
import static org.mockito.ArgumentMatchers.anyList;
|
import static org.mockito.ArgumentMatchers.anyList;
|
||||||
|
import static org.mockito.ArgumentMatchers.anyString;
|
||||||
import static org.mockito.ArgumentMatchers.argThat;
|
import static org.mockito.ArgumentMatchers.argThat;
|
||||||
|
import static org.mockito.Mockito.lenient;
|
||||||
import static org.mockito.Mockito.mockStatic;
|
import static org.mockito.Mockito.mockStatic;
|
||||||
import static org.mockito.Mockito.when;
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
@ -47,10 +49,21 @@ class PDFToFileTest {
|
|||||||
|
|
||||||
@Mock private ProcessExecutor mockProcessExecutor;
|
@Mock private ProcessExecutor mockProcessExecutor;
|
||||||
@Mock private ProcessExecutorResult mockExecutorResult;
|
@Mock private ProcessExecutorResult mockExecutorResult;
|
||||||
|
@Mock private TempFileManager mockTempFileManager;
|
||||||
|
|
||||||
@BeforeEach
|
@BeforeEach
|
||||||
void setUp() {
|
void setUp() throws IOException {
|
||||||
pdfToFile = new PDFToFile();
|
// Mock the TempFileManager to return real temp files
|
||||||
|
lenient()
|
||||||
|
.when(mockTempFileManager.createTempFile(anyString()))
|
||||||
|
.thenAnswer(
|
||||||
|
invocation ->
|
||||||
|
Files.createTempFile("test", invocation.getArgument(0)).toFile());
|
||||||
|
lenient()
|
||||||
|
.when(mockTempFileManager.createTempDirectory())
|
||||||
|
.thenAnswer(invocation -> Files.createTempDirectory("test"));
|
||||||
|
|
||||||
|
pdfToFile = new PDFToFile(mockTempFileManager);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|||||||
@ -3,16 +3,22 @@ package stirling.software.SPDF.Factories;
|
|||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
import org.springframework.web.multipart.MultipartFile;
|
import org.springframework.web.multipart.MultipartFile;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
|
||||||
import stirling.software.common.model.api.misc.HighContrastColorCombination;
|
import stirling.software.common.model.api.misc.HighContrastColorCombination;
|
||||||
import stirling.software.common.model.api.misc.ReplaceAndInvert;
|
import stirling.software.common.model.api.misc.ReplaceAndInvert;
|
||||||
|
import stirling.software.common.util.TempFileManager;
|
||||||
import stirling.software.common.util.misc.ColorSpaceConversionStrategy;
|
import stirling.software.common.util.misc.ColorSpaceConversionStrategy;
|
||||||
import stirling.software.common.util.misc.CustomColorReplaceStrategy;
|
import stirling.software.common.util.misc.CustomColorReplaceStrategy;
|
||||||
import stirling.software.common.util.misc.InvertFullColorStrategy;
|
import stirling.software.common.util.misc.InvertFullColorStrategy;
|
||||||
import stirling.software.common.util.misc.ReplaceAndInvertColorStrategy;
|
import stirling.software.common.util.misc.ReplaceAndInvertColorStrategy;
|
||||||
|
|
||||||
@Component
|
@Component
|
||||||
|
@RequiredArgsConstructor
|
||||||
public class ReplaceAndInvertColorFactory {
|
public class ReplaceAndInvertColorFactory {
|
||||||
|
|
||||||
|
private final TempFileManager tempFileManager;
|
||||||
|
|
||||||
public ReplaceAndInvertColorStrategy replaceAndInvert(
|
public ReplaceAndInvertColorStrategy replaceAndInvert(
|
||||||
MultipartFile file,
|
MultipartFile file,
|
||||||
ReplaceAndInvert replaceAndInvertOption,
|
ReplaceAndInvert replaceAndInvertOption,
|
||||||
@ -30,7 +36,7 @@ public class ReplaceAndInvertColorFactory {
|
|||||||
highContrastColorCombination);
|
highContrastColorCombination);
|
||||||
case FULL_INVERSION -> new InvertFullColorStrategy(file, replaceAndInvertOption);
|
case FULL_INVERSION -> new InvertFullColorStrategy(file, replaceAndInvertOption);
|
||||||
case COLOR_SPACE_CONVERSION ->
|
case COLOR_SPACE_CONVERSION ->
|
||||||
new ColorSpaceConversionStrategy(file, replaceAndInvertOption);
|
new ColorSpaceConversionStrategy(file, replaceAndInvertOption, tempFileManager);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -11,14 +11,20 @@ import org.springframework.web.multipart.MultipartFile;
|
|||||||
import io.swagger.v3.oas.annotations.Operation;
|
import io.swagger.v3.oas.annotations.Operation;
|
||||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
|
||||||
import stirling.software.common.model.api.PDFFile;
|
import stirling.software.common.model.api.PDFFile;
|
||||||
import stirling.software.common.util.PDFToFile;
|
import stirling.software.common.util.PDFToFile;
|
||||||
|
import stirling.software.common.util.TempFileManager;
|
||||||
|
|
||||||
@RestController
|
@RestController
|
||||||
@Tag(name = "Convert", description = "Convert APIs")
|
@Tag(name = "Convert", description = "Convert APIs")
|
||||||
@RequestMapping("/api/v1/convert")
|
@RequestMapping("/api/v1/convert")
|
||||||
|
@RequiredArgsConstructor
|
||||||
public class ConvertPDFToHtml {
|
public class ConvertPDFToHtml {
|
||||||
|
|
||||||
|
private final TempFileManager tempFileManager;
|
||||||
|
|
||||||
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/pdf/html")
|
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/pdf/html")
|
||||||
@Operation(
|
@Operation(
|
||||||
summary = "Convert PDF to HTML",
|
summary = "Convert PDF to HTML",
|
||||||
@ -26,7 +32,7 @@ public class ConvertPDFToHtml {
|
|||||||
"This endpoint converts a PDF file to HTML format. Input:PDF Output:HTML Type:SISO")
|
"This endpoint converts a PDF file to HTML format. Input:PDF Output:HTML Type:SISO")
|
||||||
public ResponseEntity<byte[]> processPdfToHTML(@ModelAttribute PDFFile file) throws Exception {
|
public ResponseEntity<byte[]> processPdfToHTML(@ModelAttribute PDFFile file) throws Exception {
|
||||||
MultipartFile inputFile = file.getFileInput();
|
MultipartFile inputFile = file.getFileInput();
|
||||||
PDFToFile pdfToFile = new PDFToFile();
|
PDFToFile pdfToFile = new PDFToFile(tempFileManager);
|
||||||
return pdfToFile.processPdfToHtml(inputFile);
|
return pdfToFile.processPdfToHtml(inputFile);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -24,6 +24,7 @@ import stirling.software.common.model.api.PDFFile;
|
|||||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||||
import stirling.software.common.util.GeneralUtils;
|
import stirling.software.common.util.GeneralUtils;
|
||||||
import stirling.software.common.util.PDFToFile;
|
import stirling.software.common.util.PDFToFile;
|
||||||
|
import stirling.software.common.util.TempFileManager;
|
||||||
import stirling.software.common.util.WebResponseUtils;
|
import stirling.software.common.util.WebResponseUtils;
|
||||||
|
|
||||||
@RestController
|
@RestController
|
||||||
@ -33,6 +34,7 @@ import stirling.software.common.util.WebResponseUtils;
|
|||||||
public class ConvertPDFToOffice {
|
public class ConvertPDFToOffice {
|
||||||
|
|
||||||
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||||
|
private final TempFileManager tempFileManager;
|
||||||
|
|
||||||
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/pdf/presentation")
|
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/pdf/presentation")
|
||||||
@Operation(
|
@Operation(
|
||||||
@ -45,7 +47,7 @@ public class ConvertPDFToOffice {
|
|||||||
throws IOException, InterruptedException {
|
throws IOException, InterruptedException {
|
||||||
MultipartFile inputFile = request.getFileInput();
|
MultipartFile inputFile = request.getFileInput();
|
||||||
String outputFormat = request.getOutputFormat();
|
String outputFormat = request.getOutputFormat();
|
||||||
PDFToFile pdfToFile = new PDFToFile();
|
PDFToFile pdfToFile = new PDFToFile(tempFileManager);
|
||||||
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "impress_pdf_import");
|
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "impress_pdf_import");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -70,7 +72,7 @@ public class ConvertPDFToOffice {
|
|||||||
MediaType.TEXT_PLAIN);
|
MediaType.TEXT_PLAIN);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
PDFToFile pdfToFile = new PDFToFile();
|
PDFToFile pdfToFile = new PDFToFile(tempFileManager);
|
||||||
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
|
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -85,7 +87,7 @@ public class ConvertPDFToOffice {
|
|||||||
throws IOException, InterruptedException {
|
throws IOException, InterruptedException {
|
||||||
MultipartFile inputFile = request.getFileInput();
|
MultipartFile inputFile = request.getFileInput();
|
||||||
String outputFormat = request.getOutputFormat();
|
String outputFormat = request.getOutputFormat();
|
||||||
PDFToFile pdfToFile = new PDFToFile();
|
PDFToFile pdfToFile = new PDFToFile(tempFileManager);
|
||||||
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
|
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -98,7 +100,7 @@ public class ConvertPDFToOffice {
|
|||||||
public ResponseEntity<byte[]> processPdfToXML(@ModelAttribute PDFFile file) throws Exception {
|
public ResponseEntity<byte[]> processPdfToXML(@ModelAttribute PDFFile file) throws Exception {
|
||||||
MultipartFile inputFile = file.getFileInput();
|
MultipartFile inputFile = file.getFileInput();
|
||||||
|
|
||||||
PDFToFile pdfToFile = new PDFToFile();
|
PDFToFile pdfToFile = new PDFToFile(tempFileManager);
|
||||||
return pdfToFile.processPdfToOfficeFormat(inputFile, "xml", "writer_pdf_import");
|
return pdfToFile.processPdfToOfficeFormat(inputFile, "xml", "writer_pdf_import");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -116,101 +116,82 @@ public class OCRController {
|
|||||||
|
|
||||||
// Use try-with-resources for proper temp file management
|
// Use try-with-resources for proper temp file management
|
||||||
try (TempFile tempInputFile = new TempFile(tempFileManager, ".pdf");
|
try (TempFile tempInputFile = new TempFile(tempFileManager, ".pdf");
|
||||||
TempFile tempOutputFile = new TempFile(tempFileManager, ".pdf")) {
|
TempFile tempOutputFile = new TempFile(tempFileManager, ".pdf");
|
||||||
|
TempFile sidecarTextFile = sidecar ? new TempFile(tempFileManager, ".txt") : null) {
|
||||||
|
|
||||||
inputFile.transferTo(tempInputFile.getFile());
|
inputFile.transferTo(tempInputFile.getFile());
|
||||||
|
|
||||||
TempFile sidecarTextFile = null;
|
// Use OCRmyPDF if available (no fallback - error if it fails)
|
||||||
|
if (isOcrMyPdfEnabled()) {
|
||||||
|
processWithOcrMyPdf(
|
||||||
|
selectedLanguages,
|
||||||
|
sidecar,
|
||||||
|
deskew,
|
||||||
|
clean,
|
||||||
|
cleanFinal,
|
||||||
|
ocrType,
|
||||||
|
ocrRenderType,
|
||||||
|
removeImagesAfter,
|
||||||
|
tempInputFile.getPath(),
|
||||||
|
tempOutputFile.getPath(),
|
||||||
|
sidecarTextFile != null ? sidecarTextFile.getPath() : null);
|
||||||
|
log.info("OCRmyPDF processing completed successfully");
|
||||||
|
}
|
||||||
|
// Use Tesseract only if OCRmyPDF is not available
|
||||||
|
else if (isTesseractEnabled()) {
|
||||||
|
processWithTesseract(
|
||||||
|
selectedLanguages,
|
||||||
|
ocrType,
|
||||||
|
tempInputFile.getPath(),
|
||||||
|
tempOutputFile.getPath());
|
||||||
|
log.info("Tesseract processing completed successfully");
|
||||||
|
} else {
|
||||||
|
throw ExceptionUtils.createOcrToolsUnavailableException();
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
// Read the processed PDF file
|
||||||
// Use OCRmyPDF if available (no fallback - error if it fails)
|
byte[] pdfBytes = Files.readAllBytes(tempOutputFile.getPath());
|
||||||
if (isOcrMyPdfEnabled()) {
|
|
||||||
if (sidecar) {
|
|
||||||
sidecarTextFile = new TempFile(tempFileManager, ".txt");
|
|
||||||
}
|
|
||||||
|
|
||||||
processWithOcrMyPdf(
|
// Return the OCR processed PDF as a response
|
||||||
selectedLanguages,
|
String outputFilename =
|
||||||
sidecar,
|
GeneralUtils.removeExtension(
|
||||||
deskew,
|
Filenames.toSimpleFileName(inputFile.getOriginalFilename()))
|
||||||
clean,
|
+ "_OCR.pdf";
|
||||||
cleanFinal,
|
|
||||||
ocrType,
|
|
||||||
ocrRenderType,
|
|
||||||
removeImagesAfter,
|
|
||||||
tempInputFile.getPath(),
|
|
||||||
tempOutputFile.getPath(),
|
|
||||||
sidecarTextFile != null ? sidecarTextFile.getPath() : null);
|
|
||||||
log.info("OCRmyPDF processing completed successfully");
|
|
||||||
}
|
|
||||||
// Use Tesseract only if OCRmyPDF is not available
|
|
||||||
else if (isTesseractEnabled()) {
|
|
||||||
processWithTesseract(
|
|
||||||
selectedLanguages,
|
|
||||||
ocrType,
|
|
||||||
tempInputFile.getPath(),
|
|
||||||
tempOutputFile.getPath());
|
|
||||||
log.info("Tesseract processing completed successfully");
|
|
||||||
} else {
|
|
||||||
throw ExceptionUtils.createOcrToolsUnavailableException();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read the processed PDF file
|
if (sidecar && sidecarTextFile != null) {
|
||||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile.getPath());
|
// Create a zip file containing both the PDF and the text file
|
||||||
|
String outputZipFilename =
|
||||||
// Return the OCR processed PDF as a response
|
|
||||||
String outputFilename =
|
|
||||||
GeneralUtils.removeExtension(
|
GeneralUtils.removeExtension(
|
||||||
Filenames.toSimpleFileName(inputFile.getOriginalFilename()))
|
Filenames.toSimpleFileName(inputFile.getOriginalFilename()))
|
||||||
+ "_OCR.pdf";
|
+ "_OCR.zip";
|
||||||
|
|
||||||
if (sidecar && sidecarTextFile != null) {
|
try (TempFile tempZipFile = new TempFile(tempFileManager, ".zip");
|
||||||
// Create a zip file containing both the PDF and the text file
|
ZipOutputStream zipOut =
|
||||||
String outputZipFilename =
|
new ZipOutputStream(Files.newOutputStream(tempZipFile.getPath()))) {
|
||||||
GeneralUtils.removeExtension(
|
|
||||||
Filenames.toSimpleFileName(
|
|
||||||
inputFile.getOriginalFilename()))
|
|
||||||
+ "_OCR.zip";
|
|
||||||
|
|
||||||
try (TempFile tempZipFile = new TempFile(tempFileManager, ".zip");
|
// Add PDF file to the zip
|
||||||
ZipOutputStream zipOut =
|
ZipEntry pdfEntry = new ZipEntry(outputFilename);
|
||||||
new ZipOutputStream(
|
zipOut.putNextEntry(pdfEntry);
|
||||||
Files.newOutputStream(tempZipFile.getPath()))) {
|
zipOut.write(pdfBytes);
|
||||||
|
zipOut.closeEntry();
|
||||||
|
|
||||||
// Add PDF file to the zip
|
// Add text file to the zip
|
||||||
ZipEntry pdfEntry = new ZipEntry(outputFilename);
|
ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt"));
|
||||||
zipOut.putNextEntry(pdfEntry);
|
zipOut.putNextEntry(txtEntry);
|
||||||
zipOut.write(pdfBytes);
|
Files.copy(sidecarTextFile.getPath(), zipOut);
|
||||||
zipOut.closeEntry();
|
zipOut.closeEntry();
|
||||||
|
|
||||||
// Add text file to the zip
|
zipOut.finish();
|
||||||
ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt"));
|
|
||||||
zipOut.putNextEntry(txtEntry);
|
|
||||||
Files.copy(sidecarTextFile.getPath(), zipOut);
|
|
||||||
zipOut.closeEntry();
|
|
||||||
|
|
||||||
zipOut.finish();
|
byte[] zipBytes = Files.readAllBytes(tempZipFile.getPath());
|
||||||
|
|
||||||
byte[] zipBytes = Files.readAllBytes(tempZipFile.getPath());
|
// Return the zip file containing both the PDF and the text file
|
||||||
|
return WebResponseUtils.bytesToWebResponse(
|
||||||
// Return the zip file containing both the PDF and the text file
|
zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
|
||||||
return WebResponseUtils.bytesToWebResponse(
|
|
||||||
zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Return the OCR processed PDF as a response
|
|
||||||
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
|
|
||||||
}
|
|
||||||
|
|
||||||
} finally {
|
|
||||||
// Clean up sidecar temp file if created
|
|
||||||
if (sidecarTextFile != null) {
|
|
||||||
try {
|
|
||||||
sidecarTextFile.close();
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.warn("Failed to close sidecar temp file", e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// Return the OCR processed PDF as a response
|
||||||
|
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -11,14 +11,20 @@ import org.springframework.web.multipart.MultipartFile;
|
|||||||
import io.swagger.v3.oas.annotations.Operation;
|
import io.swagger.v3.oas.annotations.Operation;
|
||||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
|
||||||
import stirling.software.common.model.api.PDFFile;
|
import stirling.software.common.model.api.PDFFile;
|
||||||
import stirling.software.common.util.PDFToFile;
|
import stirling.software.common.util.PDFToFile;
|
||||||
|
import stirling.software.common.util.TempFileManager;
|
||||||
|
|
||||||
@RestController
|
@RestController
|
||||||
@Tag(name = "Convert", description = "Convert APIs")
|
@Tag(name = "Convert", description = "Convert APIs")
|
||||||
@RequestMapping("/api/v1/convert")
|
@RequestMapping("/api/v1/convert")
|
||||||
|
@RequiredArgsConstructor
|
||||||
public class ConvertPDFToMarkdown {
|
public class ConvertPDFToMarkdown {
|
||||||
|
|
||||||
|
private final TempFileManager tempFileManager;
|
||||||
|
|
||||||
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/pdf/markdown")
|
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/pdf/markdown")
|
||||||
@Operation(
|
@Operation(
|
||||||
summary = "Convert PDF to Markdown",
|
summary = "Convert PDF to Markdown",
|
||||||
@ -27,7 +33,7 @@ public class ConvertPDFToMarkdown {
|
|||||||
public ResponseEntity<byte[]> processPdfToMarkdown(@ModelAttribute PDFFile file)
|
public ResponseEntity<byte[]> processPdfToMarkdown(@ModelAttribute PDFFile file)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
MultipartFile inputFile = file.getFileInput();
|
MultipartFile inputFile = file.getFileInput();
|
||||||
PDFToFile pdfToFile = new PDFToFile();
|
PDFToFile pdfToFile = new PDFToFile(tempFileManager);
|
||||||
return pdfToFile.processPdfToMarkdown(inputFile);
|
return pdfToFile.processPdfToMarkdown(inputFile);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user