From c055f9456ab8609171640935897036b189a3c199 Mon Sep 17 00:00:00 2001 From: Ludy Date: Thu, 4 Sep 2025 15:33:35 +0200 Subject: [PATCH] feat(convert): PDF conversion via unoconvert with soffice fallback (#4316) # Description of Changes - **What was changed** - Reworked `ConvertOfficeController` to use a dedicated temporary working directory per request, which is cleaned up with directory-level deletion. - Added detection for converter availability via `EndpointConfiguration` to choose between **unoconvert** and a **soffice** headless fallback. - Ensured safe filename handling (sanitization, extension checks, lowercase normalization) and added early validation errors for missing/invalid filenames. - Switched raw temp file writes to `Files.copy` (with `StandardCopyOption.REPLACE_EXISTING`) / `Files.writeString`. - Implemented robust output handling: - Rejected non-zero exit codes and null results with explicit exceptions. - Checked for missing/empty PDF outputs. - Added fallback lookup for any produced `.pdf` within the work directory if the expected name is not present. - Introduced `@Slf4j` logging; improved error and cleanup logging. - Replaced ad-hoc temp cleanup with `FileUtils.deleteDirectory` for full working-dir removal. - Minor imports/cleanup: removed unused `Arrays`, added `StandardCopyOption`, `FileUtils`, and related imports. - **Why the change was made** - Increase conversion reliability across environments where either unoconvert or soffice may be available. - Harden security and stability through strict input validation and sanitized HTML processing. - Prevent orphaned files/directories and ensure consistent cleanup to reduce disk footprint and operational issues. - Provide clearer operational signals (logging, explicit exceptions) for easier troubleshooting. 
--- ## Checklist ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [x] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details. 
--- .../converters/ConvertOfficeController.java | 142 ++++++++++++++---- 1 file changed, 109 insertions(+), 33 deletions(-) diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java index 651444c69..007454305 100644 --- a/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java +++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java @@ -5,10 +5,11 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.StandardCopyOption; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; +import org.apache.commons.io.FileUtils; import org.apache.commons.io.FilenameUtils; import org.apache.pdfbox.pdmodel.PDDocument; import org.springframework.http.ResponseEntity; @@ -23,7 +24,9 @@ import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import stirling.software.SPDF.config.EndpointConfiguration; import stirling.software.common.configuration.RuntimePathConfig; import stirling.software.common.model.api.GeneralFile; import stirling.software.common.service.CustomPDFDocumentFactory; @@ -36,59 +39,130 @@ import stirling.software.common.util.WebResponseUtils; @Tag(name = "Convert", description = "Convert APIs") @RequestMapping("/api/v1/convert") @RequiredArgsConstructor +@Slf4j public class ConvertOfficeController { private final CustomPDFDocumentFactory pdfDocumentFactory; private final RuntimePathConfig runtimePathConfig; private final CustomHtmlSanitizer customHtmlSanitizer; + private final EndpointConfiguration endpointConfiguration; + + private boolean isUnoconvertAvailable() { + return 
endpointConfiguration.isGroupEnabled("Unoconvert") + || endpointConfiguration.isGroupEnabled("Python"); + } public File convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException { - // Check for valid file extension + // Check for valid file extension and sanitize filename String originalFilename = Filenames.toSimpleFileName(inputFile.getOriginalFilename()); - if (originalFilename == null - || !isValidFileExtension(FilenameUtils.getExtension(originalFilename))) { - throw new IllegalArgumentException("Invalid file extension"); + if (originalFilename == null || originalFilename.isBlank()) { + throw new IllegalArgumentException("Missing original filename"); } - // Save the uploaded file to a temporary location - Path tempInputFile = - Files.createTempFile("input_", "." + FilenameUtils.getExtension(originalFilename)); + // Check for valid file extension + String extension = FilenameUtils.getExtension(originalFilename); + if (extension == null || !isValidFileExtension(extension)) { + throw new IllegalArgumentException("Invalid file extension"); + } + String extensionLower = extension.toLowerCase(); + + String baseName = FilenameUtils.getBaseName(originalFilename); + if (baseName == null || baseName.isBlank()) { + baseName = "input"; + } + + // create temporary working directory + Path workDir = Files.createTempDirectory("office2pdf_"); + Path inputPath = workDir.resolve(baseName + "." 
+ extensionLower); + Path outputPath = workDir.resolve(baseName + ".pdf"); // Check if the file is HTML and apply sanitization if needed - String fileExtension = FilenameUtils.getExtension(originalFilename).toLowerCase(); - if ("html".equals(fileExtension) || "htm".equals(fileExtension)) { + if ("html".equals(extensionLower) || "htm".equals(extensionLower)) { // Read and sanitize HTML content String htmlContent = new String(inputFile.getBytes(), StandardCharsets.UTF_8); String sanitizedHtml = customHtmlSanitizer.sanitize(htmlContent); - Files.write(tempInputFile, sanitizedHtml.getBytes(StandardCharsets.UTF_8)); + Files.writeString(inputPath, sanitizedHtml, StandardCharsets.UTF_8); } else { - inputFile.transferTo(tempInputFile); + // copy file content + Files.copy(inputFile.getInputStream(), inputPath, StandardCopyOption.REPLACE_EXISTING); } - // Prepare the output file path - Path tempOutputFile = Files.createTempFile("output_", ".pdf"); - try { - // Run the LibreOffice command - List command = - new ArrayList<>( - Arrays.asList( - runtimePathConfig.getUnoConvertPath(), - "--port", - "2003", - "--convert-to", - "pdf", - tempInputFile.toString(), - tempOutputFile.toString())); - ProcessExecutorResult returnCode = - ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE) - .runCommandWithOutputHandling(command); + ProcessExecutorResult result; + // Run Unoconvert command + if (isUnoconvertAvailable()) { + // Unoconvert: write directly to outputPath inside the workDir + List command = new ArrayList<>(); + command.add(runtimePathConfig.getUnoConvertPath()); + command.add("--port"); + command.add("2003"); + command.add("--convert-to"); + command.add("pdf"); + command.add(inputPath.toString()); + command.add(outputPath.toString()); - // Read the converted PDF file - return tempOutputFile.toFile(); + result = + ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE) + .runCommandWithOutputHandling(command); + } // Run soffice command + else { + 
List command = new ArrayList<>(); + command.add("soffice"); + command.add("--headless"); + command.add("--nologo"); + command.add("--convert-to"); + command.add("pdf:writer_pdf_Export"); + command.add("--outdir"); + command.add(workDir.toString()); + command.add(inputPath.toString()); + + result = + ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE) + .runCommandWithOutputHandling(command); + } + + // Check the result + if (result == null) { + throw new IllegalStateException("Converter returned no result"); + } + if (result.getRc() != 0) { + throw new IllegalStateException("Conversion failed (exit " + result.getRc() + ")"); + } + + if (!Files.exists(outputPath)) { + // Some LibreOffice versions may deviate with exotic names – as a fallback, we try + // to find any .pdf in the workDir + try (var stream = Files.list(workDir)) { + Path fallback = + stream.filter( + p -> + p.getFileName() + .toString() + .toLowerCase() + .endsWith(".pdf")) + .findFirst() + .orElse(null); + if (fallback == null) { + throw new IllegalStateException("No PDF produced."); + } + // Move the found PDF to the expected outputPath + Files.move(fallback, outputPath, StandardCopyOption.REPLACE_EXISTING); + } + } + + // Check if the output file is empty + if (Files.size(outputPath) == 0L) { + throw new IllegalStateException("Produced PDF is empty"); + } + + return outputPath.toFile(); } finally { // Clean up the temporary files - if (tempInputFile != null) Files.deleteIfExists(tempInputFile); + try { + Files.deleteIfExists(inputPath); + } catch (IOException e) { + log.warn("Failed to delete temp input file: {}", inputPath, e); + } } } @@ -119,7 +193,9 @@ public class ConvertOfficeController { .replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf"); } finally { - if (file != null) file.delete(); + if (file != null && file.getParent() != null) { + FileUtils.deleteDirectory(file.getParentFile()); + } } } }