feat(convert): PDF conversion via unoconvert with soffice fallback (#4316)

# Description of Changes

- **What was changed**
  - Reworked `ConvertOfficeController` to use a dedicated temporary
    working directory per request, which is cleaned up with directory-level
    deletion.
  - Added detection of converter availability via `EndpointConfiguration`
    to choose between **unoconvert** and a headless **soffice** fallback.
  - Ensured safe filename handling (sanitization, extension checks,
    lowercase normalization of the extension) and early validation errors
    for missing/invalid filenames.
  - Switched raw temp file writes to `Files.copy` (with
    `StandardCopyOption.REPLACE_EXISTING`) and `Files.writeString`.
  - Implemented robust output handling:
    - Checked for non-zero exit codes and null results, failing the
      conversion in either case.
    - Checked for missing/empty PDF outputs.
    - Added fallback lookup for any produced `.pdf` within the work
      directory if the expected name is not present.
  - Introduced `@Slf4j` logging; improved error and cleanup logging.
  - Replaced ad-hoc temp cleanup with `FileUtils.deleteDirectory` for full
    working-dir removal.
  - Minor imports/cleanup: removed unused `Arrays`, added
    `StandardCopyOption`, `FileUtils`, and related imports.

- **Why the change was made**
  - Increase conversion reliability across environments where either
    unoconvert or soffice may be available.
  - Harden security and stability through strict input validation and
    sanitized HTML processing.
  - Prevent orphaned files/directories and ensure consistent cleanup to
    reduce disk footprint and operational issues.
  - Provide clearer operational signals (logging, explicit exceptions) for
    easier troubleshooting.


---

## Checklist

### General

- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.
This commit is contained in:
Ludy 2025-09-04 15:33:35 +02:00 committed by GitHub
parent fe84b3ff15
commit c055f9456a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -5,10 +5,11 @@ import java.io.IOException;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List; import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils; import org.apache.commons.io.FilenameUtils;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.http.ResponseEntity; import org.springframework.http.ResponseEntity;
@ -23,7 +24,9 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag; import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.config.EndpointConfiguration;
import stirling.software.common.configuration.RuntimePathConfig; import stirling.software.common.configuration.RuntimePathConfig;
import stirling.software.common.model.api.GeneralFile; import stirling.software.common.model.api.GeneralFile;
import stirling.software.common.service.CustomPDFDocumentFactory; import stirling.software.common.service.CustomPDFDocumentFactory;
@ -36,59 +39,130 @@ import stirling.software.common.util.WebResponseUtils;
@Tag(name = "Convert", description = "Convert APIs") @Tag(name = "Convert", description = "Convert APIs")
@RequestMapping("/api/v1/convert") @RequestMapping("/api/v1/convert")
@RequiredArgsConstructor @RequiredArgsConstructor
@Slf4j
public class ConvertOfficeController { public class ConvertOfficeController {
private final CustomPDFDocumentFactory pdfDocumentFactory; private final CustomPDFDocumentFactory pdfDocumentFactory;
private final RuntimePathConfig runtimePathConfig; private final RuntimePathConfig runtimePathConfig;
private final CustomHtmlSanitizer customHtmlSanitizer; private final CustomHtmlSanitizer customHtmlSanitizer;
private final EndpointConfiguration endpointConfiguration;
public File convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException { private boolean isUnoconvertAvailable() {
// Check for valid file extension return endpointConfiguration.isGroupEnabled("Unoconvert")
String originalFilename = Filenames.toSimpleFileName(inputFile.getOriginalFilename()); || endpointConfiguration.isGroupEnabled("Python");
if (originalFilename == null
|| !isValidFileExtension(FilenameUtils.getExtension(originalFilename))) {
throw new IllegalArgumentException("Invalid file extension");
} }
// Save the uploaded file to a temporary location public File convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException {
Path tempInputFile = // Check for valid file extension and sanitize filename
Files.createTempFile("input_", "." + FilenameUtils.getExtension(originalFilename)); String originalFilename = Filenames.toSimpleFileName(inputFile.getOriginalFilename());
if (originalFilename == null || originalFilename.isBlank()) {
throw new IllegalArgumentException("Missing original filename");
}
// Check for valid file extension
String extension = FilenameUtils.getExtension(originalFilename);
if (extension == null || !isValidFileExtension(extension)) {
throw new IllegalArgumentException("Invalid file extension");
}
String extensionLower = extension.toLowerCase();
String baseName = FilenameUtils.getBaseName(originalFilename);
if (baseName == null || baseName.isBlank()) {
baseName = "input";
}
// create temporary working directory
Path workDir = Files.createTempDirectory("office2pdf_");
Path inputPath = workDir.resolve(baseName + "." + extensionLower);
Path outputPath = workDir.resolve(baseName + ".pdf");
// Check if the file is HTML and apply sanitization if needed // Check if the file is HTML and apply sanitization if needed
String fileExtension = FilenameUtils.getExtension(originalFilename).toLowerCase(); if ("html".equals(extensionLower) || "htm".equals(extensionLower)) {
if ("html".equals(fileExtension) || "htm".equals(fileExtension)) {
// Read and sanitize HTML content // Read and sanitize HTML content
String htmlContent = new String(inputFile.getBytes(), StandardCharsets.UTF_8); String htmlContent = new String(inputFile.getBytes(), StandardCharsets.UTF_8);
String sanitizedHtml = customHtmlSanitizer.sanitize(htmlContent); String sanitizedHtml = customHtmlSanitizer.sanitize(htmlContent);
Files.write(tempInputFile, sanitizedHtml.getBytes(StandardCharsets.UTF_8)); Files.writeString(inputPath, sanitizedHtml, StandardCharsets.UTF_8);
} else { } else {
inputFile.transferTo(tempInputFile); // copy file content
Files.copy(inputFile.getInputStream(), inputPath, StandardCopyOption.REPLACE_EXISTING);
} }
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
try { try {
// Run the LibreOffice command ProcessExecutorResult result;
List<String> command = // Run Unoconvert command
new ArrayList<>( if (isUnoconvertAvailable()) {
Arrays.asList( // Unoconvert: schreibe direkt in outputPath innerhalb des workDir
runtimePathConfig.getUnoConvertPath(), List<String> command = new ArrayList<>();
"--port", command.add(runtimePathConfig.getUnoConvertPath());
"2003", command.add("--port");
"--convert-to", command.add("2003");
"pdf", command.add("--convert-to");
tempInputFile.toString(), command.add("pdf");
tempOutputFile.toString())); command.add(inputPath.toString());
ProcessExecutorResult returnCode = command.add(outputPath.toString());
result =
ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE) ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
.runCommandWithOutputHandling(command); .runCommandWithOutputHandling(command);
} // Run soffice command
else {
List<String> command = new ArrayList<>();
command.add("soffice");
command.add("--headless");
command.add("--nologo");
command.add("--convert-to");
command.add("pdf:writer_pdf_Export");
command.add("--outdir");
command.add(workDir.toString());
command.add(inputPath.toString());
// Read the converted PDF file result =
return tempOutputFile.toFile(); ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
.runCommandWithOutputHandling(command);
}
// Check the result
if (result == null) {
throw new IllegalStateException("Converter returned no result");
}
if (result.getRc() != 0) {
throw new IllegalStateException("Conversion failed (exit " + result.getRc() + ")");
}
if (!Files.exists(outputPath)) {
// Some LibreOffice versions may deviate with exotic names as a fallback, we try
// to find any .pdf in the workDir
try (var stream = Files.list(workDir)) {
Path fallback =
stream.filter(
p ->
p.getFileName()
.toString()
.toLowerCase()
.endsWith(".pdf"))
.findFirst()
.orElse(null);
if (fallback == null) {
throw new IllegalStateException("No PDF produced.");
}
// Move the found PDF to the expected outputPath
Files.move(fallback, outputPath, StandardCopyOption.REPLACE_EXISTING);
}
}
// Check if the output file is empty
if (Files.size(outputPath) == 0L) {
throw new IllegalStateException("Produced PDF is empty");
}
return outputPath.toFile();
} finally { } finally {
// Clean up the temporary files // Clean up the temporary files
if (tempInputFile != null) Files.deleteIfExists(tempInputFile); try {
Files.deleteIfExists(inputPath);
} catch (IOException e) {
log.warn("Failed to delete temp input file: {}", inputPath, e);
}
} }
} }
@ -119,7 +193,9 @@ public class ConvertOfficeController {
.replaceFirst("[.][^.]+$", "") .replaceFirst("[.][^.]+$", "")
+ "_convertedToPDF.pdf"); + "_convertedToPDF.pdf");
} finally { } finally {
if (file != null) file.delete(); if (file != null && file.getParent() != null) {
FileUtils.deleteDirectory(file.getParentFile());
}
} }
} }
} }