mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-08 17:51:20 +02:00
feat(convert): PDF conversion with unoconvert fallback soffice (#4316)
# Description of Changes - **What was changed** - Reworked `ConvertOfficeController` to use a dedicated temporary working directory per request and cleaned up with directory-level deletion. - Added detection for converter availability via `EndpointConfiguration` to choose between **unoconvert** and a **soffice** headless fallback. - Ensured safe filename handling (sanitization, extension checks, lowercase normalization) and early validation errors for missing/invalid filenames. - Switched raw temp file writes to `Files.copy` / `Files.writeString` with `StandardCopyOption.REPLACE_EXISTING`. - Implemented robust output handling: - Verified non-zero exit codes and null results. - Checked for missing/empty PDF outputs. - Added fallback lookup for any produced `.pdf` within the work directory if the expected name is not present. - Introduced `@Slf4j` logging; improved error and cleanup logging. - Replaced ad-hoc temp cleanup with `FileUtils.deleteDirectory` for full working-dir removal. - Minor imports/cleanup: removed unused `Arrays`, added `StandardCopyOption`, `FileUtils`, and related imports. - **Why the change was made** - Increase conversion reliability across environments where either unoconvert or soffice may be available. - Harden security and stability through strict input validation and sanitized HTML processing. - Prevent orphaned files/directories and ensure consistent cleanup to reduce disk footprint and operational issues. - Provide clearer operational signals (logging, explicit exceptions) for easier troubleshooting. --- ## Checklist ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [x] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details.
This commit is contained in:
parent
fe84b3ff15
commit
c055f9456a
@ -5,10 +5,11 @@ import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.io.FilenameUtils;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
@ -23,7 +24,9 @@ import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import stirling.software.SPDF.config.EndpointConfiguration;
|
||||
import stirling.software.common.configuration.RuntimePathConfig;
|
||||
import stirling.software.common.model.api.GeneralFile;
|
||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
@ -36,59 +39,130 @@ import stirling.software.common.util.WebResponseUtils;
|
||||
@Tag(name = "Convert", description = "Convert APIs")
|
||||
@RequestMapping("/api/v1/convert")
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class ConvertOfficeController {
|
||||
|
||||
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||
private final RuntimePathConfig runtimePathConfig;
|
||||
private final CustomHtmlSanitizer customHtmlSanitizer;
|
||||
private final EndpointConfiguration endpointConfiguration;
|
||||
|
||||
private boolean isUnoconvertAvailable() {
|
||||
return endpointConfiguration.isGroupEnabled("Unoconvert")
|
||||
|| endpointConfiguration.isGroupEnabled("Python");
|
||||
}
|
||||
|
||||
public File convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException {
|
||||
// Check for valid file extension
|
||||
// Check for valid file extension and sanitize filename
|
||||
String originalFilename = Filenames.toSimpleFileName(inputFile.getOriginalFilename());
|
||||
if (originalFilename == null
|
||||
|| !isValidFileExtension(FilenameUtils.getExtension(originalFilename))) {
|
||||
throw new IllegalArgumentException("Invalid file extension");
|
||||
if (originalFilename == null || originalFilename.isBlank()) {
|
||||
throw new IllegalArgumentException("Missing original filename");
|
||||
}
|
||||
|
||||
// Save the uploaded file to a temporary location
|
||||
Path tempInputFile =
|
||||
Files.createTempFile("input_", "." + FilenameUtils.getExtension(originalFilename));
|
||||
// Check for valid file extension
|
||||
String extension = FilenameUtils.getExtension(originalFilename);
|
||||
if (extension == null || !isValidFileExtension(extension)) {
|
||||
throw new IllegalArgumentException("Invalid file extension");
|
||||
}
|
||||
String extensionLower = extension.toLowerCase();
|
||||
|
||||
String baseName = FilenameUtils.getBaseName(originalFilename);
|
||||
if (baseName == null || baseName.isBlank()) {
|
||||
baseName = "input";
|
||||
}
|
||||
|
||||
// create temporary working directory
|
||||
Path workDir = Files.createTempDirectory("office2pdf_");
|
||||
Path inputPath = workDir.resolve(baseName + "." + extensionLower);
|
||||
Path outputPath = workDir.resolve(baseName + ".pdf");
|
||||
|
||||
// Check if the file is HTML and apply sanitization if needed
|
||||
String fileExtension = FilenameUtils.getExtension(originalFilename).toLowerCase();
|
||||
if ("html".equals(fileExtension) || "htm".equals(fileExtension)) {
|
||||
if ("html".equals(extensionLower) || "htm".equals(extensionLower)) {
|
||||
// Read and sanitize HTML content
|
||||
String htmlContent = new String(inputFile.getBytes(), StandardCharsets.UTF_8);
|
||||
String sanitizedHtml = customHtmlSanitizer.sanitize(htmlContent);
|
||||
Files.write(tempInputFile, sanitizedHtml.getBytes(StandardCharsets.UTF_8));
|
||||
Files.writeString(inputPath, sanitizedHtml, StandardCharsets.UTF_8);
|
||||
} else {
|
||||
inputFile.transferTo(tempInputFile);
|
||||
// copy file content
|
||||
Files.copy(inputFile.getInputStream(), inputPath, StandardCopyOption.REPLACE_EXISTING);
|
||||
}
|
||||
|
||||
// Prepare the output file path
|
||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||
|
||||
try {
|
||||
// Run the LibreOffice command
|
||||
List<String> command =
|
||||
new ArrayList<>(
|
||||
Arrays.asList(
|
||||
runtimePathConfig.getUnoConvertPath(),
|
||||
"--port",
|
||||
"2003",
|
||||
"--convert-to",
|
||||
"pdf",
|
||||
tempInputFile.toString(),
|
||||
tempOutputFile.toString()));
|
||||
ProcessExecutorResult returnCode =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
|
||||
.runCommandWithOutputHandling(command);
|
||||
ProcessExecutorResult result;
|
||||
// Run Unoconvert command
|
||||
if (isUnoconvertAvailable()) {
|
||||
// Unoconvert: schreibe direkt in outputPath innerhalb des workDir
|
||||
List<String> command = new ArrayList<>();
|
||||
command.add(runtimePathConfig.getUnoConvertPath());
|
||||
command.add("--port");
|
||||
command.add("2003");
|
||||
command.add("--convert-to");
|
||||
command.add("pdf");
|
||||
command.add(inputPath.toString());
|
||||
command.add(outputPath.toString());
|
||||
|
||||
// Read the converted PDF file
|
||||
return tempOutputFile.toFile();
|
||||
result =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
|
||||
.runCommandWithOutputHandling(command);
|
||||
} // Run soffice command
|
||||
else {
|
||||
List<String> command = new ArrayList<>();
|
||||
command.add("soffice");
|
||||
command.add("--headless");
|
||||
command.add("--nologo");
|
||||
command.add("--convert-to");
|
||||
command.add("pdf:writer_pdf_Export");
|
||||
command.add("--outdir");
|
||||
command.add(workDir.toString());
|
||||
command.add(inputPath.toString());
|
||||
|
||||
result =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
|
||||
.runCommandWithOutputHandling(command);
|
||||
}
|
||||
|
||||
// Check the result
|
||||
if (result == null) {
|
||||
throw new IllegalStateException("Converter returned no result");
|
||||
}
|
||||
if (result.getRc() != 0) {
|
||||
throw new IllegalStateException("Conversion failed (exit " + result.getRc() + ")");
|
||||
}
|
||||
|
||||
if (!Files.exists(outputPath)) {
|
||||
// Some LibreOffice versions may deviate with exotic names – as a fallback, we try
|
||||
// to find any .pdf in the workDir
|
||||
try (var stream = Files.list(workDir)) {
|
||||
Path fallback =
|
||||
stream.filter(
|
||||
p ->
|
||||
p.getFileName()
|
||||
.toString()
|
||||
.toLowerCase()
|
||||
.endsWith(".pdf"))
|
||||
.findFirst()
|
||||
.orElse(null);
|
||||
if (fallback == null) {
|
||||
throw new IllegalStateException("No PDF produced.");
|
||||
}
|
||||
// Move the found PDF to the expected outputPath
|
||||
Files.move(fallback, outputPath, StandardCopyOption.REPLACE_EXISTING);
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the output file is empty
|
||||
if (Files.size(outputPath) == 0L) {
|
||||
throw new IllegalStateException("Produced PDF is empty");
|
||||
}
|
||||
|
||||
return outputPath.toFile();
|
||||
} finally {
|
||||
// Clean up the temporary files
|
||||
if (tempInputFile != null) Files.deleteIfExists(tempInputFile);
|
||||
try {
|
||||
Files.deleteIfExists(inputPath);
|
||||
} catch (IOException e) {
|
||||
log.warn("Failed to delete temp input file: {}", inputPath, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -119,7 +193,9 @@ public class ConvertOfficeController {
|
||||
.replaceFirst("[.][^.]+$", "")
|
||||
+ "_convertedToPDF.pdf");
|
||||
} finally {
|
||||
if (file != null) file.delete();
|
||||
if (file != null && file.getParent() != null) {
|
||||
FileUtils.deleteDirectory(file.getParentFile());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user