mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-08 17:51:20 +02:00
feat(convert): PDF conversion via unoconvert with soffice fallback (#4316)
# Description of Changes

- **What was changed**
  - Reworked `ConvertOfficeController` to use a dedicated temporary working directory per request, which is cleaned up with directory-level deletion.
  - Added detection for converter availability via `EndpointConfiguration` to choose between **unoconvert** and a **soffice** headless fallback.
  - Ensured safe filename handling (sanitization, extension checks, lowercase normalization) and early validation errors for missing/invalid filenames.
  - Switched raw temp file writes to `Files.copy` (with `StandardCopyOption.REPLACE_EXISTING`) / `Files.writeString`.
  - Implemented robust output handling:
    - Checked for non-zero exit codes and null results.
    - Checked for missing/empty PDF outputs.
    - Added fallback lookup for any produced `.pdf` within the work directory if the expected name is not present.
  - Introduced `@Slf4j` logging; improved error and cleanup logging.
  - Replaced ad-hoc temp cleanup with `FileUtils.deleteDirectory` for full working-dir removal.
  - Minor imports/cleanup: removed unused `Arrays`, added `StandardCopyOption`, `FileUtils`, and related imports.
- **Why the change was made**
  - Increase conversion reliability across environments where either unoconvert or soffice may be available.
  - Harden security and stability through strict input validation and sanitized HTML processing.
  - Prevent orphaned files/directories and ensure consistent cleanup to reduce disk footprint and operational issues.
  - Provide clearer operational signals (logging, explicit exceptions) for easier troubleshooting.
---

## Checklist

### General

- [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable)
- [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed)
- [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details.
This commit is contained in:
parent
fe84b3ff15
commit
c055f9456a
@ -5,10 +5,11 @@ import java.io.IOException;
|
|||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.StandardCopyOption;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.commons.io.FilenameUtils;
|
import org.apache.commons.io.FilenameUtils;
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
import org.springframework.http.ResponseEntity;
|
import org.springframework.http.ResponseEntity;
|
||||||
@ -23,7 +24,9 @@ import io.swagger.v3.oas.annotations.Operation;
|
|||||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
|
import stirling.software.SPDF.config.EndpointConfiguration;
|
||||||
import stirling.software.common.configuration.RuntimePathConfig;
|
import stirling.software.common.configuration.RuntimePathConfig;
|
||||||
import stirling.software.common.model.api.GeneralFile;
|
import stirling.software.common.model.api.GeneralFile;
|
||||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||||
@ -36,59 +39,130 @@ import stirling.software.common.util.WebResponseUtils;
|
|||||||
@Tag(name = "Convert", description = "Convert APIs")
|
@Tag(name = "Convert", description = "Convert APIs")
|
||||||
@RequestMapping("/api/v1/convert")
|
@RequestMapping("/api/v1/convert")
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
|
@Slf4j
|
||||||
public class ConvertOfficeController {
|
public class ConvertOfficeController {
|
||||||
|
|
||||||
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||||
private final RuntimePathConfig runtimePathConfig;
|
private final RuntimePathConfig runtimePathConfig;
|
||||||
private final CustomHtmlSanitizer customHtmlSanitizer;
|
private final CustomHtmlSanitizer customHtmlSanitizer;
|
||||||
|
private final EndpointConfiguration endpointConfiguration;
|
||||||
|
|
||||||
|
private boolean isUnoconvertAvailable() {
|
||||||
|
return endpointConfiguration.isGroupEnabled("Unoconvert")
|
||||||
|
|| endpointConfiguration.isGroupEnabled("Python");
|
||||||
|
}
|
||||||
|
|
||||||
public File convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException {
|
public File convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException {
|
||||||
// Check for valid file extension
|
// Check for valid file extension and sanitize filename
|
||||||
String originalFilename = Filenames.toSimpleFileName(inputFile.getOriginalFilename());
|
String originalFilename = Filenames.toSimpleFileName(inputFile.getOriginalFilename());
|
||||||
if (originalFilename == null
|
if (originalFilename == null || originalFilename.isBlank()) {
|
||||||
|| !isValidFileExtension(FilenameUtils.getExtension(originalFilename))) {
|
throw new IllegalArgumentException("Missing original filename");
|
||||||
throw new IllegalArgumentException("Invalid file extension");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save the uploaded file to a temporary location
|
// Check for valid file extension
|
||||||
Path tempInputFile =
|
String extension = FilenameUtils.getExtension(originalFilename);
|
||||||
Files.createTempFile("input_", "." + FilenameUtils.getExtension(originalFilename));
|
if (extension == null || !isValidFileExtension(extension)) {
|
||||||
|
throw new IllegalArgumentException("Invalid file extension");
|
||||||
|
}
|
||||||
|
String extensionLower = extension.toLowerCase();
|
||||||
|
|
||||||
|
String baseName = FilenameUtils.getBaseName(originalFilename);
|
||||||
|
if (baseName == null || baseName.isBlank()) {
|
||||||
|
baseName = "input";
|
||||||
|
}
|
||||||
|
|
||||||
|
// create temporary working directory
|
||||||
|
Path workDir = Files.createTempDirectory("office2pdf_");
|
||||||
|
Path inputPath = workDir.resolve(baseName + "." + extensionLower);
|
||||||
|
Path outputPath = workDir.resolve(baseName + ".pdf");
|
||||||
|
|
||||||
// Check if the file is HTML and apply sanitization if needed
|
// Check if the file is HTML and apply sanitization if needed
|
||||||
String fileExtension = FilenameUtils.getExtension(originalFilename).toLowerCase();
|
if ("html".equals(extensionLower) || "htm".equals(extensionLower)) {
|
||||||
if ("html".equals(fileExtension) || "htm".equals(fileExtension)) {
|
|
||||||
// Read and sanitize HTML content
|
// Read and sanitize HTML content
|
||||||
String htmlContent = new String(inputFile.getBytes(), StandardCharsets.UTF_8);
|
String htmlContent = new String(inputFile.getBytes(), StandardCharsets.UTF_8);
|
||||||
String sanitizedHtml = customHtmlSanitizer.sanitize(htmlContent);
|
String sanitizedHtml = customHtmlSanitizer.sanitize(htmlContent);
|
||||||
Files.write(tempInputFile, sanitizedHtml.getBytes(StandardCharsets.UTF_8));
|
Files.writeString(inputPath, sanitizedHtml, StandardCharsets.UTF_8);
|
||||||
} else {
|
} else {
|
||||||
inputFile.transferTo(tempInputFile);
|
// copy file content
|
||||||
|
Files.copy(inputFile.getInputStream(), inputPath, StandardCopyOption.REPLACE_EXISTING);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prepare the output file path
|
|
||||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Run the LibreOffice command
|
ProcessExecutorResult result;
|
||||||
List<String> command =
|
// Run Unoconvert command
|
||||||
new ArrayList<>(
|
if (isUnoconvertAvailable()) {
|
||||||
Arrays.asList(
|
// Unoconvert: write directly to outputPath inside the workDir
|
||||||
runtimePathConfig.getUnoConvertPath(),
|
List<String> command = new ArrayList<>();
|
||||||
"--port",
|
command.add(runtimePathConfig.getUnoConvertPath());
|
||||||
"2003",
|
command.add("--port");
|
||||||
"--convert-to",
|
command.add("2003");
|
||||||
"pdf",
|
command.add("--convert-to");
|
||||||
tempInputFile.toString(),
|
command.add("pdf");
|
||||||
tempOutputFile.toString()));
|
command.add(inputPath.toString());
|
||||||
ProcessExecutorResult returnCode =
|
command.add(outputPath.toString());
|
||||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
|
|
||||||
.runCommandWithOutputHandling(command);
|
|
||||||
|
|
||||||
// Read the converted PDF file
|
result =
|
||||||
return tempOutputFile.toFile();
|
ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
|
||||||
|
.runCommandWithOutputHandling(command);
|
||||||
|
} // Run soffice command
|
||||||
|
else {
|
||||||
|
List<String> command = new ArrayList<>();
|
||||||
|
command.add("soffice");
|
||||||
|
command.add("--headless");
|
||||||
|
command.add("--nologo");
|
||||||
|
command.add("--convert-to");
|
||||||
|
command.add("pdf:writer_pdf_Export");
|
||||||
|
command.add("--outdir");
|
||||||
|
command.add(workDir.toString());
|
||||||
|
command.add(inputPath.toString());
|
||||||
|
|
||||||
|
result =
|
||||||
|
ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
|
||||||
|
.runCommandWithOutputHandling(command);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check the result
|
||||||
|
if (result == null) {
|
||||||
|
throw new IllegalStateException("Converter returned no result");
|
||||||
|
}
|
||||||
|
if (result.getRc() != 0) {
|
||||||
|
throw new IllegalStateException("Conversion failed (exit " + result.getRc() + ")");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!Files.exists(outputPath)) {
|
||||||
|
// Some LibreOffice versions may deviate with exotic names – as a fallback, we try
|
||||||
|
// to find any .pdf in the workDir
|
||||||
|
try (var stream = Files.list(workDir)) {
|
||||||
|
Path fallback =
|
||||||
|
stream.filter(
|
||||||
|
p ->
|
||||||
|
p.getFileName()
|
||||||
|
.toString()
|
||||||
|
.toLowerCase()
|
||||||
|
.endsWith(".pdf"))
|
||||||
|
.findFirst()
|
||||||
|
.orElse(null);
|
||||||
|
if (fallback == null) {
|
||||||
|
throw new IllegalStateException("No PDF produced.");
|
||||||
|
}
|
||||||
|
// Move the found PDF to the expected outputPath
|
||||||
|
Files.move(fallback, outputPath, StandardCopyOption.REPLACE_EXISTING);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if the output file is empty
|
||||||
|
if (Files.size(outputPath) == 0L) {
|
||||||
|
throw new IllegalStateException("Produced PDF is empty");
|
||||||
|
}
|
||||||
|
|
||||||
|
return outputPath.toFile();
|
||||||
} finally {
|
} finally {
|
||||||
// Clean up the temporary files
|
// Clean up the temporary files
|
||||||
if (tempInputFile != null) Files.deleteIfExists(tempInputFile);
|
try {
|
||||||
|
Files.deleteIfExists(inputPath);
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.warn("Failed to delete temp input file: {}", inputPath, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -119,7 +193,9 @@ public class ConvertOfficeController {
|
|||||||
.replaceFirst("[.][^.]+$", "")
|
.replaceFirst("[.][^.]+$", "")
|
||||||
+ "_convertedToPDF.pdf");
|
+ "_convertedToPDF.pdf");
|
||||||
} finally {
|
} finally {
|
||||||
if (file != null) file.delete();
|
if (file != null && file.getParent() != null) {
|
||||||
|
FileUtils.deleteDirectory(file.getParentFile());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user