enable HTML santise for office

This commit is contained in:
Anthony Stirling 2025-07-24 10:48:13 +01:00
parent b69c3a0ea9
commit d87b27b578

View File

@ -2,6 +2,7 @@ package stirling.software.SPDF.controller.api.converters;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
@ -24,8 +25,10 @@ import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import stirling.software.common.configuration.RuntimePathConfig;
import stirling.software.common.model.ApplicationProperties;
import stirling.software.common.model.api.GeneralFile;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.CustomHtmlSanitizer;
import stirling.software.common.util.ProcessExecutor;
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
import stirling.software.common.util.WebResponseUtils;
@ -38,6 +41,7 @@ public class ConvertOfficeController {
private final CustomPDFDocumentFactory pdfDocumentFactory;
private final RuntimePathConfig runtimePathConfig;
private final ApplicationProperties applicationProperties;
public File convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException {
// Check for valid file extension
@ -50,7 +54,24 @@ public class ConvertOfficeController {
// Save the uploaded file to a temporary location
Path tempInputFile =
Files.createTempFile("input_", "." + FilenameUtils.getExtension(originalFilename));
inputFile.transferTo(tempInputFile);
// Check if the file is HTML and apply sanitization if needed
String fileExtension = FilenameUtils.getExtension(originalFilename).toLowerCase();
if (fileExtension.equals("html") || fileExtension.equals("htm")) {
boolean disableSanitize =
Boolean.TRUE.equals(applicationProperties.getSystem().getDisableSanitize());
if (!disableSanitize) {
// Read and sanitize HTML content
String htmlContent = new String(inputFile.getBytes(), StandardCharsets.UTF_8);
String sanitizedHtml = CustomHtmlSanitizer.sanitize(htmlContent);
Files.write(tempInputFile, sanitizedHtml.getBytes(StandardCharsets.UTF_8));
} else {
inputFile.transferTo(tempInputFile);
}
} else {
inputFile.transferTo(tempInputFile);
}
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");