added option for disabling HTML Sanitize (#2831)

# Description of Changes

Please provide a summary of the changes, including:

- added disableSanitize: false # set to 'true' to disable Sanitize HTML,
set to false to enable Sanitize HTML; (can lead to injections in HTML)
- Some users uses this on local boxes, and uses Google Fonts, and base64
image src.


### General

- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md)
(if applicable)
- [x] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md)
(if applicable)
- [x] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [x] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing)
for more details.

---------

Co-authored-by: blaz.carli <blaz.carli@arctur.si>
Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
This commit is contained in:
Blaž Carli 2025-02-01 00:36:50 +01:00 committed by GitHub
parent c5cffdcacb
commit 6ae2fddd48
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 35 additions and 15 deletions

View File

@ -14,6 +14,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag; import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.converters.HTMLToPdfRequest; import stirling.software.SPDF.model.api.converters.HTMLToPdfRequest;
import stirling.software.SPDF.model.ApplicationProperties;
import stirling.software.SPDF.service.CustomPDDocumentFactory; import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.FileToPdf; import stirling.software.SPDF.utils.FileToPdf;
import stirling.software.SPDF.utils.WebResponseUtils; import stirling.software.SPDF.utils.WebResponseUtils;
@ -27,12 +28,16 @@ public class ConvertHtmlToPDF {
private final CustomPDDocumentFactory pdfDocumentFactory; private final CustomPDDocumentFactory pdfDocumentFactory;
private final ApplicationProperties applicationProperties;
@Autowired @Autowired
public ConvertHtmlToPDF( public ConvertHtmlToPDF(
CustomPDDocumentFactory pdfDocumentFactory, CustomPDDocumentFactory pdfDocumentFactory,
@Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled) { @Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled,
ApplicationProperties applicationProperties) {
this.pdfDocumentFactory = pdfDocumentFactory; this.pdfDocumentFactory = pdfDocumentFactory;
this.bookAndHtmlFormatsInstalled = bookAndHtmlFormatsInstalled; this.bookAndHtmlFormatsInstalled = bookAndHtmlFormatsInstalled;
this.applicationProperties = applicationProperties;
} }
@PostMapping(consumes = "multipart/form-data", value = "/html/pdf") @PostMapping(consumes = "multipart/form-data", value = "/html/pdf")
@ -54,12 +59,16 @@ public class ConvertHtmlToPDF {
|| (!originalFilename.endsWith(".html") && !originalFilename.endsWith(".zip"))) { || (!originalFilename.endsWith(".html") && !originalFilename.endsWith(".zip"))) {
throw new IllegalArgumentException("File must be either .html or .zip format."); throw new IllegalArgumentException("File must be either .html or .zip format.");
} }
boolean disableSanitize = Boolean.TRUE.equals(applicationProperties.getSystem().getDisableSanitize());
byte[] pdfBytes = byte[] pdfBytes =
FileToPdf.convertHtmlToPdf( FileToPdf.convertHtmlToPdf(
request, request,
fileInput.getBytes(), fileInput.getBytes(),
originalFilename, originalFilename,
bookAndHtmlFormatsInstalled); bookAndHtmlFormatsInstalled,
disableSanitize);
pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes); pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes);

View File

@ -24,6 +24,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag; import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.GeneralFile; import stirling.software.SPDF.model.api.GeneralFile;
import stirling.software.SPDF.model.ApplicationProperties;
import stirling.software.SPDF.service.CustomPDDocumentFactory; import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.FileToPdf; import stirling.software.SPDF.utils.FileToPdf;
import stirling.software.SPDF.utils.WebResponseUtils; import stirling.software.SPDF.utils.WebResponseUtils;
@ -37,12 +38,16 @@ public class ConvertMarkdownToPdf {
private final CustomPDDocumentFactory pdfDocumentFactory; private final CustomPDDocumentFactory pdfDocumentFactory;
private final ApplicationProperties applicationProperties;
@Autowired @Autowired
public ConvertMarkdownToPdf( public ConvertMarkdownToPdf(
CustomPDDocumentFactory pdfDocumentFactory, CustomPDDocumentFactory pdfDocumentFactory,
@Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled) { @Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled,
ApplicationProperties applicationProperties) {
this.pdfDocumentFactory = pdfDocumentFactory; this.pdfDocumentFactory = pdfDocumentFactory;
this.bookAndHtmlFormatsInstalled = bookAndHtmlFormatsInstalled; this.bookAndHtmlFormatsInstalled = bookAndHtmlFormatsInstalled;
this.applicationProperties = applicationProperties;
} }
@PostMapping(consumes = "multipart/form-data", value = "/markdown/pdf") @PostMapping(consumes = "multipart/form-data", value = "/markdown/pdf")
@ -76,12 +81,15 @@ public class ConvertMarkdownToPdf {
String htmlContent = renderer.render(document); String htmlContent = renderer.render(document);
boolean disableSanitize = Boolean.TRUE.equals(applicationProperties.getSystem().getDisableSanitize());
byte[] pdfBytes = byte[] pdfBytes =
FileToPdf.convertHtmlToPdf( FileToPdf.convertHtmlToPdf(
null, null,
htmlContent.getBytes(), htmlContent.getBytes(),
"converted.html", "converted.html",
bookAndHtmlFormatsInstalled); bookAndHtmlFormatsInstalled,
disableSanitize);
pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes); pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes);
String outputFilename = String outputFilename =
originalFilename.replaceFirst("[.][^.]+$", "") originalFilename.replaceFirst("[.][^.]+$", "")

View File

@ -283,6 +283,7 @@ public class ApplicationProperties {
private Boolean enableAlphaFunctionality; private Boolean enableAlphaFunctionality;
private String enableAnalytics; private String enableAnalytics;
private Datasource datasource; private Datasource datasource;
private Boolean disableSanitize;
} }
@Data @Data

View File

@ -26,7 +26,8 @@ public class FileToPdf {
HTMLToPdfRequest request, HTMLToPdfRequest request,
byte[] fileBytes, byte[] fileBytes,
String fileName, String fileName,
boolean htmlFormatsInstalled) boolean htmlFormatsInstalled,
boolean disableSanitize)
throws IOException, InterruptedException { throws IOException, InterruptedException {
Path tempOutputFile = Files.createTempFile("output_", ".pdf"); Path tempOutputFile = Files.createTempFile("output_", ".pdf");
@ -35,13 +36,12 @@ public class FileToPdf {
try { try {
if (fileName.endsWith(".html")) { if (fileName.endsWith(".html")) {
tempInputFile = Files.createTempFile("input_", ".html"); tempInputFile = Files.createTempFile("input_", ".html");
String sanitizedHtml = String sanitizedHtml = sanitizeHtmlContent(new String(fileBytes, StandardCharsets.UTF_8), disableSanitize);
sanitizeHtmlContent(new String(fileBytes, StandardCharsets.UTF_8));
Files.write(tempInputFile, sanitizedHtml.getBytes(StandardCharsets.UTF_8)); Files.write(tempInputFile, sanitizedHtml.getBytes(StandardCharsets.UTF_8));
} else if (fileName.endsWith(".zip")) { } else if (fileName.endsWith(".zip")) {
tempInputFile = Files.createTempFile("input_", ".zip"); tempInputFile = Files.createTempFile("input_", ".zip");
Files.write(tempInputFile, fileBytes); Files.write(tempInputFile, fileBytes);
sanitizeHtmlFilesInZip(tempInputFile); sanitizeHtmlFilesInZip(tempInputFile, disableSanitize);
} else { } else {
throw new IllegalArgumentException("Unsupported file format: " + fileName); throw new IllegalArgumentException("Unsupported file format: " + fileName);
} }
@ -89,11 +89,11 @@ public class FileToPdf {
return pdfBytes; return pdfBytes;
} }
private static String sanitizeHtmlContent(String htmlContent) { private static String sanitizeHtmlContent(String htmlContent, boolean disableSanitize) {
return CustomHtmlSanitizer.sanitize(htmlContent); return (!disableSanitize) ? CustomHtmlSanitizer.sanitize(htmlContent) : htmlContent;
} }
private static void sanitizeHtmlFilesInZip(Path zipFilePath) throws IOException { private static void sanitizeHtmlFilesInZip(Path zipFilePath, boolean disableSanitize) throws IOException {
Path tempUnzippedDir = Files.createTempDirectory("unzipped_"); Path tempUnzippedDir = Files.createTempDirectory("unzipped_");
try (ZipInputStream zipIn = try (ZipInputStream zipIn =
ZipSecurity.createHardenedInputStream( ZipSecurity.createHardenedInputStream(
@ -106,7 +106,7 @@ public class FileToPdf {
if (entry.getName().toLowerCase().endsWith(".html") if (entry.getName().toLowerCase().endsWith(".html")
|| entry.getName().toLowerCase().endsWith(".htm")) { || entry.getName().toLowerCase().endsWith(".htm")) {
String content = new String(zipIn.readAllBytes(), StandardCharsets.UTF_8); String content = new String(zipIn.readAllBytes(), StandardCharsets.UTF_8);
String sanitizedContent = sanitizeHtmlContent(content); String sanitizedContent = sanitizeHtmlContent(content, disableSanitize);
Files.write(filePath, sanitizedContent.getBytes(StandardCharsets.UTF_8)); Files.write(filePath, sanitizedContent.getBytes(StandardCharsets.UTF_8));
} else { } else {
Files.copy(zipIn, filePath); Files.copy(zipIn, filePath);

View File

@ -86,6 +86,7 @@ system:
customHTMLFiles: false # enable to have files placed in /customFiles/templates override the existing template HTML files customHTMLFiles: false # enable to have files placed in /customFiles/templates override the existing template HTML files
tessdataDir: /usr/share/tessdata # path to the directory containing the Tessdata files. This setting is relevant for Windows systems. For Windows users, this path should be adjusted to point to the appropriate directory where the Tessdata files are stored. tessdataDir: /usr/share/tessdata # path to the directory containing the Tessdata files. This setting is relevant for Windows systems. For Windows users, this path should be adjusted to point to the appropriate directory where the Tessdata files are stored.
enableAnalytics: 'true' # set to 'true' to enable analytics, set to 'false' to disable analytics; for enterprise users, this is set to true enableAnalytics: 'true' # set to 'true' to enable analytics, set to 'false' to disable analytics; for enterprise users, this is set to true
disableSanitize: false # set to true to disable Sanitize HTML; (can lead to injections in HTML)
datasource: datasource:
enableCustomDatabase: false # Enterprise users ONLY, set this property to 'true' if you would like to use your own custom database configuration enableCustomDatabase: false # Enterprise users ONLY, set this property to 'true' if you would like to use your own custom database configuration
customDatabaseUrl: '' # eg jdbc:postgresql://localhost:5432/postgres, set the url for your own custom database connection. If provided, the type, hostName, port and name are not necessary and will not be used customDatabaseUrl: '' # eg jdbc:postgresql://localhost:5432/postgres, set the url for your own custom database connection. If provided, the type, hostName, port and name are not necessary and will not be used

View File

@ -15,10 +15,11 @@ public class FileToPdfTest {
byte[] fileBytes = new byte[0]; // Sample file bytes byte[] fileBytes = new byte[0]; // Sample file bytes
String fileName = "test.html"; // Sample file name String fileName = "test.html"; // Sample file name
boolean htmlFormatsInstalled = true; // Sample boolean value boolean htmlFormatsInstalled = true; // Sample boolean value
boolean disableSanitize = false; // Sample boolean value
// Check if the method throws IOException // Check if the method throws IOException
assertThrows(IOException.class, () -> { assertThrows(IOException.class, () -> {
FileToPdf.convertHtmlToPdf(request, fileBytes, fileName, htmlFormatsInstalled); FileToPdf.convertHtmlToPdf(request, fileBytes, fileName, htmlFormatsInstalled, disableSanitize);
}); });
} }