From 6ae2fddd486dcd4f51ea9c54d81d33d9a739f714 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bla=C5=BE=20Carli?= Date: Sat, 1 Feb 2025 00:36:50 +0100 Subject: [PATCH] added option for disabling HTML Sanitize (#2831) # Description of Changes Please provide a summary of the changes, including: - Added `disableSanitize: false` to the system settings: set it to 'true' to disable HTML sanitization, or to 'false' to keep HTML sanitization enabled (disabling it can lead to injections in HTML). - Some users run this on local boxes and use Google Fonts and base64 image src. ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [x] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [x] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details. 
--------- Co-authored-by: blaz.carli Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com> --- .../api/converters/ConvertHtmlToPDF.java | 13 +++++++++++-- .../api/converters/ConvertMarkdownToPdf.java | 12 ++++++++++-- .../SPDF/model/ApplicationProperties.java | 1 + .../stirling/software/SPDF/utils/FileToPdf.java | 16 ++++++++-------- src/main/resources/settings.yml.template | 5 +++-- .../software/SPDF/utils/FileToPdfTest.java | 3 ++- 6 files changed, 35 insertions(+), 15 deletions(-) diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java index a4fe57bb..284fe8b9 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java @@ -14,6 +14,7 @@ import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; import stirling.software.SPDF.model.api.converters.HTMLToPdfRequest; +import stirling.software.SPDF.model.ApplicationProperties; import stirling.software.SPDF.service.CustomPDDocumentFactory; import stirling.software.SPDF.utils.FileToPdf; import stirling.software.SPDF.utils.WebResponseUtils; @@ -27,12 +28,16 @@ public class ConvertHtmlToPDF { private final CustomPDDocumentFactory pdfDocumentFactory; + private final ApplicationProperties applicationProperties; + @Autowired public ConvertHtmlToPDF( CustomPDDocumentFactory pdfDocumentFactory, - @Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled) { + @Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled, + ApplicationProperties applicationProperties) { this.pdfDocumentFactory = pdfDocumentFactory; this.bookAndHtmlFormatsInstalled = bookAndHtmlFormatsInstalled; + this.applicationProperties = applicationProperties; } @PostMapping(consumes = "multipart/form-data", 
value = "/html/pdf") @@ -54,12 +59,16 @@ public class ConvertHtmlToPDF { || (!originalFilename.endsWith(".html") && !originalFilename.endsWith(".zip"))) { throw new IllegalArgumentException("File must be either .html or .zip format."); } + + boolean disableSanitize = Boolean.TRUE.equals(applicationProperties.getSystem().getDisableSanitize()); + byte[] pdfBytes = FileToPdf.convertHtmlToPdf( request, fileInput.getBytes(), originalFilename, - bookAndHtmlFormatsInstalled); + bookAndHtmlFormatsInstalled, + disableSanitize); pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes); diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java index b378f479..41d1be06 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java @@ -24,6 +24,7 @@ import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; import stirling.software.SPDF.model.api.GeneralFile; +import stirling.software.SPDF.model.ApplicationProperties; import stirling.software.SPDF.service.CustomPDDocumentFactory; import stirling.software.SPDF.utils.FileToPdf; import stirling.software.SPDF.utils.WebResponseUtils; @@ -37,12 +38,16 @@ public class ConvertMarkdownToPdf { private final CustomPDDocumentFactory pdfDocumentFactory; + private final ApplicationProperties applicationProperties; + @Autowired public ConvertMarkdownToPdf( CustomPDDocumentFactory pdfDocumentFactory, - @Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled) { + @Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled, + ApplicationProperties applicationProperties) { this.pdfDocumentFactory = pdfDocumentFactory; this.bookAndHtmlFormatsInstalled = bookAndHtmlFormatsInstalled; + 
this.applicationProperties = applicationProperties; } @PostMapping(consumes = "multipart/form-data", value = "/markdown/pdf") @@ -76,12 +81,15 @@ public class ConvertMarkdownToPdf { String htmlContent = renderer.render(document); + boolean disableSanitize = Boolean.TRUE.equals(applicationProperties.getSystem().getDisableSanitize()); + byte[] pdfBytes = FileToPdf.convertHtmlToPdf( null, htmlContent.getBytes(), "converted.html", - bookAndHtmlFormatsInstalled); + bookAndHtmlFormatsInstalled, + disableSanitize); pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes); String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") diff --git a/src/main/java/stirling/software/SPDF/model/ApplicationProperties.java b/src/main/java/stirling/software/SPDF/model/ApplicationProperties.java index a10c945a..1b0c1093 100644 --- a/src/main/java/stirling/software/SPDF/model/ApplicationProperties.java +++ b/src/main/java/stirling/software/SPDF/model/ApplicationProperties.java @@ -283,6 +283,7 @@ public class ApplicationProperties { private Boolean enableAlphaFunctionality; private String enableAnalytics; private Datasource datasource; + private Boolean disableSanitize; } @Data diff --git a/src/main/java/stirling/software/SPDF/utils/FileToPdf.java b/src/main/java/stirling/software/SPDF/utils/FileToPdf.java index 442167d8..1f9cded7 100644 --- a/src/main/java/stirling/software/SPDF/utils/FileToPdf.java +++ b/src/main/java/stirling/software/SPDF/utils/FileToPdf.java @@ -26,7 +26,8 @@ public class FileToPdf { HTMLToPdfRequest request, byte[] fileBytes, String fileName, - boolean htmlFormatsInstalled) + boolean htmlFormatsInstalled, + boolean disableSanitize) throws IOException, InterruptedException { Path tempOutputFile = Files.createTempFile("output_", ".pdf"); @@ -35,13 +36,12 @@ public class FileToPdf { try { if (fileName.endsWith(".html")) { tempInputFile = Files.createTempFile("input_", ".html"); - String sanitizedHtml = - sanitizeHtmlContent(new 
String(fileBytes, StandardCharsets.UTF_8)); + String sanitizedHtml = sanitizeHtmlContent(new String(fileBytes, StandardCharsets.UTF_8), disableSanitize); Files.write(tempInputFile, sanitizedHtml.getBytes(StandardCharsets.UTF_8)); } else if (fileName.endsWith(".zip")) { tempInputFile = Files.createTempFile("input_", ".zip"); Files.write(tempInputFile, fileBytes); - sanitizeHtmlFilesInZip(tempInputFile); + sanitizeHtmlFilesInZip(tempInputFile, disableSanitize); } else { throw new IllegalArgumentException("Unsupported file format: " + fileName); } @@ -89,11 +89,11 @@ public class FileToPdf { return pdfBytes; } - private static String sanitizeHtmlContent(String htmlContent) { - return CustomHtmlSanitizer.sanitize(htmlContent); + private static String sanitizeHtmlContent(String htmlContent, boolean disableSanitize) { + return (!disableSanitize) ? CustomHtmlSanitizer.sanitize(htmlContent) : htmlContent; } - private static void sanitizeHtmlFilesInZip(Path zipFilePath) throws IOException { + private static void sanitizeHtmlFilesInZip(Path zipFilePath, boolean disableSanitize) throws IOException { Path tempUnzippedDir = Files.createTempDirectory("unzipped_"); try (ZipInputStream zipIn = ZipSecurity.createHardenedInputStream( @@ -106,7 +106,7 @@ public class FileToPdf { if (entry.getName().toLowerCase().endsWith(".html") || entry.getName().toLowerCase().endsWith(".htm")) { String content = new String(zipIn.readAllBytes(), StandardCharsets.UTF_8); - String sanitizedContent = sanitizeHtmlContent(content); + String sanitizedContent = sanitizeHtmlContent(content, disableSanitize); Files.write(filePath, sanitizedContent.getBytes(StandardCharsets.UTF_8)); } else { Files.copy(zipIn, filePath); diff --git a/src/main/resources/settings.yml.template b/src/main/resources/settings.yml.template index 6ab2a8c7..ca05742d 100644 --- a/src/main/resources/settings.yml.template +++ b/src/main/resources/settings.yml.template @@ -16,7 +16,7 @@ security: csrfDisabled: false # set to 'true' to 
disable CSRF protection (not recommended for production) loginAttemptCount: 5 # lock user account after 5 tries; when using e.g. Fail2Ban you can deactivate the function with -1 loginResetTimeMinutes: 120 # lock account for 2 hours after x attempts - loginMethod: all # Accepts values like 'all' and 'normal'(only Login with Username/Password), 'oauth2'(only Login with OAuth2) or 'saml2'(only Login with SAML2) + loginMethod: all # Accepts values like 'all' and 'normal'(only Login with Username/Password), 'oauth2'(only Login with OAuth2) or 'saml2'(only Login with SAML2) initialLogin: username: '' # initial username for the first login password: '' # initial password for the first login @@ -86,6 +86,7 @@ system: customHTMLFiles: false # enable to have files placed in /customFiles/templates override the existing template HTML files tessdataDir: /usr/share/tessdata # path to the directory containing the Tessdata files. This setting is relevant for Windows systems. For Windows users, this path should be adjusted to point to the appropriate directory where the Tessdata files are stored. enableAnalytics: 'true' # set to 'true' to enable analytics, set to 'false' to disable analytics; for enterprise users, this is set to true + disableSanitize: false # set to true to disable Sanitize HTML; (can lead to injections in HTML) datasource: enableCustomDatabase: false # Enterprise users ONLY, set this property to 'true' if you would like to use your own custom database configuration customDatabaseUrl: '' # eg jdbc:postgresql://localhost:5432/postgres, set the url for your own custom database connection. 
If provided, the type, hostName, port and name are not necessary and will not be used @@ -113,7 +114,7 @@ AutomaticallyGenerated: key: example UUID: example appVersion: 0.35.0 - + processExecutor: sessionLimit: # Process executor instances limits libreOfficeSessionLimit: 1 diff --git a/src/test/java/stirling/software/SPDF/utils/FileToPdfTest.java b/src/test/java/stirling/software/SPDF/utils/FileToPdfTest.java index 804f1c16..9b0c004b 100644 --- a/src/test/java/stirling/software/SPDF/utils/FileToPdfTest.java +++ b/src/test/java/stirling/software/SPDF/utils/FileToPdfTest.java @@ -15,10 +15,11 @@ public class FileToPdfTest { byte[] fileBytes = new byte[0]; // Sample file bytes String fileName = "test.html"; // Sample file name boolean htmlFormatsInstalled = true; // Sample boolean value + boolean disableSanitize = false; // Sample boolean value // Check if the method throws IOException assertThrows(IOException.class, () -> { - FileToPdf.convertHtmlToPdf(request, fileBytes, fileName, htmlFormatsInstalled); + FileToPdf.convertHtmlToPdf(request, fileBytes, fileName, htmlFormatsInstalled, disableSanitize); }); }