Stirling-PDF/src/main/java/stirling/software/SPDF/utils/FileToPdf.java
Anthony Stirling c047a97c13
Weasyprint forms #3077 (#3084)
This pull request introduces a small but important change to the PDF
conversion functionality. The change ensures that PDF forms are
supported by adding the `--pdf-forms` option to the command lists in two
methods.

Changes to support PDF forms:

*
[`src/main/java/stirling/software/SPDF/controller/api/converters/ConvertWebsiteToPDF.java`](diffhunk://#diff-0e78e0f49bdd0d38127cd04656de55c2eca0b56197e098c6bfceb65e8cc3cff5R75):
Added the `--pdf-forms` option to the command list in the `urlToPdf`
method.
*
[`src/main/java/stirling/software/SPDF/utils/FileToPdf.java`](diffhunk://#diff-337516e2839031154412aa3e7c9a73402f3a630813a1946eae78f8a84e9bbe7fR56):
Added the `--pdf-forms` option to the command list in the
`convertHtmlToPdf` method.

Closes #3077

---

## Checklist

### General

- [ ] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md)
(if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing)
for more details.
2025-03-01 20:31:29 +00:00

211 lines
8.5 KiB
Java

package stirling.software.SPDF.utils;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;
import io.github.pixee.security.ZipSecurity;
import stirling.software.SPDF.model.api.converters.HTMLToPdfRequest;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
public class FileToPdf {
public static byte[] convertHtmlToPdf(
String weasyprintPath,
HTMLToPdfRequest request,
byte[] fileBytes,
String fileName,
boolean disableSanitize)
throws IOException, InterruptedException {
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
Path tempInputFile = null;
byte[] pdfBytes;
try {
if (fileName.endsWith(".html")) {
tempInputFile = Files.createTempFile("input_", ".html");
String sanitizedHtml =
sanitizeHtmlContent(
new String(fileBytes, StandardCharsets.UTF_8), disableSanitize);
Files.write(tempInputFile, sanitizedHtml.getBytes(StandardCharsets.UTF_8));
} else if (fileName.endsWith(".zip")) {
tempInputFile = Files.createTempFile("input_", ".zip");
Files.write(tempInputFile, fileBytes);
sanitizeHtmlFilesInZip(tempInputFile, disableSanitize);
} else {
throw new IllegalArgumentException("Unsupported file format: " + fileName);
}
List<String> command = new ArrayList<>();
command.add(weasyprintPath);
command.add("-e");
command.add("utf-8");
command.add("-v");
command.add("--pdf-forms");
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command);
pdfBytes = Files.readAllBytes(tempOutputFile);
} catch (IOException e) {
pdfBytes = Files.readAllBytes(tempOutputFile);
if (pdfBytes.length < 1) {
throw e;
}
} finally {
Files.deleteIfExists(tempOutputFile);
Files.deleteIfExists(tempInputFile);
}
return pdfBytes;
}
private static String sanitizeHtmlContent(String htmlContent, boolean disableSanitize) {
return (!disableSanitize) ? CustomHtmlSanitizer.sanitize(htmlContent) : htmlContent;
}
private static void sanitizeHtmlFilesInZip(Path zipFilePath, boolean disableSanitize)
throws IOException {
Path tempUnzippedDir = Files.createTempDirectory("unzipped_");
try (ZipInputStream zipIn =
ZipSecurity.createHardenedInputStream(
new ByteArrayInputStream(Files.readAllBytes(zipFilePath)))) {
ZipEntry entry = zipIn.getNextEntry();
while (entry != null) {
Path filePath = tempUnzippedDir.resolve(sanitizeZipFilename(entry.getName()));
if (!entry.isDirectory()) {
Files.createDirectories(filePath.getParent());
if (entry.getName().toLowerCase().endsWith(".html")
|| entry.getName().toLowerCase().endsWith(".htm")) {
String content = new String(zipIn.readAllBytes(), StandardCharsets.UTF_8);
String sanitizedContent = sanitizeHtmlContent(content, disableSanitize);
Files.write(filePath, sanitizedContent.getBytes(StandardCharsets.UTF_8));
} else {
Files.copy(zipIn, filePath);
}
}
zipIn.closeEntry();
entry = zipIn.getNextEntry();
}
}
// Repack the sanitized files
zipDirectory(tempUnzippedDir, zipFilePath);
// Clean up
deleteDirectory(tempUnzippedDir);
}
private static void zipDirectory(Path sourceDir, Path zipFilePath) throws IOException {
try (ZipOutputStream zos =
new ZipOutputStream(new FileOutputStream(zipFilePath.toFile()))) {
Files.walk(sourceDir)
.filter(path -> !Files.isDirectory(path))
.forEach(
path -> {
ZipEntry zipEntry =
new ZipEntry(sourceDir.relativize(path).toString());
try {
zos.putNextEntry(zipEntry);
Files.copy(path, zos);
zos.closeEntry();
} catch (IOException e) {
throw new UncheckedIOException(e);
}
});
}
}
private static void deleteDirectory(Path dir) throws IOException {
Files.walkFileTree(
dir,
new SimpleFileVisitor<Path>() {
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
throws IOException {
Files.delete(file);
return FileVisitResult.CONTINUE;
}
@Override
public FileVisitResult postVisitDirectory(Path dir, IOException exc)
throws IOException {
Files.delete(dir);
return FileVisitResult.CONTINUE;
}
});
}
private static Path unzipAndGetMainHtml(byte[] fileBytes) throws IOException {
Path tempDirectory = Files.createTempDirectory("unzipped_");
try (ZipInputStream zipIn =
ZipSecurity.createHardenedInputStream(new ByteArrayInputStream(fileBytes))) {
ZipEntry entry = zipIn.getNextEntry();
while (entry != null) {
Path filePath = tempDirectory.resolve(sanitizeZipFilename(entry.getName()));
if (entry.isDirectory()) {
Files.createDirectories(filePath); // Explicitly create the directory structure
} else {
Files.createDirectories(
filePath.getParent()); // Create parent directories if they don't exist
Files.copy(zipIn, filePath);
}
zipIn.closeEntry();
entry = zipIn.getNextEntry();
}
}
// Search for the main HTML file.
try (Stream<Path> walk = Files.walk(tempDirectory)) {
List<Path> htmlFiles =
walk.filter(file -> file.toString().endsWith(".html"))
.collect(Collectors.toList());
if (htmlFiles.isEmpty()) {
throw new IOException("No HTML files found in the unzipped directory.");
}
// Prioritize 'index.html' if it exists, otherwise use the first .html file
for (Path htmlFile : htmlFiles) {
if ("index.html".equals(htmlFile.getFileName().toString())) {
return htmlFile;
}
}
return htmlFiles.get(0);
}
}
static String sanitizeZipFilename(String entryName) {
if (entryName == null || entryName.trim().isEmpty()) {
return "";
}
// Remove any drive letters (e.g., "C:\") and leading forward/backslashes
entryName = entryName.replaceAll("^[a-zA-Z]:[\\\\/]+", "");
entryName = entryName.replaceAll("^[\\\\/]+", "");
// Recursively remove path traversal sequences
while (entryName.contains("../") || entryName.contains("..\\")) {
entryName = entryName.replace("../", "").replace("..\\", "");
}
// Normalize all backslashes to forward slashes
entryName = entryName.replaceAll("\\\\", "/");
return entryName;
}
}