mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-07-10 13:47:25 +02:00
This pull request introduces a small but important change to the PDF conversion functionality. The change ensures that PDF forms are supported by adding the `--pdf-forms` option to the command lists in two methods. Changes to support PDF forms: * [`src/main/java/stirling/software/SPDF/controller/api/converters/ConvertWebsiteToPDF.java`](diffhunk://#diff-0e78e0f49bdd0d38127cd04656de55c2eca0b56197e098c6bfceb65e8cc3cff5R75): Added the `--pdf-forms` option to the command list in the `urlToPdf` method. * [`src/main/java/stirling/software/SPDF/utils/FileToPdf.java`](diffhunk://#diff-337516e2839031154412aa3e7c9a73402f3a630813a1946eae78f8a84e9bbe7fR56): Added the `--pdf-forms` option to the command list in the `convertHtmlToPdf` method. Closes #3077 --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details.
211 lines
8.5 KiB
Java
211 lines
8.5 KiB
Java
package stirling.software.SPDF.utils;
|
|
|
|
import java.io.*;
|
|
import java.nio.charset.StandardCharsets;
|
|
import java.nio.file.FileVisitResult;
|
|
import java.nio.file.Files;
|
|
import java.nio.file.Path;
|
|
import java.nio.file.SimpleFileVisitor;
|
|
import java.nio.file.attribute.BasicFileAttributes;
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
import java.util.stream.Collectors;
|
|
import java.util.stream.Stream;
|
|
import java.util.zip.ZipEntry;
|
|
import java.util.zip.ZipInputStream;
|
|
import java.util.zip.ZipOutputStream;
|
|
|
|
import io.github.pixee.security.ZipSecurity;
|
|
|
|
import stirling.software.SPDF.model.api.converters.HTMLToPdfRequest;
|
|
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
|
|
|
public class FileToPdf {
|
|
|
|
public static byte[] convertHtmlToPdf(
|
|
String weasyprintPath,
|
|
HTMLToPdfRequest request,
|
|
byte[] fileBytes,
|
|
String fileName,
|
|
boolean disableSanitize)
|
|
throws IOException, InterruptedException {
|
|
|
|
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
|
Path tempInputFile = null;
|
|
byte[] pdfBytes;
|
|
try {
|
|
if (fileName.endsWith(".html")) {
|
|
tempInputFile = Files.createTempFile("input_", ".html");
|
|
String sanitizedHtml =
|
|
sanitizeHtmlContent(
|
|
new String(fileBytes, StandardCharsets.UTF_8), disableSanitize);
|
|
Files.write(tempInputFile, sanitizedHtml.getBytes(StandardCharsets.UTF_8));
|
|
} else if (fileName.endsWith(".zip")) {
|
|
tempInputFile = Files.createTempFile("input_", ".zip");
|
|
Files.write(tempInputFile, fileBytes);
|
|
sanitizeHtmlFilesInZip(tempInputFile, disableSanitize);
|
|
} else {
|
|
throw new IllegalArgumentException("Unsupported file format: " + fileName);
|
|
}
|
|
|
|
List<String> command = new ArrayList<>();
|
|
command.add(weasyprintPath);
|
|
command.add("-e");
|
|
command.add("utf-8");
|
|
command.add("-v");
|
|
command.add("--pdf-forms");
|
|
command.add(tempInputFile.toString());
|
|
command.add(tempOutputFile.toString());
|
|
|
|
ProcessExecutorResult returnCode =
|
|
ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
|
|
.runCommandWithOutputHandling(command);
|
|
|
|
pdfBytes = Files.readAllBytes(tempOutputFile);
|
|
} catch (IOException e) {
|
|
pdfBytes = Files.readAllBytes(tempOutputFile);
|
|
if (pdfBytes.length < 1) {
|
|
throw e;
|
|
}
|
|
} finally {
|
|
Files.deleteIfExists(tempOutputFile);
|
|
Files.deleteIfExists(tempInputFile);
|
|
}
|
|
|
|
return pdfBytes;
|
|
}
|
|
|
|
private static String sanitizeHtmlContent(String htmlContent, boolean disableSanitize) {
|
|
return (!disableSanitize) ? CustomHtmlSanitizer.sanitize(htmlContent) : htmlContent;
|
|
}
|
|
|
|
private static void sanitizeHtmlFilesInZip(Path zipFilePath, boolean disableSanitize)
|
|
throws IOException {
|
|
Path tempUnzippedDir = Files.createTempDirectory("unzipped_");
|
|
try (ZipInputStream zipIn =
|
|
ZipSecurity.createHardenedInputStream(
|
|
new ByteArrayInputStream(Files.readAllBytes(zipFilePath)))) {
|
|
ZipEntry entry = zipIn.getNextEntry();
|
|
while (entry != null) {
|
|
Path filePath = tempUnzippedDir.resolve(sanitizeZipFilename(entry.getName()));
|
|
if (!entry.isDirectory()) {
|
|
Files.createDirectories(filePath.getParent());
|
|
if (entry.getName().toLowerCase().endsWith(".html")
|
|
|| entry.getName().toLowerCase().endsWith(".htm")) {
|
|
String content = new String(zipIn.readAllBytes(), StandardCharsets.UTF_8);
|
|
String sanitizedContent = sanitizeHtmlContent(content, disableSanitize);
|
|
Files.write(filePath, sanitizedContent.getBytes(StandardCharsets.UTF_8));
|
|
} else {
|
|
Files.copy(zipIn, filePath);
|
|
}
|
|
}
|
|
zipIn.closeEntry();
|
|
entry = zipIn.getNextEntry();
|
|
}
|
|
}
|
|
|
|
// Repack the sanitized files
|
|
zipDirectory(tempUnzippedDir, zipFilePath);
|
|
|
|
// Clean up
|
|
deleteDirectory(tempUnzippedDir);
|
|
}
|
|
|
|
private static void zipDirectory(Path sourceDir, Path zipFilePath) throws IOException {
|
|
try (ZipOutputStream zos =
|
|
new ZipOutputStream(new FileOutputStream(zipFilePath.toFile()))) {
|
|
Files.walk(sourceDir)
|
|
.filter(path -> !Files.isDirectory(path))
|
|
.forEach(
|
|
path -> {
|
|
ZipEntry zipEntry =
|
|
new ZipEntry(sourceDir.relativize(path).toString());
|
|
try {
|
|
zos.putNextEntry(zipEntry);
|
|
Files.copy(path, zos);
|
|
zos.closeEntry();
|
|
} catch (IOException e) {
|
|
throw new UncheckedIOException(e);
|
|
}
|
|
});
|
|
}
|
|
}
|
|
|
|
private static void deleteDirectory(Path dir) throws IOException {
|
|
Files.walkFileTree(
|
|
dir,
|
|
new SimpleFileVisitor<Path>() {
|
|
@Override
|
|
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
|
|
throws IOException {
|
|
Files.delete(file);
|
|
return FileVisitResult.CONTINUE;
|
|
}
|
|
|
|
@Override
|
|
public FileVisitResult postVisitDirectory(Path dir, IOException exc)
|
|
throws IOException {
|
|
Files.delete(dir);
|
|
return FileVisitResult.CONTINUE;
|
|
}
|
|
});
|
|
}
|
|
|
|
private static Path unzipAndGetMainHtml(byte[] fileBytes) throws IOException {
|
|
Path tempDirectory = Files.createTempDirectory("unzipped_");
|
|
try (ZipInputStream zipIn =
|
|
ZipSecurity.createHardenedInputStream(new ByteArrayInputStream(fileBytes))) {
|
|
ZipEntry entry = zipIn.getNextEntry();
|
|
while (entry != null) {
|
|
Path filePath = tempDirectory.resolve(sanitizeZipFilename(entry.getName()));
|
|
if (entry.isDirectory()) {
|
|
Files.createDirectories(filePath); // Explicitly create the directory structure
|
|
} else {
|
|
Files.createDirectories(
|
|
filePath.getParent()); // Create parent directories if they don't exist
|
|
Files.copy(zipIn, filePath);
|
|
}
|
|
zipIn.closeEntry();
|
|
entry = zipIn.getNextEntry();
|
|
}
|
|
}
|
|
|
|
// Search for the main HTML file.
|
|
try (Stream<Path> walk = Files.walk(tempDirectory)) {
|
|
List<Path> htmlFiles =
|
|
walk.filter(file -> file.toString().endsWith(".html"))
|
|
.collect(Collectors.toList());
|
|
|
|
if (htmlFiles.isEmpty()) {
|
|
throw new IOException("No HTML files found in the unzipped directory.");
|
|
}
|
|
|
|
// Prioritize 'index.html' if it exists, otherwise use the first .html file
|
|
for (Path htmlFile : htmlFiles) {
|
|
if ("index.html".equals(htmlFile.getFileName().toString())) {
|
|
return htmlFile;
|
|
}
|
|
}
|
|
|
|
return htmlFiles.get(0);
|
|
}
|
|
}
|
|
|
|
static String sanitizeZipFilename(String entryName) {
|
|
if (entryName == null || entryName.trim().isEmpty()) {
|
|
return "";
|
|
}
|
|
// Remove any drive letters (e.g., "C:\") and leading forward/backslashes
|
|
entryName = entryName.replaceAll("^[a-zA-Z]:[\\\\/]+", "");
|
|
entryName = entryName.replaceAll("^[\\\\/]+", "");
|
|
|
|
// Recursively remove path traversal sequences
|
|
while (entryName.contains("../") || entryName.contains("..\\")) {
|
|
entryName = entryName.replace("../", "").replace("..\\", "");
|
|
}
|
|
// Normalize all backslashes to forward slashes
|
|
entryName = entryName.replaceAll("\\\\", "/");
|
|
return entryName;
|
|
}
|
|
}
|