feat: Add EML to PDF conversion functionality

This commit is contained in:
Balázs Szücs 2025-06-06 23:24:36 +02:00
parent 47ac4a4730
commit 27e8d16e7c
8 changed files with 1829 additions and 2 deletions

View File

@ -43,7 +43,7 @@ dependencies {
api 'jakarta.servlet:jakarta.servlet-api:6.1.0'
api 'org.snakeyaml:snakeyaml-engine:2.9'
api "org.springdoc:springdoc-openapi-starter-webmvc-ui:2.8.8"
api 'jakarta.mail:jakarta.mail-api:2.1.3'
compileOnly "org.projectlombok:lombok:$lombokVersion"
annotationProcessor "org.projectlombok:lombok:$lombokVersion"

View File

@ -0,0 +1,39 @@
package stirling.software.common.model.api.converters;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import lombok.EqualsAndHashCode;
import stirling.software.common.model.api.PDFFile;
/**
 * Form-backing request for the EML to PDF conversion endpoint.
 *
 * <p>The uploaded file itself is carried by the {@code fileInput} property inherited from {@link
 * PDFFile}; this class only adds the conversion options.
 */
@Data
@EqualsAndHashCode(callSuper = true)
public class EmlToPdfRequest extends PDFFile {

    /** Whether email attachments are included in the PDF output. Off unless requested. */
    @Schema(
            description = "Include email attachments in the PDF output",
            example = "false",
            requiredMode = Schema.RequiredMode.NOT_REQUIRED)
    private boolean includeAttachments;

    /** Upper bound, in megabytes, for an attachment to be included. Defaults to 10. */
    @Schema(
            description = "Maximum attachment size in MB to include (default 10MB, range: 1-100)",
            example = "10",
            minimum = "1",
            maximum = "100",
            requiredMode = Schema.RequiredMode.NOT_REQUIRED)
    private int maxAttachmentSizeMB = 10;

    /** When set, the intermediate HTML is returned instead of the PDF. Off unless requested. */
    @Schema(
            description = "Download HTML intermediate file instead of PDF",
            example = "false",
            requiredMode = Schema.RequiredMode.NOT_REQUIRED)
    private boolean downloadHtml;

    /** Whether CC and BCC recipients are shown in the header. On by default. */
    @Schema(
            description = "Include CC and BCC recipients in header (if available)",
            example = "true",
            requiredMode = Schema.RequiredMode.NOT_REQUIRED)
    private boolean includeAllRecipients = true;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,148 @@
package stirling.software.SPDF.controller.api.converters;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.jetbrains.annotations.NotNull;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.api.converters.EmlToPdfRequest;
import stirling.software.common.util.EmlToPdf;
import stirling.software.common.util.WebResponseUtils;
@RestController
@RequestMapping("/api/v1/convert")
@Tag(name = "Convert", description = "Convert APIs")
@Slf4j
public class ConvertEmlToPDF {
@Value("${WEASYPRINT_PATH:weasyprint}")
private String weasyprintPath;
@PostMapping(consumes = "multipart/form-data", value = "/eml/pdf")
@Operation(
summary = "Convert EML to PDF",
description =
"This endpoint converts EML (email) files to PDF format with extensive"
+ " customization options. Features include font settings, image constraints, display modes, attachment handling,"
+ " and HTML debug output. Input: EML file, Output: PDF"
+ " or HTML file. Type: SISO")
public ResponseEntity<byte[]> convertEmlToPdf(@ModelAttribute EmlToPdfRequest request) {
MultipartFile inputFile = request.getFileInput();
String originalFilename = inputFile.getOriginalFilename();
// Validate input
if (inputFile == null || inputFile.isEmpty()) {
log.error("No file provided for EML to PDF conversion.");
return ResponseEntity.badRequest()
.body("No file provided".getBytes(StandardCharsets.UTF_8));
}
if (originalFilename == null || originalFilename.trim().isEmpty()) {
log.error("Filename is null or empty.");
return ResponseEntity.badRequest()
.body("Please provide a valid filename".getBytes(StandardCharsets.UTF_8));
}
// Validate file type - support EML
String lowerFilename = originalFilename.toLowerCase();
if (!lowerFilename.endsWith(".eml")) {
log.error("Invalid file type for EML to PDF: {}", originalFilename);
return ResponseEntity.badRequest()
.body("Please upload a valid EML file".getBytes(StandardCharsets.UTF_8));
}
String baseFilename = Filenames.toSimpleFileName(originalFilename); // Use Filenames utility
try {
byte[] fileBytes = inputFile.getBytes();
if (request.isDownloadHtml()) {
try {
String htmlContent = EmlToPdf.convertEmlToHtml(fileBytes, request);
log.info("Successfully converted EML to HTML: {}", originalFilename);
return WebResponseUtils.bytesToWebResponse(
htmlContent.getBytes(StandardCharsets.UTF_8),
baseFilename + ".html",
MediaType.TEXT_HTML);
} catch (Exception e) {
log.error("HTML conversion failed for {}", originalFilename, e);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR)
.body(
("HTML conversion failed: " + e.getMessage())
.getBytes(StandardCharsets.UTF_8));
}
}
// Convert EML to PDF with enhanced options
try {
byte[] pdfBytes =
EmlToPdf.convertEmlToPdf(
weasyprintPath, // Use configured WeasyPrint path
request,
fileBytes,
originalFilename,
false);
if (pdfBytes == null || pdfBytes.length == 0) {
log.error("PDF conversion failed - empty output for {}", originalFilename);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR)
.body(
"PDF conversion failed - empty output"
.getBytes(StandardCharsets.UTF_8));
}
log.info("Successfully converted EML to PDF: {}", originalFilename);
return WebResponseUtils.bytesToWebResponse(
pdfBytes, baseFilename + ".pdf", MediaType.APPLICATION_PDF);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
log.error("EML to PDF conversion was interrupted for {}", originalFilename, e);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR)
.body("Conversion was interrupted".getBytes(StandardCharsets.UTF_8));
} catch (Exception e) {
String errorMessage = buildErrorMessage(e, originalFilename);
log.error("EML to PDF conversion failed for {}: {}", originalFilename, errorMessage, e);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR)
.body(errorMessage.getBytes(StandardCharsets.UTF_8));
}
} catch (IOException e) {
log.error("File processing error for EML to PDF: {}", originalFilename, e);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR)
.body("File processing error".getBytes(StandardCharsets.UTF_8));
}
}
private static @NotNull String buildErrorMessage(Exception e, String originalFilename) {
String errorMessage;
if (e.getMessage() != null && e.getMessage().contains("Invalid EML")) {
errorMessage =
"Invalid EML file format. Please ensure you've uploaded a valid email"
+ " file ("
+ originalFilename
+ ").";
} else if (e.getMessage() != null && e.getMessage().contains("WeasyPrint")) {
errorMessage =
"PDF generation failed for "
+ originalFilename
+ ". This may be due to complex email formatting.";
} else {
errorMessage = "Conversion failed for " + originalFilename + ": " + e.getMessage();
}
return errorMessage;
}
}

View File

@ -7,7 +7,6 @@ import org.springframework.web.servlet.ModelAndView;
import io.swagger.v3.oas.annotations.Hidden;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.common.util.CheckProgramInstall;
@Controller
@ -121,4 +120,11 @@ public class ConverterWebController {
model.addAttribute("currentPage", "pdf-to-pdfa");
return "convert/pdf-to-pdfa";
}
/**
 * Serves the EML to PDF conversion page.
 *
 * @param model view model; receives the {@code currentPage} attribute for this page
 * @return the name of the EML to PDF view template
 */
@GetMapping("/eml-to-pdf")
@Hidden
public String convertEmlToPdfForm(Model model) {
final String viewName = "convert/eml-to-pdf";
model.addAttribute("currentPage", "eml-to-pdf");
return viewName;
}
}

View File

@ -621,6 +621,22 @@ home.HTMLToPDF.title=HTML to PDF
home.HTMLToPDF.desc=Converts any HTML file or zip to PDF
HTMLToPDF.tags=markup,web-content,transformation,convert
#eml-to-pdf
home.EMLToPDF.title=EML to PDF
home.EMLToPDF.desc=Converts email (EML) files to PDF format including headers, body, and inline images
EMLToPDF.tags=email,conversion,eml,message,transformation,convert,mail
EMLToPDF.title=EML To PDF
EMLToPDF.header=EML To PDF
EMLToPDF.submit=Convert
EMLToPDF.downloadHtml=Download HTML intermediate file instead of PDF
EMLToPDF.downloadHtmlHelp=This allows you to see the HTML version before PDF conversion and can help debug formatting issues
EMLToPDF.includeAttachments=Include attachments in PDF
EMLToPDF.maxAttachmentSize=Maximum attachment size (MB)
EMLToPDF.help=Converts email (EML) files to PDF format including headers, body, and inline images
EMLToPDF.troubleshootingTip1=EML to HTML is a more reliable process, so with batch-processing it is recommended to save both
EMLToPDF.troubleshootingTip2=With a small number of EMLs, if the PDF is malformed, you can download HTML and override some of the problematic HTML code.
EMLToPDF.troubleshootingTip3=Embeddings, however, do not work with HTMLs
home.MarkdownToPDF.title=Markdown to PDF
home.MarkdownToPDF.desc=Converts any Markdown file to PDF

View File

@ -0,0 +1,93 @@
<!DOCTYPE html>
<html th:lang="${#locale.language}" th:dir="#{language.direction}" th:data-language="${#locale.toString()}"
xmlns:th="https://www.thymeleaf.org">
<head>
<th:block th:insert="~{fragments/common :: head(title=#{EMLToPDF.title}, header=#{EMLToPDF.header})}"></th:block>
</head>
<body>
<div id="page-container">
<div id="content-wrap">
<th:block th:insert="~{fragments/navbar.html :: navbar}"></th:block>
<div class="container py-4">
<div class="row justify-content-center">
<div class="col-md-6">
<div class="card shadow-sm">
<div class="card-body">
<div class="tool-header">
<span class="material-symbols-rounded tool-header-icon convertto">email</span>
<span class="tool-header-text" th:text="#{EMLToPDF.header}"></span>
</div>
<form method="post" enctype="multipart/form-data" th:action="@{'/api/v1/convert/eml/pdf'}" class="mt-4">
<div th:replace="~{fragments/common :: fileSelector(name='fileInput', multipleInputsForSingleRequest=false, accept='.eml,message/rfc822')}">
</div>
<div class="form-check mb-3">
<input type="checkbox" class="form-check-input" name="downloadHtml" id="downloadHtml">
<label class="form-check-label" for="downloadHtml" th:text="#{EMLToPDF.downloadHtml}"></label>
<div class="form-text" th:text="#{EMLToPDF.downloadHtmlHelp}"></div>
</div>
<div id="pdfOnlyOptions">
<div class="form-check mb-3">
<input type="checkbox" class="form-check-input" name="includeAttachments" id="includeAttachments" checked>
<label class="form-check-label" for="includeAttachments" th:text="#{EMLToPDF.includeAttachments}"></label>
</div>
<div class="mb-3">
<label for="maxAttachmentSizeMB" class="form-label" th:text="#{EMLToPDF.maxAttachmentSize}"></label>
<input type="number" class="form-control" id="maxAttachmentSizeMB" name="maxAttachmentSizeMB" value="10" min="1" max="100">
</div>
</div>
<div class="mb-4">
<button class="btn btn-outline-primary" type="button" data-bs-toggle="collapse"
data-bs-target="#info" aria-expanded="false" aria-controls="info" th:text="#{info}">
</button>
<div class="collapse mt-3" id="info">
<div class="card card-body">
<p class="mb-2" th:text="#{EMLToPDF.help}"></p>
<ul class="mb-0">
<li th:text="#{EMLToPDF.troubleshootingTip1}"></li>
<li th:text="#{EMLToPDF.troubleshootingTip2}"></li>
<li th:text="#{EMLToPDF.troubleshootingTip3}"></li>
</ul>
</div>
</div>
</div>
<button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{EMLToPDF.submit}"></button>
</form>
</div>
</div>
</div>
</div>
</div>
</div>
<th:block th:insert="~{fragments/footer.html :: footer}"></th:block>
</div>
<!-- Hides the PDF-only options and relabels the submit button while "download HTML" is checked. -->
<script th:inline="javascript">
  document.addEventListener('DOMContentLoaded', function () {
    const downloadHtml = document.getElementById('downloadHtml');
    const pdfOnlyOptions = document.getElementById('pdfOnlyOptions');
    const submitBtn = document.getElementById('submitBtn');

    // JavaScript inlining emits the localized label as a correctly escaped JS string literal,
    // so translations containing quotes or ampersands cannot break the script or show raw
    // entities. The trailing literal is only used when the template is opened statically.
    const convertLabel = /*[[#{EMLToPDF.submit}]]*/ 'Convert';

    function updateFormState() {
      if (pdfOnlyOptions && submitBtn) {
        const htmlOnly = downloadHtml.checked;
        pdfOnlyOptions.style.display = htmlOnly ? 'none' : 'block';
        submitBtn.textContent = htmlOnly ? 'Download HTML' : convertLabel;
      }
    }

    if (downloadHtml) {
      downloadHtml.addEventListener('change', updateFormState);
      // Apply once on load so the form matches a checkbox state restored by the browser.
      updateFormState();
    }
  });
</script>
</body>
</html>

View File

@ -59,6 +59,9 @@
<div
th:replace="~{fragments/navbarEntry :: navbarEntry('markdown-to-pdf', 'markdown', 'home.MarkdownToPDF.title', 'home.MarkdownToPDF.desc', 'MarkdownToPDF.tags', 'convertto')}">
</div>
<div
th:replace="~{fragments/navbarEntry :: navbarEntry('eml-to-pdf', 'email', 'home.EMLToPDF.title', 'home.EMLToPDF.desc', 'EMLToPDF.tags', 'convertto')}">
</div>
</div>
</div>
<div id="groupConvertFrom" class="feature-group">