From b7815fff33b83e56fe21e9dfb5c68bd06f72173e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Sz=C3=BCcs?= Date: Sun, 3 Aug 2025 12:49:35 +0200 Subject: [PATCH] refactor(eml-to-pdf): Improve readability, maintainability, and overall standards compliance --- .../software/common/util/EmlToPdf.java | 1797 +++++++++++------ 1 file changed, 1126 insertions(+), 671 deletions(-) diff --git a/app/common/src/main/java/stirling/software/common/util/EmlToPdf.java b/app/common/src/main/java/stirling/software/common/util/EmlToPdf.java index 6b28dc683..2250974b7 100644 --- a/app/common/src/main/java/stirling/software/common/util/EmlToPdf.java +++ b/app/common/src/main/java/stirling/software/common/util/EmlToPdf.java @@ -4,12 +4,16 @@ import static stirling.software.common.util.AttachmentUtils.setCatalogViewerPref import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.EOFException; +import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.lang.reflect.Constructor; import java.lang.reflect.Method; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; +import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Base64; import java.util.Date; @@ -19,10 +23,13 @@ import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Properties; +import java.util.Set; +import java.util.TimeZone; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary; import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode; import org.apache.pdfbox.pdmodel.PDPage; @@ -33,43 +40,40 @@ import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationFileAttachment; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; +import org.apache.pdfbox.text.PDFTextStripper; +import org.apache.pdfbox.text.TextPosition; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; +import org.springframework.web.multipart.MultipartFile; import lombok.Data; import lombok.Getter; import lombok.experimental.UtilityClass; -import lombok.extern.slf4j.Slf4j; import stirling.software.common.model.api.converters.EmlToPdfRequest; import stirling.software.common.model.api.converters.HTMLToPdfRequest; import stirling.software.common.service.CustomPDFDocumentFactory; -@Slf4j @UtilityClass public class EmlToPdf { private static final class StyleConstants { - // Font and layout constants static final int DEFAULT_FONT_SIZE = 12; static final String DEFAULT_FONT_FAMILY = "Helvetica, sans-serif"; static final float DEFAULT_LINE_HEIGHT = 1.4f; static final String DEFAULT_ZOOM = "1.0"; - // Color constants - aligned with application theme static final String DEFAULT_TEXT_COLOR = "#202124"; static final String DEFAULT_BACKGROUND_COLOR = "#ffffff"; static final String DEFAULT_BORDER_COLOR = "#e8eaed"; static final String ATTACHMENT_BACKGROUND_COLOR = "#f9f9f9"; static final String ATTACHMENT_BORDER_COLOR = "#eeeeee"; - // Size constants for PDF annotations static final float ATTACHMENT_ICON_WIDTH = 12f; static final float ATTACHMENT_ICON_HEIGHT = 14f; static final float ANNOTATION_X_OFFSET = 2f; static final float ANNOTATION_Y_OFFSET = 10f; - // Content validation constants static final int EML_CHECK_LENGTH = 8192; static final int MIN_HEADER_COUNT_FOR_VALID_EML = 2; @@ -81,24 +85,67 @@ public class EmlToPdf { Pattern.compile("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?="); static final String ATTACHMENT_MARKER = "@"; + static final String DISPOSITION_ATTACHMENT = "attachment"; + + static final String TEXT_PLAIN = "text/plain"; + static final String TEXT_HTML = "text/html"; + static final String MULTIPART_PREFIX = "multipart/"; + + static final String HEADER_CONTENT_TYPE = "content-type:"; + static final String HEADER_CONTENT_DISPOSITION = "content-disposition:"; + static final String HEADER_CONTENT_TRANSFER_ENCODING = "content-transfer-encoding:"; + static final String HEADER_CONTENT_ID = "Content-ID"; + static final String HEADER_SUBJECT = "Subject:"; + static final String HEADER_FROM = "From:"; + static final String HEADER_TO = "To:"; + static final String HEADER_CC = "Cc:"; + static final String HEADER_BCC = "Bcc:"; + static final String HEADER_DATE = "Date:"; + private MimeConstants() {} } - private static final class FileSizeConstants { - static final long BYTES_IN_KB = 1024L; - static final long BYTES_IN_MB = BYTES_IN_KB * 1024L; - static final long BYTES_IN_GB = BYTES_IN_MB * 1024L; + private static final class MimeTypeDetector { + private static final Map EXTENSION_TO_MIME_TYPE = + Map.of( + ".png", "image/png", + ".jpg", "image/jpeg", + ".jpeg", "image/jpeg", + ".gif", "image/gif", + ".bmp", "image/bmp", + ".webp", "image/webp", + ".svg", "image/svg+xml", + ".ico", "image/x-icon", + ".tiff", "image/tiff", + ".tif", "image/tiff"); - private FileSizeConstants() {} + static String detectMimeType(String filename, String existingMimeType) { + if (existingMimeType != null && !existingMimeType.isEmpty()) { + return existingMimeType; + } + + if (filename != null) { + String lowerFilename = filename.toLowerCase(); + for (Map.Entry entry : EXTENSION_TO_MIME_TYPE.entrySet()) { + if (lowerFilename.endsWith(entry.getKey())) { + return entry.getValue(); + } + } + } + + return "image/png"; + } + + private MimeTypeDetector() {} } - // Cached Jakarta Mail availability check - private static Boolean jakartaMailAvailable = null; + private static volatile Boolean jakartaMailAvailable = null; + private static volatile Method mimeUtilityDecodeTextMethod = null; + private static volatile boolean mimeUtilityChecked = false; - private static boolean isJakartaMailAvailable() { + private static synchronized boolean isJakartaMailAvailable() { if (jakartaMailAvailable == null) { try { - // Check for core Jakarta Mail classes Class.forName("jakarta.mail.internet.MimeMessage"); Class.forName("jakarta.mail.Session"); Class.forName("jakarta.mail.internet.MimeUtility"); @@ -108,10 +155,8 @@ public class EmlToPdf { Class.forName("jakarta.mail.Part"); jakartaMailAvailable = true; - log.debug("Jakarta Mail libraries are available"); } catch (ClassNotFoundException e) { jakartaMailAvailable = false; - log.debug("Jakarta Mail libraries are not available, using basic parsing"); } } return jakartaMailAvailable; @@ -124,7 +169,7 @@ public class EmlToPdf { if (isJakartaMailAvailable()) { return convertEmlToHtmlAdvanced(emlBytes, request); } else { - return convertEmlToHtmlBasic(emlBytes, request); + return convertEmlToHtmlBasic(emlBytes, request, null); } } @@ -133,7 +178,7 @@ public class EmlToPdf { EmlToPdfRequest request, byte[] emlBytes, String fileName, - stirling.software.common.service.CustomPDFDocumentFactory pdfDocumentFactory, + CustomPDFDocumentFactory pdfDocumentFactory, TempFileManager tempFileManager, CustomHtmlSanitizer customHtmlSanitizer) throws IOException, InterruptedException { @@ -141,18 +186,17 @@ public class EmlToPdf { validateEmlInput(emlBytes); try { - // Generate HTML representation - EmailContent emailContent = null; + EmailContent emailContent; String htmlContent; if (isJakartaMailAvailable()) { - emailContent = extractEmailContentAdvanced(emlBytes, request); - htmlContent = generateEnhancedEmailHtml(emailContent, request); + emailContent = extractEmailContentAdvanced(emlBytes, request, customHtmlSanitizer); + htmlContent = generateEnhancedEmailHtml(emailContent, request, customHtmlSanitizer); } else { - htmlContent = convertEmlToHtmlBasic(emlBytes, request); + emailContent = new EmailContent(); + htmlContent = convertEmlToHtmlBasic(emlBytes, request, customHtmlSanitizer); } - // Convert HTML to PDF byte[] pdfBytes = convertHtmlToPdf( weasyprintPath, @@ -161,7 +205,6 @@ public class EmlToPdf { tempFileManager, customHtmlSanitizer); - // Attach files if available and requested if (shouldAttachFiles(emailContent, request)) { pdfBytes = attachFilesToPdf( @@ -171,11 +214,9 @@ public class EmlToPdf { return pdfBytes; } catch (IOException | InterruptedException e) { - log.error("Failed to convert EML to PDF for file: {}", fileName, e); throw e; } catch (Exception e) { - log.error("Unexpected error during EML to PDF conversion for file: {}", fileName, e); - throw new IOException("Conversion failed: " + e.getMessage(), e); + throw new IOException("Error converting EML to PDF", e); } } @@ -215,7 +256,6 @@ public class EmlToPdf { tempFileManager, customHtmlSanitizer); } catch (IOException | InterruptedException e) { - log.warn("Initial HTML to PDF conversion failed, trying with simplified HTML"); String simplifiedHtml = simplifyHtmlContent(htmlContent); return FileToPdf.convertHtmlToPdf( weasyprintPath, @@ -234,105 +274,140 @@ public class EmlToPdf { } private static String generateUniqueAttachmentId(String filename) { - return "attachment_" + filename.hashCode() + "_" + System.nanoTime(); + return "attachment_" + + (filename != null ? filename.hashCode() : "unknown") + + "_" + + System.nanoTime(); } - private static String convertEmlToHtmlBasic(byte[] emlBytes, EmlToPdfRequest request) { + private static String convertEmlToHtmlBasic( + byte[] emlBytes, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer) { if (emlBytes == null || emlBytes.length == 0) { throw new IllegalArgumentException("EML file is empty or null"); } String emlContent = new String(emlBytes, StandardCharsets.UTF_8); - // Basic email parsing - String subject = extractBasicHeader(emlContent, "Subject:"); - String from = extractBasicHeader(emlContent, "From:"); - String to = extractBasicHeader(emlContent, "To:"); - String cc = extractBasicHeader(emlContent, "Cc:"); - String bcc = extractBasicHeader(emlContent, "Bcc:"); - String date = extractBasicHeader(emlContent, "Date:"); + String subject = extractBasicHeader(emlContent, MimeConstants.HEADER_SUBJECT); + String from = extractBasicHeader(emlContent, MimeConstants.HEADER_FROM); + String to = extractBasicHeader(emlContent, MimeConstants.HEADER_TO); + String cc = extractBasicHeader(emlContent, MimeConstants.HEADER_CC); + String bcc = extractBasicHeader(emlContent, MimeConstants.HEADER_BCC); + String date = extractBasicHeader(emlContent, MimeConstants.HEADER_DATE); - // Try to extract HTML content String htmlBody = extractHtmlBody(emlContent); if (htmlBody == null) { String textBody = extractTextBody(emlContent); htmlBody = convertTextToHtml( - textBody != null ? textBody : "Email content could not be parsed"); + textBody != null ? textBody : "Email content could not be parsed", + customHtmlSanitizer); } - // Generate HTML with custom styling based on request StringBuilder html = new StringBuilder(); - html.append("\n"); - html.append("\n"); - html.append("").append(escapeHtml(subject)).append("\n"); - html.append("\n"); - html.append("\n"); - html.append("
\n"); - html.append("
\n"); - html.append("

").append(escapeHtml(subject)).append("

\n"); - html.append("
\n"); - html.append("
From: ").append(escapeHtml(from)).append("
\n"); - html.append("
To: ").append(escapeHtml(to)).append("
\n"); + html.append( + """ + + + + \n"); - // Add attachment information - always check for and display attachments String attachmentInfo = extractAttachmentInfo(emlContent); if (!attachmentInfo.isEmpty()) { - html.append("
\n"); - html.append("

Attachments

\n"); + html.append( + """ +
+

Attachments

+ """); html.append(attachmentInfo); - // Add a status message about attachment inclusion if (request != null && request.isIncludeAttachments()) { - html.append("
\n"); html.append( - "

Note: Attachments are saved as external files and linked in this PDF. Click the links to open files externally.

\n"); - html.append("
\n"); + """ +
+

Note: Attachments are saved as external files and linked in this PDF. Click the links to open files externally.

+
+ """); } else { - html.append("
\n"); html.append( - "

Attachment information displayed - files not included in PDF. Enable 'Include attachments' to embed files.

\n"); - html.append("
\n"); + """ +
+

Attachment information displayed - files not included in PDF. Enable 'Include attachments' to embed files.

+
+ """); } html.append("
\n"); } - // Show advanced features status if requested - assert request != null; - if (request.getFileInput().isEmpty()) { - html.append("
\n"); + if (request != null && request.getFileInput().isEmpty()) { html.append( - "

Note: Some advanced features require Jakarta Mail dependencies.

\n"); - html.append("
\n"); + """ +
+

Note: Some advanced features require Jakarta Mail dependencies.

+
+ """); } - html.append("
\n"); - html.append(""); + html.append( + """ +
+ + """); return html.toString(); } @@ -340,7 +415,6 @@ public class EmlToPdf { private static EmailContent extractEmailContentAdvanced( byte[] emlBytes, EmlToPdfRequest request) { try { - // Use Jakarta Mail for processing Class sessionClass = Class.forName("jakarta.mail.Session"); Class mimeMessageClass = Class.forName("jakarta.mail.internet.MimeMessage"); @@ -348,26 +422,49 @@ public class EmlToPdf { sessionClass.getMethod("getDefaultInstance", Properties.class); Object session = getDefaultInstance.invoke(null, new Properties()); - // Cast the session object to the proper type for the constructor Class[] constructorArgs = new Class[] {sessionClass, InputStream.class}; Constructor mimeMessageConstructor = mimeMessageClass.getConstructor(constructorArgs); Object message = mimeMessageConstructor.newInstance(session, new ByteArrayInputStream(emlBytes)); - return extractEmailContentAdvanced(message, request); + return extractEmailContentAdvanced(message, request, null); } catch (ReflectiveOperationException e) { - // Create basic EmailContent from basic processing EmailContent content = new EmailContent(); - content.setHtmlBody(convertEmlToHtmlBasic(emlBytes, request)); + content.setHtmlBody(convertEmlToHtmlBasic(emlBytes, request, null)); + return content; + } + } + + private static EmailContent extractEmailContentAdvanced( + byte[] emlBytes, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer) { + try { + Class sessionClass = Class.forName("jakarta.mail.Session"); + Class mimeMessageClass = Class.forName("jakarta.mail.internet.MimeMessage"); + + Method getDefaultInstance = + sessionClass.getMethod("getDefaultInstance", Properties.class); + Object session = getDefaultInstance.invoke(null, new Properties()); + + Class[] constructorArgs = new Class[] {sessionClass, InputStream.class}; + Constructor mimeMessageConstructor = + mimeMessageClass.getConstructor(constructorArgs); + Object message = + mimeMessageConstructor.newInstance(session, new ByteArrayInputStream(emlBytes)); + + return extractEmailContentAdvanced(message, request, customHtmlSanitizer); + + } catch (ReflectiveOperationException e) { + EmailContent content = new EmailContent(); + content.setHtmlBody(convertEmlToHtmlBasic(emlBytes, request, customHtmlSanitizer)); return content; } } private static String convertEmlToHtmlAdvanced(byte[] emlBytes, EmlToPdfRequest request) { EmailContent content = extractEmailContentAdvanced(emlBytes, request); - return generateEnhancedEmailHtml(content, request); + return generateEnhancedEmailHtml(content, request, null); } private static String extractAttachmentInfo(String emlContent) { @@ -382,22 +479,12 @@ public class EmlToPdf { boolean inMultipart = false; String boundary = ""; - // First pass: find boundary for multipart messages for (String line : lines) { String lowerLine = line.toLowerCase().trim(); - if (lowerLine.startsWith("content-type:") && lowerLine.contains("multipart")) { - if (lowerLine.contains("boundary=")) { - int boundaryStart = lowerLine.indexOf("boundary=") + 9; - String boundaryPart = line.substring(boundaryStart).trim(); - if (boundaryPart.startsWith("\"")) { - boundary = boundaryPart.substring(1, boundaryPart.indexOf("\"", 1)); - } else { - int spaceIndex = boundaryPart.indexOf(" "); - boundary = - spaceIndex > 0 - ? boundaryPart.substring(0, spaceIndex) - : boundaryPart; - } + if (lowerLine.startsWith(MimeConstants.HEADER_CONTENT_TYPE) + && lowerLine.contains(MimeConstants.MULTIPART_PREFIX)) { + boundary = extractBoundary(line); + if (boundary != null) { inMultipart = true; break; } @@ -405,13 +492,10 @@ public class EmlToPdf { if (line.trim().isEmpty()) break; } - // Second pass: extract attachment information for (String line : lines) { String lowerLine = line.toLowerCase().trim(); - // Check for boundary markers in multipart messages if (inMultipart && line.trim().startsWith("--" + boundary)) { - // Reset for new part currentContentType = ""; currentDisposition = ""; currentFilename = ""; @@ -423,7 +507,6 @@ public class EmlToPdf { if (inHeaders && line.trim().isEmpty()) { inHeaders = false; - // Process accumulated attachment info if (isAttachment(currentDisposition, currentFilename, currentContentType)) { addAttachmentToInfo( attachmentInfo, @@ -431,7 +514,6 @@ public class EmlToPdf { currentContentType, currentEncoding); - // Reset for next attachment currentContentType = ""; currentDisposition = ""; currentFilename = ""; @@ -440,19 +522,21 @@ public class EmlToPdf { continue; } - if (!inHeaders) continue; // Skip body content + if (!inHeaders) continue; - // Parse headers - if (lowerLine.startsWith("content-type:")) { - currentContentType = line.substring(13).trim(); - } else if (lowerLine.startsWith("content-disposition:")) { - currentDisposition = line.substring(20).trim(); - // Extract filename if present + if (lowerLine.startsWith(MimeConstants.HEADER_CONTENT_TYPE)) { + currentContentType = + line.substring(MimeConstants.HEADER_CONTENT_TYPE.length()).trim(); + } else if (lowerLine.startsWith(MimeConstants.HEADER_CONTENT_DISPOSITION)) { + currentDisposition = + line.substring(MimeConstants.HEADER_CONTENT_DISPOSITION.length()) + .trim(); currentFilename = extractFilenameFromDisposition(currentDisposition); - } else if (lowerLine.startsWith("content-transfer-encoding:")) { - currentEncoding = line.substring(26).trim(); + } else if (lowerLine.startsWith(MimeConstants.HEADER_CONTENT_TRANSFER_ENCODING)) { + currentEncoding = + line.substring(MimeConstants.HEADER_CONTENT_TRANSFER_ENCODING.length()) + .trim(); } else if (line.startsWith(" ") || line.startsWith("\t")) { - // Continuation of previous header if (currentDisposition.contains("filename=")) { currentDisposition += " " + line.trim(); currentFilename = extractFilenameFromDisposition(currentDisposition); @@ -468,33 +552,78 @@ public class EmlToPdf { } } catch (RuntimeException e) { - log.warn("Error extracting attachment info: {}", e.getMessage()); + attachmentInfo + .append("
") + .append("Error parsing attachments: ") + .append(escapeHtml(e.getMessage())) + .append("
\n"); } return attachmentInfo.toString(); } private static boolean isAttachment(String disposition, String filename, String contentType) { - return (disposition.toLowerCase().contains("attachment") && !filename.isEmpty()) + return (disposition.toLowerCase().contains(MimeConstants.DISPOSITION_ATTACHMENT) + && !filename.isEmpty()) || (!filename.isEmpty() && !contentType.toLowerCase().startsWith("text/")) || (contentType.toLowerCase().contains("application/") && !filename.isEmpty()); } - private static String extractFilenameFromDisposition(String disposition) { - if (disposition.contains("filename=")) { - int filenameStart = disposition.toLowerCase().indexOf("filename=") + 9; - int filenameEnd = disposition.indexOf(";", filenameStart); - if (filenameEnd == -1) filenameEnd = disposition.length(); - String filename = disposition.substring(filenameStart, filenameEnd).trim(); - filename = filename.replaceAll("^\"|\"$", ""); - // Apply MIME decoding to handle encoded filenames - return safeMimeDecode(filename); + private static String extractBoundary(String contentType) { + if (contentType == null) return null; + + Pattern boundaryPattern = + Pattern.compile("boundary\\s*=\\s*([^;\\s]+|\"[^\"]*\")", Pattern.CASE_INSENSITIVE); + Matcher matcher = boundaryPattern.matcher(contentType); + + if (matcher.find()) { + String boundary = matcher.group(1); + if (boundary.startsWith("\"") && boundary.endsWith("\"") && boundary.length() > 1) { + boundary = boundary.substring(1, boundary.length() - 1); + } + return boundary; + } + return null; + } + + private static String extractFilenameFromDisposition(String disposition) { + if (disposition == null || !disposition.contains("filename=")) { + return ""; + } + + if (disposition.toLowerCase().contains("filename*=")) { + int filenameStarStart = disposition.toLowerCase().indexOf("filename*=") + 10; + int filenameStarEnd = disposition.indexOf(";", filenameStarStart); + if (filenameStarEnd == -1) filenameStarEnd = disposition.length(); + String extendedFilename = + disposition.substring(filenameStarStart, filenameStarEnd).trim(); + extendedFilename = extendedFilename.replaceAll("^\"|\"$", ""); + + if (extendedFilename.contains("'")) { + String[] parts = extendedFilename.split("'", 3); + if (parts.length == 3) { + return decodeUrlEncoded(parts[2]); + } + } + } + + int filenameStart = disposition.toLowerCase().indexOf("filename=") + 9; + int filenameEnd = disposition.indexOf(";", filenameStart); + if (filenameEnd == -1) filenameEnd = disposition.length(); + String filename = disposition.substring(filenameStart, filenameEnd).trim(); + filename = filename.replaceAll("^\"|\"$", ""); + return safeMimeDecode(filename); + } + + private static String decodeUrlEncoded(String encoded) { + try { + return java.net.URLDecoder.decode(encoded, StandardCharsets.UTF_8); + } catch (Exception e) { + return encoded; // Return original if decoding fails } - return ""; } private static void addAttachmentToInfo( StringBuilder attachmentInfo, String filename, String contentType, String encoding) { - // Create attachment info with paperclip emoji before filename attachmentInfo .append("
") .append("") @@ -504,7 +633,6 @@ public class EmlToPdf { .append(escapeHtml(filename)) .append(""); - // Add content type and encoding info if (!contentType.isEmpty() || !encoding.isEmpty()) { attachmentInfo.append(" ("); if (!contentType.isEmpty()) { @@ -522,8 +650,17 @@ public class EmlToPdf { private static boolean isInvalidEmlFormat(byte[] emlBytes) { try { int checkLength = Math.min(emlBytes.length, StyleConstants.EML_CHECK_LENGTH); - String content = new String(emlBytes, 0, checkLength, StandardCharsets.UTF_8); - String lowerContent = content.toLowerCase(); + String content; + try { + content = new String(emlBytes, 0, checkLength, StandardCharsets.UTF_8); + if (content.contains("\uFFFD")) { + content = new String(emlBytes, 0, checkLength, StandardCharsets.ISO_8859_1); + } + } catch (Exception e) { + content = new String(emlBytes, 0, checkLength, StandardCharsets.ISO_8859_1); + } + + String lowerContent = content.toLowerCase(Locale.ROOT); boolean hasFrom = lowerContent.contains("from:") || lowerContent.contains("return-path:"); @@ -562,7 +699,6 @@ public class EmlToPdf { if (line.toLowerCase().startsWith(headerName.toLowerCase())) { StringBuilder value = new StringBuilder(line.substring(headerName.length()).trim()); - // Handle multi-line headers for (int j = i + 1; j < lines.length; j++) { if (lines[j].startsWith(" ") || lines[j].startsWith("\t")) { value.append(" ").append(lines[j].trim()); @@ -570,13 +706,12 @@ public class EmlToPdf { break; } } - // Apply MIME header decoding return safeMimeDecode(value.toString()); } if (line.trim().isEmpty()) break; } } catch (RuntimeException e) { - log.warn("Error extracting header '{}': {}", headerName, e.getMessage()); + // Ignore errors in header extraction } return ""; } @@ -584,7 +719,9 @@ public class EmlToPdf { private static String extractHtmlBody(String emlContent) { try { String lowerContent = emlContent.toLowerCase(); - int htmlStart = lowerContent.indexOf("content-type: text/html"); + int htmlStart = + lowerContent.indexOf( + MimeConstants.HEADER_CONTENT_TYPE + " " + MimeConstants.TEXT_HTML); if (htmlStart == -1) return null; return getString(emlContent, htmlStart); @@ -609,7 +746,9 @@ public class EmlToPdf { private static String extractTextBody(String emlContent) { try { String lowerContent = emlContent.toLowerCase(); - int textStart = lowerContent.indexOf("content-type: text/plain"); + int textStart = + lowerContent.indexOf( + MimeConstants.HEADER_CONTENT_TYPE + " " + MimeConstants.TEXT_PLAIN); if (textStart == -1) { int bodyStart = emlContent.indexOf("\r\n\r\n"); if (bodyStart == -1) bodyStart = emlContent.indexOf("\n\n"); @@ -640,10 +779,17 @@ public class EmlToPdf { return start + result.length(); } - private static String convertTextToHtml(String textBody) { + private static String convertTextToHtml( + String textBody, CustomHtmlSanitizer customHtmlSanitizer) { if (textBody == null) return ""; - String html = escapeHtml(textBody); + String html; + if (customHtmlSanitizer != null) { + html = customHtmlSanitizer.sanitize(textBody); + } else { + html = escapeHtml(textBody); + } + html = html.replace("\r\n", "\n").replace("\r", "\n"); html = html.replace("\n", "
\n"); @@ -660,20 +806,25 @@ public class EmlToPdf { return html; } - private static String processEmailHtmlBody(String htmlBody) { - return processEmailHtmlBody(htmlBody, null); + private static String processEmailHtmlBody( + String htmlBody, CustomHtmlSanitizer customHtmlSanitizer) { + return processEmailHtmlBody(htmlBody, null, customHtmlSanitizer); } - private static String processEmailHtmlBody(String htmlBody, EmailContent emailContent) { + private static String processEmailHtmlBody( + String htmlBody, EmailContent emailContent, CustomHtmlSanitizer customHtmlSanitizer) { if (htmlBody == null) return ""; - String processed = htmlBody; + String processed; + if (customHtmlSanitizer != null) { + processed = customHtmlSanitizer.sanitize(htmlBody); + } else { + processed = htmlBody; + } - // Remove problematic CSS processed = processed.replaceAll("(?i)\\s*position\\s*:\\s*fixed[^;]*;?", ""); processed = processed.replaceAll("(?i)\\s*position\\s*:\\s*absolute[^;]*;?", ""); - // Process inline images (cid: references) if we have email content with attachments if (emailContent != null && !emailContent.getAttachments().isEmpty()) { processed = processInlineImages(processed, emailContent); } @@ -684,7 +835,6 @@ public class EmlToPdf { private static String processInlineImages(String htmlContent, EmailContent emailContent) { if (htmlContent == null || emailContent == null) return htmlContent; - // Create a map of Content-ID to attachment data Map contentIdMap = new HashMap<>(); for (EmailAttachment attachment : emailContent.getAttachments()) { if (attachment.isEmbedded() @@ -696,51 +846,29 @@ public class EmlToPdf { if (contentIdMap.isEmpty()) return htmlContent; - // Pattern to match cid: references in img src attributes Pattern cidPattern = Pattern.compile( "(?i)]*\\ssrc\\s*=\\s*['\"]cid:([^'\"]+)['\"][^>]*>", Pattern.CASE_INSENSITIVE); Matcher matcher = cidPattern.matcher(htmlContent); - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); while (matcher.find()) { String contentId = matcher.group(1); EmailAttachment attachment = contentIdMap.get(contentId); if (attachment != null && attachment.getData() != null) { - // Convert to data URI - String mimeType = attachment.getContentType(); - if (mimeType == null || mimeType.isEmpty()) { - // Try to determine MIME type from filename - String filename = attachment.getFilename(); - if (filename != null) { - if (filename.toLowerCase().endsWith(".png")) { - mimeType = "image/png"; - } else if (filename.toLowerCase().endsWith(".jpg") - || filename.toLowerCase().endsWith(".jpeg")) { - mimeType = "image/jpeg"; - } else if (filename.toLowerCase().endsWith(".gif")) { - mimeType = "image/gif"; - } else if (filename.toLowerCase().endsWith(".bmp")) { - mimeType = "image/bmp"; - } else { - mimeType = "image/png"; // fallback - } - } else { - mimeType = "image/png"; // fallback - } - } + String mimeType = + MimeTypeDetector.detectMimeType( + attachment.getFilename(), attachment.getContentType()); String base64Data = Base64.getEncoder().encodeToString(attachment.getData()); String dataUri = "data:" + mimeType + ";base64," + base64Data; - // Replace the cid: reference with the data URI String replacement = matcher.group(0).replaceFirst("cid:" + Pattern.quote(contentId), dataUri); matcher.appendReplacement(result, Matcher.quoteReplacement(replacement)); } else { - // Keep original if attachment not found matcher.appendReplacement(result, Matcher.quoteReplacement(matcher.group(0))); } } @@ -755,107 +883,134 @@ public class EmlToPdf { String backgroundColor = StyleConstants.DEFAULT_BACKGROUND_COLOR; String borderColor = StyleConstants.DEFAULT_BORDER_COLOR; - html.append("body {\n"); - html.append(" font-family: ").append(StyleConstants.DEFAULT_FONT_FAMILY).append(";\n"); - html.append(" font-size: ").append(fontSize).append("px;\n"); - html.append(" line-height: ").append(StyleConstants.DEFAULT_LINE_HEIGHT).append(";\n"); - html.append(" color: ").append(textColor).append(";\n"); - html.append(" margin: 0;\n"); - html.append(" padding: 16px;\n"); - html.append(" background-color: ").append(backgroundColor).append(";\n"); - html.append("}\n\n"); + String css = + String.format( + """ + body { + font-family: %s; + font-size: %dpx; + line-height: %s; + color: %s; + margin: 0; + padding: 16px; + background-color: %s; + } - html.append(".email-container {\n"); - html.append(" width: 100%;\n"); - html.append(" max-width: 100%;\n"); - html.append(" margin: 0 auto;\n"); - html.append("}\n\n"); + .email-container { + width: 100%%; + max-width: 100%%; + margin: 0 auto; + } - html.append(".email-header {\n"); - html.append(" padding-bottom: 10px;\n"); - html.append(" border-bottom: 1px solid ").append(borderColor).append(";\n"); - html.append(" margin-bottom: 10px;\n"); - html.append("}\n\n"); - html.append(".email-header h1 {\n"); - html.append(" margin: 0 0 10px 0;\n"); - html.append(" font-size: ").append(fontSize + 4).append("px;\n"); - html.append(" font-weight: bold;\n"); - html.append("}\n\n"); - html.append(".email-meta div {\n"); - html.append(" margin-bottom: 2px;\n"); - html.append(" font-size: ").append(fontSize - 1).append("px;\n"); - html.append("}\n\n"); + .email-header { + padding-bottom: 10px; + border-bottom: 1px solid %s; + margin-bottom: 10px; + } - html.append(".email-body {\n"); - html.append(" word-wrap: break-word;\n"); - html.append("}\n\n"); + .email-header h1 { + margin: 0 0 10px 0; + font-size: %dpx; + font-weight: bold; + } - html.append(".attachment-section {\n"); - html.append(" margin-top: 15px;\n"); - html.append(" padding: 10px;\n"); - html.append(" background-color: ") - .append(StyleConstants.ATTACHMENT_BACKGROUND_COLOR) - .append(";\n"); - html.append(" border: 1px solid ") - .append(StyleConstants.ATTACHMENT_BORDER_COLOR) - .append(";\n"); - html.append(" border-radius: 3px;\n"); - html.append("}\n\n"); - html.append(".attachment-section h3 {\n"); - html.append(" margin: 0 0 8px 0;\n"); - html.append(" font-size: ").append(fontSize + 1).append("px;\n"); - html.append("}\n\n"); - html.append(".attachment-item {\n"); - html.append(" padding: 5px 0;\n"); - html.append("}\n\n"); - html.append(".attachment-icon {\n"); - html.append(" margin-right: 5px;\n"); - html.append("}\n\n"); - html.append(".attachment-details, .attachment-type {\n"); - html.append(" font-size: ").append(fontSize - 2).append("px;\n"); - html.append(" color: #555555;\n"); - html.append("}\n\n"); - html.append(".attachment-inclusion-note, .attachment-info-note {\n"); - html.append(" margin-top: 8px;\n"); - html.append(" padding: 6px;\n"); - html.append(" font-size: ").append(fontSize - 2).append("px;\n"); - html.append(" border-radius: 3px;\n"); - html.append("}\n\n"); - html.append(".attachment-inclusion-note {\n"); - html.append(" background-color: #e6ffed;\n"); - html.append(" border: 1px solid #d4f7dc;\n"); - html.append(" color: #006420;\n"); - html.append("}\n\n"); - html.append(".attachment-info-note {\n"); - html.append(" background-color: #fff9e6;\n"); - html.append(" border: 1px solid #fff0c2;\n"); - html.append(" color: #664d00;\n"); - html.append("}\n\n"); - html.append(".attachment-link-container {\n"); - html.append(" display: flex;\n"); - html.append(" align-items: center;\n"); - html.append(" padding: 8px;\n"); - html.append(" background-color: #f8f9fa;\n"); - html.append(" border: 1px solid #dee2e6;\n"); - html.append(" border-radius: 4px;\n"); - html.append(" margin: 4px 0;\n"); - html.append("}\n\n"); - html.append(".attachment-link-container:hover {\n"); - html.append(" background-color: #e9ecef;\n"); - html.append("}\n\n"); - html.append(".attachment-note {\n"); - html.append(" font-size: ").append(fontSize - 3).append("px;\n"); - html.append(" color: #6c757d;\n"); - html.append(" font-style: italic;\n"); - html.append(" margin-left: 8px;\n"); - html.append("}\n\n"); + .email-meta div { + margin-bottom: 2px; + font-size: %dpx; + } - // Basic image styling: ensure images are responsive but not overly constrained. - html.append("img {\n"); - html.append(" max-width: 100%;\n"); // Make images responsive to container width - html.append(" height: auto;\n"); // Maintain aspect ratio - html.append(" display: block;\n"); // Avoid extra space below images - html.append("}\n\n"); + .email-body { + word-wrap: break-word; + } + + .attachment-section { + margin-top: 15px; + padding: 10px; + background-color: %s; + border: 1px solid %s; + border-radius: 3px; + } + + .attachment-section h3 { + margin: 0 0 8px 0; + font-size: %dpx; + } + + .attachment-item { + padding: 5px 0; + } + + .attachment-icon { + margin-right: 5px; + } + + .attachment-details, .attachment-type { + font-size: %dpx; + color: #555555; + } + + .attachment-inclusion-note, .attachment-info-note { + margin-top: 8px; + padding: 6px; + font-size: %dpx; + border-radius: 3px; + } + + .attachment-inclusion-note { + background-color: #e6ffed; + border: 1px solid #d4f7dc; + color: #006420; + } + + .attachment-info-note { + background-color: #fff9e6; + border: 1px solid #fff0c2; + color: #664d00; + } + + .attachment-link-container { + display: flex; + align-items: center; + padding: 8px; + background-color: #f8f9fa; + border: 1px solid #dee2e6; + border-radius: 4px; + margin: 4px 0; + } + + .attachment-link-container:hover { + background-color: #e9ecef; + } + + .attachment-note { + font-size: %dpx; + color: #6c757d; + font-style: italic; + margin-left: 8px; + } + + img { + max-width: 100%%; + height: auto; + display: block; + } + """, + StyleConstants.DEFAULT_FONT_FAMILY, + fontSize, + StyleConstants.DEFAULT_LINE_HEIGHT, + textColor, + backgroundColor, + borderColor, + fontSize + 4, + fontSize - 1, + StyleConstants.ATTACHMENT_BACKGROUND_COLOR, + StyleConstants.ATTACHMENT_BORDER_COLOR, + fontSize + 1, + fontSize - 2, + fontSize - 2, + fontSize - 3); + + html.append(css); } private static String escapeHtml(String text) { @@ -867,93 +1022,165 @@ public class EmlToPdf { .replace("'", "'"); } - private static stirling.software.common.model.api.converters.HTMLToPdfRequest createHtmlRequest( - EmlToPdfRequest request) { - stirling.software.common.model.api.converters.HTMLToPdfRequest htmlRequest = - new stirling.software.common.model.api.converters.HTMLToPdfRequest(); + private static String sanitizeText(String text, CustomHtmlSanitizer customHtmlSanitizer) { + if (customHtmlSanitizer != null) { + return customHtmlSanitizer.sanitize(text); + } else { + return escapeHtml(text); + } + } + + private static HTMLToPdfRequest createHtmlRequest(EmlToPdfRequest request) { + HTMLToPdfRequest htmlRequest = new HTMLToPdfRequest(); if (request != null) { htmlRequest.setFileInput(request.getFileInput()); } - // Set default zoom level htmlRequest.setZoom(Float.parseFloat(StyleConstants.DEFAULT_ZOOM)); return htmlRequest; } private static EmailContent extractEmailContentAdvanced( - Object message, EmlToPdfRequest request) { + Object message, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer) { EmailContent content = new EmailContent(); try { Class messageClass = message.getClass(); - // Extract headers via reflection Method getSubject = messageClass.getMethod("getSubject"); String subject = (String) getSubject.invoke(message); content.setSubject(subject != null ? safeMimeDecode(subject) : "No Subject"); Method getFrom = messageClass.getMethod("getFrom"); Object[] fromAddresses = (Object[]) getFrom.invoke(message); - content.setFrom( - fromAddresses != null && fromAddresses.length > 0 - ? safeMimeDecode(fromAddresses[0].toString()) - : ""); + if (fromAddresses != null && fromAddresses.length > 0) { + StringBuilder fromBuilder = new StringBuilder(); + for (int i = 0; i < fromAddresses.length; i++) { + if (i > 0) fromBuilder.append(", "); + fromBuilder.append(safeMimeDecode(fromAddresses[i].toString())); + } + content.setFrom(fromBuilder.toString()); + } else { + content.setFrom(""); + } - Method getAllRecipients = messageClass.getMethod("getAllRecipients"); - Object[] recipients = (Object[]) getAllRecipients.invoke(message); - content.setTo( - recipients != null && recipients.length > 0 - ? safeMimeDecode(recipients[0].toString()) - : ""); + try { + Method getRecipients = + messageClass.getMethod( + "getRecipients", + Class.forName("jakarta.mail.Message$RecipientType")); + Class recipientTypeClass = Class.forName("jakarta.mail.Message$RecipientType"); + + Object toType = recipientTypeClass.getField("TO").get(null); + Object[] toRecipients = (Object[]) getRecipients.invoke(message, toType); + recipientBuilder(content, toRecipients); + + Object ccType = recipientTypeClass.getField("CC").get(null); + Object[] ccRecipients = (Object[]) getRecipients.invoke(message, ccType); + if (ccRecipients != null && ccRecipients.length > 0) { + StringBuilder ccBuilder = new StringBuilder(); + for (int i = 0; i < ccRecipients.length; i++) { + if (i > 0) ccBuilder.append(", "); + ccBuilder.append(safeMimeDecode(ccRecipients[i].toString())); + } + content.setCc(ccBuilder.toString()); + } else { + content.setCc(""); + } + + Object bccType = recipientTypeClass.getField("BCC").get(null); + Object[] bccRecipients = (Object[]) getRecipients.invoke(message, bccType); + if (bccRecipients != null && bccRecipients.length > 0) { + StringBuilder bccBuilder = new StringBuilder(); + for (int i = 0; i < bccRecipients.length; i++) { + if (i > 0) bccBuilder.append(", "); + bccBuilder.append(safeMimeDecode(bccRecipients[i].toString())); + } + content.setBcc(bccBuilder.toString()); + } else { + content.setBcc(""); + } + + } catch (ReflectiveOperationException e) { + Method getAllRecipients = messageClass.getMethod("getAllRecipients"); + Object[] recipients = (Object[]) getAllRecipients.invoke(message); + recipientBuilder(content, recipients); + content.setCc(""); + content.setBcc(""); + } Method getSentDate = messageClass.getMethod("getSentDate"); content.setDate((Date) getSentDate.invoke(message)); - // Extract content Method getContent = messageClass.getMethod("getContent"); Object messageContent = getContent.invoke(message); if (messageContent instanceof String stringContent) { Method getContentType = messageClass.getMethod("getContentType"); String contentType = (String) getContentType.invoke(message); - if (contentType != null && contentType.toLowerCase().contains("text/html")) { + if (contentType != null + && contentType.toLowerCase().contains(MimeConstants.TEXT_HTML)) { content.setHtmlBody(stringContent); } else { content.setTextBody(stringContent); } } else { - // Handle multipart content try { Class multipartClass = Class.forName("jakarta.mail.Multipart"); if (multipartClass.isInstance(messageContent)) { - processMultipartAdvanced(messageContent, content, request); + processMultipartAdvanced( + messageContent, content, request, customHtmlSanitizer, 0); } - } catch (Exception e) { - log.warn("Error processing content: {}", e.getMessage()); + } catch (ReflectiveOperationException | ClassCastException e) { + content.setTextBody( + "Email content could not be parsed with advanced processing"); + content.setHtmlBody( + "Email content could not be parsed with advanced processing"); } } - } catch (Exception e) { + } catch (ReflectiveOperationException | RuntimeException e) { content.setSubject("Email Conversion"); content.setFrom("Unknown"); content.setTo("Unknown"); + content.setCc(""); + content.setBcc(""); content.setTextBody("Email content could not be parsed with advanced processing"); } return content; } - private static void processMultipartAdvanced( - Object multipart, EmailContent content, EmlToPdfRequest request) { - try { - // Enhanced multipart type checking - if (!isValidJakartaMailMultipart(multipart)) { - log.warn("Invalid Jakarta Mail multipart type: {}", multipart.getClass().getName()); - return; + private static void recipientBuilder(EmailContent content, Object[] toRecipients) { + if (toRecipients != null && toRecipients.length > 0) { + StringBuilder toBuilder = new StringBuilder(); + for (int i = 0; i < toRecipients.length; i++) { + if (i > 0) toBuilder.append(", "); + toBuilder.append(safeMimeDecode(toRecipients[i].toString())); } + content.setTo(toBuilder.toString()); + } else { + content.setTo(""); + } + } + private static void processMultipartAdvanced( + Object multipart, + EmailContent content, + EmlToPdfRequest request, + CustomHtmlSanitizer customHtmlSanitizer, + int depth) { + + // RFC 2046: Prevent excessive recursion depth for malformed multipart structures + final int MAX_MULTIPART_DEPTH = 10; + if (depth > MAX_MULTIPART_DEPTH) { + content.setHtmlBody("
Maximum multipart depth exceeded
"); + return; + } + + try { Class multipartClass = multipart.getClass(); Method getCount = multipartClass.getMethod("getCount"); int count = (Integer) getCount.invoke(multipart); @@ -962,87 +1189,164 @@ public class EmlToPdf { for (int i = 0; i < count; i++) { Object part = getBodyPart.invoke(multipart, i); - processPartAdvanced(part, content, request); + processPartAdvanced(part, content, request, customHtmlSanitizer, depth + 1); } - } catch (Exception e) { - content.setTextBody("Email content could not be parsed with advanced processing"); + } catch (ReflectiveOperationException | ClassCastException e) { + if (request != null && request.isIncludeAttachments()) { + content.setHtmlBody( + "
Error processing multipart content: " + + escapeHtml(e.getMessage()) + + "
"); + } else { + content.setHtmlBody( + "
Error processing multipart content
"); + } } } private static void processPartAdvanced( - Object part, EmailContent content, EmlToPdfRequest request) { + Object part, + EmailContent content, + EmlToPdfRequest request, + CustomHtmlSanitizer customHtmlSanitizer, + int depth) { try { - if (!isValidJakartaMailPart(part)) { - log.warn("Invalid Jakarta Mail part type: {}", part.getClass().getName()); - return; - } Class partClass = part.getClass(); - Method isMimeType = partClass.getMethod("isMimeType", String.class); - Method getContent = partClass.getMethod("getContent"); - Method getDisposition = partClass.getMethod("getDisposition"); - Method getFileName = partClass.getMethod("getFileName"); - Method getContentType = partClass.getMethod("getContentType"); - Method getHeader = partClass.getMethod("getHeader", String.class); + + Method isMimeType; + Method getContent; + Method getDisposition; + Method getFileName; + Method getContentType; + Method getHeader; + + try { + isMimeType = partClass.getMethod("isMimeType", String.class); + getContent = partClass.getMethod("getContent"); + getDisposition = partClass.getMethod("getDisposition"); + getFileName = partClass.getMethod("getFileName"); + getContentType = partClass.getMethod("getContentType"); + getHeader = partClass.getMethod("getHeader", String.class); + } catch (NoSuchMethodException e) { + return; + } Object disposition = getDisposition.invoke(part); String filename = (String) getFileName.invoke(part); String contentType = (String) getContentType.invoke(part); - if ((Boolean) isMimeType.invoke(part, "text/plain") && disposition == null) { - content.setTextBody((String) getContent.invoke(part)); - } else if ((Boolean) isMimeType.invoke(part, "text/html") && disposition == null) { - content.setHtmlBody((String) getContent.invoke(part)); - } else if ("attachment".equalsIgnoreCase((String) disposition) + // RFC 2045: MIME type checks should be case-insensitive + String normalizedContentType = + contentType != null ? contentType.toLowerCase(Locale.ROOT) : ""; + String normalizedDisposition = + disposition != null ? ((String) disposition).toLowerCase(Locale.ROOT) : null; + + // RFC 2046: Check for text/plain with case-insensitive comparison + if ((Boolean) isMimeType.invoke(part, MimeConstants.TEXT_PLAIN) + && normalizedDisposition == null) { + Object partContent = getContent.invoke(part); + if (partContent instanceof String stringContent) { + content.setTextBody(stringContent); + } + // RFC 2046: Check for text/html with case-insensitive comparison + } else if ((Boolean) isMimeType.invoke(part, MimeConstants.TEXT_HTML) + && normalizedDisposition == null) { + Object partContent = getContent.invoke(part); + if (partContent instanceof String stringContent) { + String htmlBody = stringContent; + if (customHtmlSanitizer != null) { + htmlBody = customHtmlSanitizer.sanitize(htmlBody); + } + content.setHtmlBody(htmlBody); + } + // RFC 2183: Content-Disposition header handling + } else if ((normalizedDisposition != null + && normalizedDisposition.contains(MimeConstants.DISPOSITION_ATTACHMENT)) || (filename != null && !filename.trim().isEmpty())) { content.setAttachmentCount(content.getAttachmentCount() + 1); - // Always extract basic attachment metadata for display if (filename != null && !filename.trim().isEmpty()) { - // Create attachment with metadata only EmailAttachment attachment = new EmailAttachment(); - // Apply MIME decoding to filename to handle encoded attachment names attachment.setFilename(safeMimeDecode(filename)); attachment.setContentType(contentType); - // Check if it's an embedded image - String[] contentIdHeaders = (String[]) getHeader.invoke(part, "Content-ID"); - if (contentIdHeaders != null && contentIdHeaders.length > 0) { - attachment.setEmbedded(true); - // Store the Content-ID, removing angle brackets if present - String contentId = contentIdHeaders[0]; - if (contentId.startsWith("<") && contentId.endsWith(">")) { - contentId = contentId.substring(1, contentId.length() - 1); + try { + String[] contentIdHeaders = + (String[]) getHeader.invoke(part, MimeConstants.HEADER_CONTENT_ID); + if (contentIdHeaders != null) { + for (String contentIdHeader : contentIdHeaders) { + if (contentIdHeader != null && !contentIdHeader.trim().isEmpty()) { + attachment.setEmbedded(true); + String contentId = contentIdHeader.trim(); + + // RFC 2392: Content-ID should be fully stripped of angle + // brackets + contentId = contentId.replaceAll("[<>]", ""); + + attachment.setContentId(contentId); + break; + } + } } - attachment.setContentId(contentId); + } catch (ReflectiveOperationException e) { + // Silently continue if Content-ID header cannot be accessed } - // Extract attachment data if attachments should be included OR if it's an - // embedded image (needed for inline display) if ((request != null && request.isIncludeAttachments()) || attachment.isEmbedded()) { try { Object attachmentContent = getContent.invoke(part); byte[] attachmentData = null; - if (attachmentContent instanceof java.io.InputStream inputStream) { - try { - attachmentData = inputStream.readAllBytes(); - } catch (IOException e) { - log.warn( - "Failed to read InputStream attachment: {}", - e.getMessage()); + if (attachmentContent instanceof InputStream) { + InputStream inputStream = (InputStream) attachmentContent; + // Enhanced stream handling with EOF protection + try (InputStream stream = inputStream) { + attachmentData = stream.readAllBytes(); + } catch (EOFException e) { + // RFC-compliant error handling: unexpected end of stream + throw new IOException("Unexpected end of attachment stream", e); + } catch (IOException | OutOfMemoryError e) { + if (attachment.isEmbedded()) { + attachmentData = new byte[0]; + } else { + throw e; + } } } else if (attachmentContent instanceof byte[] byteArray) { attachmentData = byteArray; } else if (attachmentContent instanceof String stringContent) { - attachmentData = stringContent.getBytes(StandardCharsets.UTF_8); + // Enhanced charset handling with fallbacks per RFC 2047 + Charset charset = StandardCharsets.UTF_8; + if (normalizedContentType.contains("charset=")) { + try { + String charsetName = + normalizedContentType + .substring( + normalizedContentType.indexOf( + "charset=") + + 8) + .split("[;\\s]")[0] + .trim(); + if (charsetName.startsWith("\"") + && charsetName.endsWith("\"")) { + charsetName = + charsetName.substring( + 1, charsetName.length() - 1); + } + charset = Charset.forName(charsetName); + } catch (Exception e) { + // Fallback to ISO-8859-1 per MIME standards if UTF-8 fails + charset = StandardCharsets.ISO_8859_1; + } + } + attachmentData = stringContent.getBytes(charset); } if (attachmentData != null) { - // Check size limit (use default 10MB if request is null) long maxSizeMB = request != null ? request.getMaxAttachmentSizeMB() : 10L; long maxSizeBytes = maxSizeMB * 1024 * 1024; @@ -1051,83 +1355,129 @@ public class EmlToPdf { attachment.setData(attachmentData); attachment.setSizeBytes(attachmentData.length); } else { - // For embedded images, always include data regardless of size - // to ensure inline display works if (attachment.isEmbedded()) { attachment.setData(attachmentData); attachment.setSizeBytes(attachmentData.length); } else { - // Still show attachment info even if too large attachment.setSizeBytes(attachmentData.length); } } } - } catch (Exception e) { - log.warn("Error extracting attachment data: {}", e.getMessage()); + } catch (ReflectiveOperationException | IOException e) { } } - - // Add attachment to the list for display (with or without data) content.getAttachments().add(attachment); } } else if ((Boolean) isMimeType.invoke(part, "multipart/*")) { - // Handle nested multipart content try { Object multipartContent = getContent.invoke(part); - Class multipartClass = Class.forName("jakarta.mail.Multipart"); - if (multipartClass.isInstance(multipartContent)) { - processMultipartAdvanced(multipartContent, content, request); + if (multipartContent != null) { + Class multipartClass = Class.forName("jakarta.mail.Multipart"); + if (multipartClass.isInstance(multipartContent)) { + // Safe recursion with depth limit + processMultipartAdvanced( + multipartContent, + content, + request, + customHtmlSanitizer, + depth + 1); + } } - } catch (Exception e) { - log.warn("Error processing multipart content: {}", e.getMessage()); + } catch (ReflectiveOperationException e) { + // Continue processing other parts if one fails } } - } catch (Exception e) { - log.warn("Error processing multipart part: {}", e.getMessage()); + } catch (ReflectiveOperationException | RuntimeException e) { + if (request != null && request.isIncludeAttachments()) { + String sanitizedMessage = + customHtmlSanitizer != null + ? customHtmlSanitizer.sanitize(e.getMessage()) + : escapeHtml(e.getMessage()); + content.setHtmlBody( + "
Error processing part: " + + sanitizedMessage + + "
"); + } else { + content.setHtmlBody("
Error processing part
"); + } } } - private static String generateEnhancedEmailHtml(EmailContent content, EmlToPdfRequest request) { + private static String generateEnhancedEmailHtml( + EmailContent content, + EmlToPdfRequest request, + CustomHtmlSanitizer customHtmlSanitizer) { StringBuilder html = new StringBuilder(); - html.append("\n"); - html.append("\n"); - html.append("").append(escapeHtml(content.getSubject())).append("\n"); - html.append("\n"); - html.append("\n"); + html.append( + """ + + + """); - html.append("
\n"); - html.append("
\n"); - html.append("

").append(escapeHtml(content.getSubject())).append("

\n"); - html.append("
\n"); - html.append("
From: ") - .append(escapeHtml(content.getFrom())) - .append("
\n"); - html.append("
To: ") - .append(escapeHtml(content.getTo())) - .append("
\n"); + html.append( + String.format( + """ +
+ """); html.append("
\n"); if (content.getHtmlBody() != null && !content.getHtmlBody().trim().isEmpty()) { - html.append(processEmailHtmlBody(content.getHtmlBody(), content)); + html.append(processEmailHtmlBody(content.getHtmlBody(), content, customHtmlSanitizer)); } else if (content.getTextBody() != null && !content.getTextBody().trim().isEmpty()) { - html.append("
"); - html.append(convertTextToHtml(content.getTextBody())); - html.append("
"); + html.append( + String.format( + """ +
%s
""", + convertTextToHtml(content.getTextBody(), customHtmlSanitizer))); } else { - html.append("
"); - html.append("

No content available

"); - html.append("
"); + html.append( + """ +
+

No content available

+
"""); } html.append("
\n"); @@ -1141,49 +1491,56 @@ public class EmlToPdf { if (!content.getAttachments().isEmpty()) { for (EmailAttachment attachment : content.getAttachments()) { - // Create attachment info with paperclip emoji before filename String uniqueId = generateUniqueAttachmentId(attachment.getFilename()); attachment.setEmbeddedFilename( attachment.getEmbeddedFilename() != null ? attachment.getEmbeddedFilename() : attachment.getFilename()); - html.append("
") - .append("") - .append(MimeConstants.ATTACHMENT_MARKER) - .append(" ") - .append("") - .append(escapeHtml(safeMimeDecode(attachment.getFilename()))) - .append(""); - String sizeStr = formatFileSize(attachment.getSizeBytes()); - html.append(" (").append(sizeStr); - if (attachment.getContentType() != null - && !attachment.getContentType().isEmpty()) { - html.append(", ").append(escapeHtml(attachment.getContentType())); - } - html.append(")
\n"); + String contentType = + attachment.getContentType() != null + && !attachment.getContentType().isEmpty() + ? ", " + escapeHtml(attachment.getContentType()) + : ""; + + html.append( + String.format( + """ +
+ %s + %s + (%s%s) +
+ """, + uniqueId, + MimeConstants.ATTACHMENT_MARKER, + escapeHtml(safeMimeDecode(attachment.getFilename())), + sizeStr, + contentType)); } } - if (request.isIncludeAttachments()) { - html.append("
\n"); - html.append("

Attachments are embedded in the file.

\n"); - html.append("
\n"); - } else { - html.append("
\n"); html.append( - "

Attachment information displayed - files not included in PDF.

\n"); - html.append("
\n"); + """ +
+

Attachments are embedded in the file.

+
+ """); + } else { + html.append( + """ +
+

Attachment information displayed - files not included in PDF.

+
+ """); } - html.append("
\n"); } - - html.append("
\n"); - html.append(""); + html.append( + """ +
+ """); return html.toString(); } @@ -1193,121 +1550,186 @@ public class EmlToPdf { List attachments, CustomPDFDocumentFactory pdfDocumentFactory) throws IOException { + + if (attachments == null || attachments.isEmpty()) { + return pdfBytes; + } + try (PDDocument document = pdfDocumentFactory.load(pdfBytes); ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) { - if (attachments == null || attachments.isEmpty()) { - document.save(outputStream); - return outputStream.toByteArray(); + // PDF version validation - ensure PDF 1.7+ for advanced features (ISO 32000-1:2008) + float pdfVersion = document.getVersion(); + if (pdfVersion < 1.7f) { + // Log warning but continue - fallback to basic attachment support + // Note: PDF 1.4+ supports basic file attachments, but 1.7+ has better Unicode + // support } - List embeddedFiles = new ArrayList<>(); - - // Set up the embedded files name tree once - if (document.getDocumentCatalog().getNames() == null) { - document.getDocumentCatalog() - .setNames(new PDDocumentNameDictionary(document.getDocumentCatalog())); - } - - PDDocumentNameDictionary names = document.getDocumentCatalog().getNames(); - if (names.getEmbeddedFiles() == null) { - names.setEmbeddedFiles(new PDEmbeddedFilesNameTreeNode()); - } - - PDEmbeddedFilesNameTreeNode efTree = names.getEmbeddedFiles(); - Map efMap = efTree.getNames(); - if (efMap == null) { - efMap = new HashMap<>(); - } - - // Embed each attachment directly into the PDF + List multipartAttachments = new ArrayList<>(); for (EmailAttachment attachment : attachments) { - if (attachment.getData() == null || attachment.getData().length == 0) { - continue; - } - - try { - // Generate unique filename - String filename = attachment.getFilename(); - if (filename == null || filename.trim().isEmpty()) { - filename = "attachment_" + System.currentTimeMillis(); - if (attachment.getContentType() != null - && attachment.getContentType().contains("/")) { - String[] parts = attachment.getContentType().split("/"); - if (parts.length > 1) { - filename += "." + parts[1]; - } - } - } - - // Ensure unique filename - String uniqueFilename = getUniqueFilename(filename, embeddedFiles, efMap); - - // Create embedded file - PDEmbeddedFile embeddedFile = - new PDEmbeddedFile( - document, new ByteArrayInputStream(attachment.getData())); - embeddedFile.setSize(attachment.getData().length); - embeddedFile.setCreationDate(new GregorianCalendar()); - - // Create file specification - PDComplexFileSpecification fileSpec = new PDComplexFileSpecification(); - fileSpec.setFile(uniqueFilename); - fileSpec.setEmbeddedFile(embeddedFile); - if (attachment.getContentType() != null) { - embeddedFile.setSubtype(attachment.getContentType()); - fileSpec.setFileDescription("Email attachment: " + uniqueFilename); - } - - // Add to the map (but don't set it yet) - efMap.put(uniqueFilename, fileSpec); - embeddedFiles.add(uniqueFilename); - - // Store the filename for annotation creation - attachment.setEmbeddedFilename(uniqueFilename); - - } catch (Exception e) { - // Log error but continue with other attachments - log.warn("Failed to embed attachment: {}", attachment.getFilename(), e); + if (attachment.getData() != null && attachment.getData().length > 0) { + multipartAttachments.add(createMultipartFile(attachment)); } } - // Set the complete map once at the end - if (!efMap.isEmpty()) { - efTree.setNames(efMap); + if (!multipartAttachments.isEmpty()) { + addAttachmentsToDocument(document, multipartAttachments); - // Set catalog viewer preferences to automatically show attachments pane setCatalogViewerPreferences(document, PageMode.USE_ATTACHMENTS); - } - // Add attachment annotations to the first page for each embedded file - if (!embeddedFiles.isEmpty()) { addAttachmentAnnotationsToDocument(document, attachments); } document.save(outputStream); return outputStream.toByteArray(); + } catch (RuntimeException e) { + throw new IOException( + "Invalid PDF structure or processing error: " + e.getMessage(), e); + } catch (Exception e) { + throw new IOException("Error attaching files to PDF: " + e.getMessage(), e); } } - private static String getUniqueFilename( - String filename, - List embeddedFiles, - Map efMap) { - String uniqueFilename = filename; - int counter = 1; - while (embeddedFiles.contains(uniqueFilename) || efMap.containsKey(uniqueFilename)) { - String extension = ""; - String baseName = filename; - int lastDot = filename.lastIndexOf('.'); - if (lastDot > 0) { - extension = filename.substring(lastDot); - baseName = filename.substring(0, lastDot); + private static MultipartFile createMultipartFile(EmailAttachment attachment) { + return new MultipartFile() { + @Override + public String getName() { + return "attachment"; } - uniqueFilename = baseName + "_" + counter + extension; - counter++; + + @Override + public String getOriginalFilename() { + return attachment.getFilename() != null + ? attachment.getFilename() + : "attachment_" + System.currentTimeMillis(); + } + + @Override + public String getContentType() { + return attachment.getContentType() != null + ? attachment.getContentType() + : "application/octet-stream"; + } + + @Override + public boolean isEmpty() { + return attachment.getData() == null || attachment.getData().length == 0; + } + + @Override + public long getSize() { + return attachment.getData() != null ? attachment.getData().length : 0; + } + + @Override + public byte[] getBytes() { + return attachment.getData() != null ? attachment.getData() : new byte[0]; + } + + @Override + public InputStream getInputStream() { + byte[] data = attachment.getData(); + return new ByteArrayInputStream(data != null ? data : new byte[0]); + } + + @Override + public void transferTo(File dest) throws IOException, IllegalStateException { + try (FileOutputStream fos = new FileOutputStream(dest)) { + byte[] data = attachment.getData(); + if (data != null) { + fos.write(data); + } + } + } + }; + } + + private static void addAttachmentsToDocument( + PDDocument document, List attachments) throws IOException { + PDDocumentCatalog catalog = document.getDocumentCatalog(); + + if (catalog == null) { + throw new IOException("PDF document catalog is not accessible"); } - return uniqueFilename; + + PDDocumentNameDictionary documentNames = catalog.getNames(); + if (documentNames == null) { + documentNames = new PDDocumentNameDictionary(catalog); + catalog.setNames(documentNames); + } + + PDEmbeddedFilesNameTreeNode embeddedFilesTree = documentNames.getEmbeddedFiles(); + if (embeddedFilesTree == null) { + embeddedFilesTree = new PDEmbeddedFilesNameTreeNode(); + documentNames.setEmbeddedFiles(embeddedFilesTree); + } + + Map existingNames = embeddedFilesTree.getNames(); + if (existingNames == null) { + existingNames = new HashMap<>(); + } + + for (MultipartFile attachment : attachments) { + String filename = attachment.getOriginalFilename(); + if (filename == null || filename.trim().isEmpty()) { + filename = "attachment_" + System.currentTimeMillis(); + } + + String normalizedFilename = + java.text.Normalizer.normalize(filename, java.text.Normalizer.Form.NFC); + String uniqueFilename = + ensureUniqueFilename(normalizedFilename, existingNames.keySet()); + + PDEmbeddedFile embeddedFile = new PDEmbeddedFile(document, attachment.getInputStream()); + embeddedFile.setSize((int) attachment.getSize()); + + GregorianCalendar currentTime = new GregorianCalendar(); + embeddedFile.setCreationDate(currentTime); + embeddedFile.setModDate(currentTime); + + String contentType = attachment.getContentType(); + if (contentType != null && !contentType.trim().isEmpty()) { + embeddedFile.setSubtype(contentType); + } + + PDComplexFileSpecification fileSpecification = new PDComplexFileSpecification(); + fileSpecification.setFile(uniqueFilename); + fileSpecification.setFileUnicode(uniqueFilename); + fileSpecification.setEmbeddedFile(embeddedFile); + fileSpecification.setEmbeddedFileUnicode(embeddedFile); + + existingNames.put(uniqueFilename, fileSpecification); + } + + embeddedFilesTree.setNames(existingNames); + documentNames.setEmbeddedFiles(embeddedFilesTree); + catalog.setNames(documentNames); + } + + private static String ensureUniqueFilename(String filename, Set existingNames) { + if (!existingNames.contains(filename)) { + return filename; + } + + String baseName; + String extension = ""; + int lastDot = filename.lastIndexOf('.'); + if (lastDot > 0) { + baseName = filename.substring(0, lastDot); + extension = filename.substring(lastDot); + } else { + baseName = filename; + } + + int counter = 1; + String uniqueName; + do { + uniqueName = baseName + "_" + counter + extension; + counter++; + } while (existingNames.contains(uniqueName)); + + return uniqueName; } private static void addAttachmentAnnotationsToDocument( @@ -1316,21 +1738,11 @@ public class EmlToPdf { return; } - // 1. Find the screen position of all attachment markers AttachmentMarkerPositionFinder finder = new AttachmentMarkerPositionFinder(); finder.setSortByPosition(true); // Process pages in order finder.getText(document); List markerPositions = finder.getPositions(); - // 2. Warn if the number of markers and attachments don't match - if (markerPositions.size() != attachments.size()) { - log.warn( - "Found {} attachment markers, but there are {} attachments. Annotation count may be incorrect.", - markerPositions.size(), - attachments.size()); - } - - // 3. Create an invisible annotation over each found marker int annotationsToAdd = Math.min(markerPositions.size(), attachments.size()); for (int i = 0; i < annotationsToAdd; i++) { MarkerPosition position = markerPositions.get(i); @@ -1351,27 +1763,47 @@ public class EmlToPdf { PDAnnotationFileAttachment fileAnnotation = new PDAnnotationFileAttachment(); PDRectangle rect = getPdRectangle(page, x, y); + + PDRectangle mediaBox = page.getMediaBox(); + if (rect.getLowerLeftX() < mediaBox.getLowerLeftX() + || rect.getLowerLeftY() < mediaBox.getLowerLeftY() + || rect.getUpperRightX() > mediaBox.getUpperRightX() + || rect.getUpperRightY() > mediaBox.getUpperRightY()) { + float adjustedX = + Math.max( + mediaBox.getLowerLeftX(), + Math.min( + rect.getLowerLeftX(), + mediaBox.getUpperRightX() - rect.getWidth())); + float adjustedY = + Math.max( + mediaBox.getLowerLeftY(), + Math.min( + rect.getLowerLeftY(), + mediaBox.getUpperRightY() - rect.getHeight())); + rect = new PDRectangle(adjustedX, adjustedY, rect.getWidth(), rect.getHeight()); + } + fileAnnotation.setRectangle(rect); - // Remove visual appearance while keeping clickable functionality + fileAnnotation.setPrinted(false); + fileAnnotation.setHidden(false); + fileAnnotation.setNoView(false); + fileAnnotation.setNoZoom(true); + fileAnnotation.setNoRotate(true); + try { PDAppearanceDictionary appearance = new PDAppearanceDictionary(); PDAppearanceStream normalAppearance = new PDAppearanceStream(document); - normalAppearance.setBBox(new PDRectangle(0, 0, 0, 0)); // Zero-size bounding box + + normalAppearance.setBBox(new PDRectangle(0, 0, rect.getWidth(), rect.getHeight())); appearance.setNormalAppearance(normalAppearance); fileAnnotation.setAppearance(appearance); - } catch (Exception e) { - // If appearance manipulation fails, just set it to null + } catch (RuntimeException e) { fileAnnotation.setAppearance(null); } - // Set invisibility flags but keep it functional - fileAnnotation.setInvisible(true); - fileAnnotation.setHidden(false); // Must be false to remain clickable - fileAnnotation.setNoView(false); // Must be false to remain clickable - fileAnnotation.setPrinted(false); - PDEmbeddedFilesNameTreeNode efTree = document.getDocumentCatalog().getNames().getEmbeddedFiles(); if (efTree != null) { @@ -1384,54 +1816,107 @@ public class EmlToPdf { } } - fileAnnotation.setContents("Click to open: " + attachment.getFilename()); + fileAnnotation.setContents("Embedded attachment: " + attachment.getFilename()); fileAnnotation.setAnnotationName("EmbeddedFile_" + attachment.getEmbeddedFilename()); page.getAnnotations().add(fileAnnotation); - - log.info( - "Added attachment annotation for '{}' on page {}", - attachment.getFilename(), - document.getPages().indexOf(page) + 1); } private static @NotNull PDRectangle getPdRectangle(PDPage page, float x, float y) { - PDRectangle mediaBox = page.getMediaBox(); - float pdfY = mediaBox.getHeight() - y; + PDRectangle cropBox = page.getCropBox(); - float iconWidth = - StyleConstants.ATTACHMENT_ICON_WIDTH; // Keep original size for clickability - float iconHeight = - StyleConstants.ATTACHMENT_ICON_HEIGHT; // Keep original size for clickability + // ISO 32000-1:2008 Section 8.3: PDF coordinate system transforms + // Handle page rotation for proper annotation placement + int rotation = page.getRotation(); + float pdfX = x; + float pdfY = cropBox.getHeight() - y; + + // Apply rotation matrix transform if needed + switch (rotation) { + case 90 -> { + float temp = pdfX; + pdfX = pdfY; + pdfY = cropBox.getWidth() - temp; + } + case 180 -> { + pdfX = cropBox.getWidth() - pdfX; + pdfY = y; + } + case 270 -> { + float temp = pdfX; + pdfX = cropBox.getHeight() - pdfY; + pdfY = temp; + } + default -> { + // 0 degrees - no transformation needed + } + } + + float iconWidth = StyleConstants.ATTACHMENT_ICON_WIDTH; + float iconHeight = StyleConstants.ATTACHMENT_ICON_HEIGHT; + + float paddingX = 2.0f; + float paddingY = 2.0f; - // Keep the full-size rectangle so it remains clickable return new PDRectangle( - x + StyleConstants.ANNOTATION_X_OFFSET, - pdfY - iconHeight + StyleConstants.ANNOTATION_Y_OFFSET, + pdfX + StyleConstants.ANNOTATION_X_OFFSET + paddingX, + pdfY - iconHeight + StyleConstants.ANNOTATION_Y_OFFSET + paddingY, iconWidth, iconHeight); } private static String formatEmailDate(Date date) { if (date == null) return ""; - java.text.SimpleDateFormat formatter = - new java.text.SimpleDateFormat("EEE, MMM d, yyyy 'at' h:mm a", Locale.ENGLISH); + + // RFC 5322 compliant date formatting with timezone awareness + SimpleDateFormat formatter = + new SimpleDateFormat("EEE, MMM d, yyyy 'at' h:mm a z", Locale.ENGLISH); + + // Set timezone to UTC for consistent formatting if not specified + formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + return formatter.format(date); } private static String formatFileSize(long bytes) { - if (bytes < FileSizeConstants.BYTES_IN_KB) { - return bytes + " B"; - } else if (bytes < FileSizeConstants.BYTES_IN_MB) { - return String.format("%.1f KB", bytes / (double) FileSizeConstants.BYTES_IN_KB); - } else if (bytes < FileSizeConstants.BYTES_IN_GB) { - return String.format("%.1f MB", bytes / (double) FileSizeConstants.BYTES_IN_MB); - } else { - return String.format("%.1f GB", bytes / (double) FileSizeConstants.BYTES_IN_GB); - } + return GeneralUtils.formatBytes(bytes); } - // MIME header decoding functionality for RFC 2047 encoded headers - moved to constants + /** + * Safely decode MIME headers using Jakarta Mail if available, fallback to custom implementation + */ + private static String safeMimeDecode(String headerValue) { + if (headerValue == null || headerValue.trim().isEmpty()) { + return ""; + } + + if (!mimeUtilityChecked) { + initializeMimeUtilityDecoding(); + } + + if (mimeUtilityDecodeTextMethod != null) { + try { + return (String) mimeUtilityDecodeTextMethod.invoke(null, headerValue.trim()); + } catch (ReflectiveOperationException | RuntimeException e) { + } + } + + return decodeMimeHeader(headerValue.trim()); + } + + private static synchronized void initializeMimeUtilityDecoding() { + if (mimeUtilityChecked) { + return; // Already initialized + } + + try { + Class mimeUtilityClass = Class.forName("jakarta.mail.internet.MimeUtility"); + mimeUtilityDecodeTextMethod = mimeUtilityClass.getMethod("decodeText", String.class); + } catch (ClassNotFoundException | NoSuchMethodException e) { + mimeUtilityDecodeTextMethod = null; + } + mimeUtilityChecked = true; + } private static String decodeMimeHeader(String encodedText) { if (encodedText == null || encodedText.trim().isEmpty()) { @@ -1440,47 +1925,64 @@ public class EmlToPdf { try { StringBuilder result = new StringBuilder(); - Matcher matcher = MimeConstants.MIME_ENCODED_PATTERN.matcher(encodedText); + Pattern concatenatedPattern = + Pattern.compile( + "(=\\?[^?]+\\?[BbQq]\\?[^?]*\\?=)(\\s*=\\?[^?]+\\?[BbQq]\\?[^?]*\\?=)+"); + Matcher concatenatedMatcher = concatenatedPattern.matcher(encodedText); + String processedText = + concatenatedMatcher.replaceAll( + match -> + match.group() + .replaceAll( + "\\s+(?==\\?)", + "")); // Remove spaces between encoded words + + Matcher matcher = MimeConstants.MIME_ENCODED_PATTERN.matcher(processedText); int lastEnd = 0; while (matcher.find()) { - // Add any text before the encoded part - result.append(encodedText, lastEnd, matcher.start()); + result.append(processedText, lastEnd, matcher.start()); String charset = matcher.group(1); String encoding = matcher.group(2).toUpperCase(); String encodedValue = matcher.group(3); try { - String decodedValue; - if ("B".equals(encoding)) { - // Base64 decoding - byte[] decodedBytes = Base64.getDecoder().decode(encodedValue); - decodedValue = new String(decodedBytes, Charset.forName(charset)); - } else if ("Q".equals(encoding)) { - // Quoted-printable decoding - decodedValue = decodeQuotedPrintable(encodedValue, charset); - } else { - // Unknown encoding, keep original - decodedValue = matcher.group(0); - } + String decodedValue = + switch (encoding) { + case "B" -> { + String cleanBase64 = encodedValue.replaceAll("\\s", ""); + byte[] decodedBytes = Base64.getDecoder().decode(cleanBase64); + + Charset targetCharset; + try { + targetCharset = Charset.forName(charset); + } catch (Exception e) { + // RFC 2047: fallback to UTF-8, then ISO-8859-1 + try { + targetCharset = StandardCharsets.UTF_8; + } catch (Exception fallbackException) { + targetCharset = StandardCharsets.ISO_8859_1; + } + } + yield new String(decodedBytes, targetCharset); + } + case "Q" -> decodeQuotedPrintable(encodedValue, charset); + default -> matcher.group(0); // Return original if unknown encoding + }; result.append(decodedValue); - } catch (Exception e) { - log.warn("Failed to decode MIME header part: {}", matcher.group(0), e); - // If decoding fails, keep the original encoded text - result.append(matcher.group(0)); + } catch (RuntimeException e) { + result.append(matcher.group(0)); // Keep original on decode error } lastEnd = matcher.end(); } - // Add any remaining text after the last encoded part - result.append(encodedText.substring(lastEnd)); + result.append(processedText.substring(lastEnd)); return result.toString(); } catch (Exception e) { - log.warn("Error decoding MIME header: {}", encodedText, e); - return encodedText; // Return original if decoding fails + return encodedText; // Return original on any parsing error } } @@ -1495,96 +1997,36 @@ public class EmlToPdf { try { int value = Integer.parseInt(hex, 16); result.append((char) value); - i += 2; // Skip the hex digits + i += 2; } catch (NumberFormatException e) { - // If hex parsing fails, keep the original character result.append(c); } + } else if (i + 1 == encodedText.length() + || (i + 2 == encodedText.length() + && encodedText.charAt(i + 1) == '\n')) { + if (i + 1 < encodedText.length() && encodedText.charAt(i + 1) == '\n') { + i++; // Skip the newline too + } } else { result.append(c); } } - case '_' -> // In RFC 2047, underscore represents space - result.append(' '); + case '_' -> result.append(' '); // Space encoding in Q encoding default -> result.append(c); } } - // Convert bytes to proper charset byte[] bytes = result.toString().getBytes(StandardCharsets.ISO_8859_1); - return new String(bytes, Charset.forName(charset)); - } - - private static String safeMimeDecode(String headerValue) { - if (headerValue == null) { - return ""; - } - try { - if (isJakartaMailAvailable()) { - // Use Jakarta Mail's MimeUtility for proper MIME decoding - Class mimeUtilityClass = Class.forName("jakarta.mail.internet.MimeUtility"); - Method decodeText = mimeUtilityClass.getMethod("decodeText", String.class); - return (String) decodeText.invoke(null, headerValue.trim()); - } else { - // Fallback to basic MIME decoding - return decodeMimeHeader(headerValue.trim()); - } + Charset targetCharset = Charset.forName(charset); + return new String(bytes, targetCharset); } catch (Exception e) { - log.warn("Failed to decode MIME header, using original: {}", headerValue, e); - return headerValue; - } - } - - private static boolean isValidJakartaMailPart(Object part) { - if (part == null) return false; - - try { - // Check if the object implements jakarta.mail.Part interface - Class partInterface = Class.forName("jakarta.mail.Part"); - if (!partInterface.isInstance(part)) { - return false; - } - - // Additional check for MimePart + // RFC 2047: Enhanced fallback strategy - try UTF-8 first, then ISO-8859-1 try { - Class mimePartInterface = Class.forName("jakarta.mail.internet.MimePart"); - return mimePartInterface.isInstance(part); - } catch (ClassNotFoundException e) { - // MimePart not available, but Part is sufficient - return true; + return new String(bytes, StandardCharsets.UTF_8); + } catch (Exception fallbackException) { + return new String(bytes, StandardCharsets.ISO_8859_1); } - } catch (ClassNotFoundException e) { - log.debug("Jakarta Mail Part interface not available for validation"); - return false; - } - } - - private static boolean isValidJakartaMailMultipart(Object multipart) { - if (multipart == null) return false; - - try { - // Check if the object implements jakarta.mail.Multipart interface - Class multipartInterface = Class.forName("jakarta.mail.Multipart"); - if (!multipartInterface.isInstance(multipart)) { - return false; - } - - // Additional check for MimeMultipart - try { - Class mimeMultipartClass = Class.forName("jakarta.mail.internet.MimeMultipart"); - if (mimeMultipartClass.isInstance(multipart)) { - log.debug("Found MimeMultipart instance for enhanced processing"); - return true; - } - } catch (ClassNotFoundException e) { - log.debug("MimeMultipart not available, using base Multipart interface"); - } - - return true; - } catch (ClassNotFoundException e) { - log.debug("Jakarta Mail Multipart interface not available for validation"); - return false; } } @@ -1593,6 +2035,8 @@ public class EmlToPdf { private String subject; private String from; private String to; + private String cc; + private String bcc; private Date date; private String htmlBody; private String textBody; @@ -1617,12 +2061,10 @@ public class EmlToPdf { private String embeddedFilename; private long sizeBytes; - // New fields for advanced processing private String contentId; private String disposition; private String transferEncoding; - // Custom setter to maintain size calculation logic public void setData(byte[] data) { this.data = data; if (data != null) { @@ -1646,14 +2088,16 @@ public class EmlToPdf { } } - public static class AttachmentMarkerPositionFinder - extends org.apache.pdfbox.text.PDFTextStripper { + public static class AttachmentMarkerPositionFinder extends PDFTextStripper { @Getter private final List positions = new ArrayList<>(); private int currentPageIndex; protected boolean sortByPosition; private boolean isInAttachmentSection; private boolean attachmentSectionFound; + private static final Pattern ATTACHMENT_SECTION_PATTERN = + Pattern.compile("attachments\\s*\\(\\d+\\)", Pattern.CASE_INSENSITIVE); + public AttachmentMarkerPositionFinder() { super(); this.currentPageIndex = 0; @@ -1663,31 +2107,43 @@ public class EmlToPdf { } @Override - protected void startPage(org.apache.pdfbox.pdmodel.PDPage page) throws IOException { + public String getText(PDDocument document) throws IOException { + super.getText(document); + + if (sortByPosition) { + positions.sort( + (a, b) -> { + int pageCompare = Integer.compare(a.getPageIndex(), b.getPageIndex()); + if (pageCompare != 0) return pageCompare; + return Float.compare( + b.getY(), a.getY()); // Descending Y per PDF coordinate system + }); + } + + return ""; // Return empty string as we only need positions + } + + @Override + protected void startPage(PDPage page) throws IOException { super.startPage(page); } @Override - protected void endPage(org.apache.pdfbox.pdmodel.PDPage page) throws IOException { + protected void endPage(PDPage page) throws IOException { currentPageIndex++; super.endPage(page); } @Override - protected void writeString( - String string, List textPositions) + protected void writeString(String string, List textPositions) throws IOException { - // Check if we are entering or exiting the attachment section String lowerString = string.toLowerCase(); - // Look for attachment section start marker - if (lowerString.contains("attachments (")) { + if (ATTACHMENT_SECTION_PATTERN.matcher(lowerString).find()) { isInAttachmentSection = true; attachmentSectionFound = true; } - // Look for attachment section end markers (common patterns that indicate end of - // attachments) if (isInAttachmentSection && (lowerString.contains("") || lowerString.contains("") @@ -1697,12 +2153,11 @@ public class EmlToPdf { isInAttachmentSection = false; } - // Only look for markers if we are in the attachment section if (isInAttachmentSection) { String attachmentMarker = MimeConstants.ATTACHMENT_MARKER; for (int i = 0; (i = string.indexOf(attachmentMarker, i)) != -1; i++) { if (i < textPositions.size()) { - org.apache.pdfbox.text.TextPosition textPosition = textPositions.get(i); + TextPosition textPosition = textPositions.get(i); MarkerPosition position = new MarkerPosition( currentPageIndex,