Refactor logic into multiple files

This commit is contained in:
Balázs Szücs 2025-08-06 20:17:31 +02:00
parent c5eee8cad9
commit 18cc10eab7
4 changed files with 1935 additions and 2090 deletions

View File

@ -0,0 +1,652 @@
package stirling.software.common.util;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import java.util.regex.Pattern;
import lombok.Data;
import lombok.experimental.UtilityClass;
import stirling.software.common.model.api.converters.EmlToPdfRequest;
@UtilityClass
public class EmlParser {
// Cached result of the Jakarta Mail classpath probe; null until isJakartaMailAvailable() has run.
private static volatile Boolean jakartaMailAvailable = null;
// Reflective handle to MimeUtility.decodeText(String); remains null when the lookup fails.
private static volatile Method mimeUtilityDecodeTextMethod = null;
// Set to true once the MimeUtility lookup has been attempted, whether or not it succeeded.
private static volatile boolean mimeUtilityChecked = false;
// Matches a MIME encoded-word of the form =?charset?B|Q?payload?=.
// NOTE(review): not referenced anywhere else in this class.
private static final Pattern MIME_ENCODED_PATTERN =
Pattern.compile("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?=");
// Lower-case fragments compared against lower-cased header values.
private static final String DISPOSITION_ATTACHMENT = "attachment";
private static final String TEXT_PLAIN = "text/plain";
private static final String TEXT_HTML = "text/html";
// NOTE(review): not referenced anywhere else in this class.
private static final String MULTIPART_PREFIX = "multipart/";
// Lower-case header prefixes used by the basic (non-Jakarta) parser on lower-cased lines.
private static final String HEADER_CONTENT_TYPE = "content-type:";
private static final String HEADER_CONTENT_DISPOSITION = "content-disposition:";
private static final String HEADER_CONTENT_TRANSFER_ENCODING = "content-transfer-encoding:";
// Header name passed to the reflective getHeader(String) call (no trailing colon).
private static final String HEADER_CONTENT_ID = "Content-ID";
// Header prefixes passed to extractBasicHeader, which matches them case-insensitively.
private static final String HEADER_SUBJECT = "Subject:";
private static final String HEADER_FROM = "From:";
private static final String HEADER_TO = "To:";
private static final String HEADER_CC = "Cc:";
private static final String HEADER_BCC = "Bcc:";
private static final String HEADER_DATE = "Date:";
/**
 * Reports whether the Jakarta Mail classes used by the reflective parser can be loaded.
 *
 * <p>The probe runs at most once; its outcome is cached in {@code jakartaMailAvailable}.
 *
 * @return {@code true} when every required class was found
 */
private static synchronized boolean isJakartaMailAvailable() {
    if (jakartaMailAvailable != null) {
        return jakartaMailAvailable;
    }
    String[] requiredClasses = {
        "jakarta.mail.internet.MimeMessage",
        "jakarta.mail.Session",
        "jakarta.mail.internet.MimeUtility",
        "jakarta.mail.internet.MimePart",
        "jakarta.mail.internet.MimeMultipart",
        "jakarta.mail.Multipart",
        "jakarta.mail.Part"
    };
    boolean allPresent = true;
    try {
        for (String className : requiredClasses) {
            Class.forName(className);
        }
    } catch (ClassNotFoundException e) {
        // Any missing class disables the advanced parser.
        allPresent = false;
    }
    jakartaMailAvailable = allPresent;
    return allPresent;
}
/**
 * Parses raw EML bytes into an {@link EmailContent}.
 *
 * <p>The input is validated first; the Jakarta Mail based parser is used when its classes are
 * loadable, otherwise the built-in text parser is used.
 *
 * @param emlBytes raw EML file content
 * @param request conversion options forwarded to the selected parser
 * @param customHtmlSanitizer sanitizer forwarded to the selected parser
 * @return the parsed email
 * @throws IOException declared for callers; not thrown by the code visible in this method
 */
public static EmailContent extractEmailContent(
        byte[] emlBytes, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer)
        throws IOException {
    EmlProcessingUtils.validateEmlInput(emlBytes);
    return isJakartaMailAvailable()
            ? extractEmailContentAdvanced(emlBytes, request, customHtmlSanitizer)
            : extractEmailContentBasic(emlBytes, request, customHtmlSanitizer);
}
/**
 * Parses an EML message with plain string operations, without Jakarta Mail.
 *
 * <p>The bytes are decoded as UTF-8. Headers, one body (HTML preferred over plain text) and
 * attachment metadata are extracted. {@code request} and {@code customHtmlSanitizer} are not
 * used by this method.
 */
private static EmailContent extractEmailContentBasic(
        byte[] emlBytes, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer) {
    String rawMessage = new String(emlBytes, StandardCharsets.UTF_8);
    EmailContent parsed = new EmailContent();

    parsed.setSubject(extractBasicHeader(rawMessage, HEADER_SUBJECT));
    parsed.setFrom(extractBasicHeader(rawMessage, HEADER_FROM));
    parsed.setTo(extractBasicHeader(rawMessage, HEADER_TO));
    parsed.setCc(extractBasicHeader(rawMessage, HEADER_CC));
    parsed.setBcc(extractBasicHeader(rawMessage, HEADER_BCC));

    String sentDate = extractBasicHeader(rawMessage, HEADER_DATE);
    if (!sentDate.isEmpty()) {
        parsed.setDateString(sentDate);
    }

    String html = extractHtmlBody(rawMessage);
    if (html == null) {
        String text = extractTextBody(rawMessage);
        if (text == null) {
            text = "Email content could not be parsed";
        }
        parsed.setTextBody(text);
    } else {
        parsed.setHtmlBody(html);
    }

    parsed.getAttachments().addAll(extractAttachmentsBasic(rawMessage));
    return parsed;
}
/**
 * Parses an EML message through Jakarta Mail, accessed reflectively.
 *
 * <p>A {@code MimeMessage} is built from the bytes using a default {@code Session}. Any
 * reflective failure falls back to {@link #extractEmailContentBasic}.
 */
private static EmailContent extractEmailContentAdvanced(
        byte[] emlBytes, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer) {
    try {
        Class<?> sessionType = Class.forName("jakarta.mail.Session");
        Class<?> messageType = Class.forName("jakarta.mail.internet.MimeMessage");

        Object mailSession =
                sessionType
                        .getMethod("getDefaultInstance", Properties.class)
                        .invoke(null, new Properties());

        Constructor<?> messageConstructor =
                messageType.getConstructor(sessionType, InputStream.class);
        Object mimeMessage =
                messageConstructor.newInstance(mailSession, new ByteArrayInputStream(emlBytes));

        return extractFromMimeMessage(mimeMessage, request, customHtmlSanitizer);
    } catch (ReflectiveOperationException e) {
        return extractEmailContentBasic(emlBytes, request, customHtmlSanitizer);
    }
}
/**
 * Copies subject, sender, recipients, sent date and body from a reflectively accessed
 * message object into a new {@link EmailContent}.
 *
 * <p>If any step fails, the header fields are replaced with placeholder values and the text
 * body with an error message.
 */
private static EmailContent extractFromMimeMessage(
        Object message, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer) {
    EmailContent result = new EmailContent();
    try {
        Class<?> type = message.getClass();

        String rawSubject = (String) type.getMethod("getSubject").invoke(message);
        if (rawSubject == null) {
            result.setSubject("No Subject");
        } else {
            result.setSubject(safeMimeDecode(rawSubject));
        }

        Object[] senders = (Object[]) type.getMethod("getFrom").invoke(message);
        result.setFrom(buildAddressString(senders));

        extractRecipients(message, type, result);

        result.setDate((Date) type.getMethod("getSentDate").invoke(message));

        Object body = type.getMethod("getContent").invoke(message);
        processMessageContent(message, body, result, request, customHtmlSanitizer);
    } catch (ReflectiveOperationException | RuntimeException e) {
        result.setSubject("Email Conversion");
        result.setFrom("Unknown");
        result.setTo("Unknown");
        result.setCc("");
        result.setBcc("");
        result.setTextBody("Email content could not be parsed with advanced processing");
    }
    return result;
}
/**
 * Fills the To, Cc and Bcc fields of {@code content} from the message.
 *
 * <p>Recipients are read per type via {@code getRecipients(RecipientType)}. If that fails
 * reflectively, all recipients from {@code getAllRecipients()} are stored in To and Cc/Bcc
 * are left empty; if that fails too, all three fields are set to the empty string.
 */
private static void extractRecipients(
        Object message, Class<?> messageClass, EmailContent content) {
    try {
        Class<?> recipientType = Class.forName("jakarta.mail.Message$RecipientType");
        Method getRecipients = messageClass.getMethod("getRecipients", recipientType);

        Object to = recipientType.getField("TO").get(null);
        content.setTo(buildAddressString((Object[]) getRecipients.invoke(message, to)));

        Object cc = recipientType.getField("CC").get(null);
        content.setCc(buildAddressString((Object[]) getRecipients.invoke(message, cc)));

        Object bcc = recipientType.getField("BCC").get(null);
        content.setBcc(buildAddressString((Object[]) getRecipients.invoke(message, bcc)));
    } catch (ReflectiveOperationException e) {
        String everyone = "";
        try {
            Object[] all =
                    (Object[]) messageClass.getMethod("getAllRecipients").invoke(message);
            everyone = buildAddressString(all);
        } catch (ReflectiveOperationException ex) {
            // Leave the recipient list empty.
        }
        content.setTo(everyone);
        content.setCc("");
        content.setBcc("");
    }
}
/**
 * Joins the string form of each address with {@code ", "} after MIME-decoding it.
 *
 * @param addresses address objects, may be {@code null}
 * @return the joined list, or the empty string for a {@code null} or empty array
 */
private static String buildAddressString(Object[] addresses) {
    if (addresses == null || addresses.length == 0) {
        return "";
    }
    List<String> decoded = new ArrayList<>(addresses.length);
    for (Object address : addresses) {
        decoded.add(safeMimeDecode(address.toString()));
    }
    return String.join(", ", decoded);
}
/**
 * Stores the top-level message content in {@code content}.
 *
 * <p>String content becomes the HTML body when the message content type contains
 * {@code text/html}, otherwise the text body. Multipart content is handed to
 * {@link #processMultipart}. Other content types are ignored.
 */
private static void processMessageContent(
        Object message,
        Object messageContent,
        EmailContent content,
        EmlToPdfRequest request,
        CustomHtmlSanitizer customHtmlSanitizer) {
    try {
        if (!(messageContent instanceof String stringContent)) {
            Class<?> multipartType = Class.forName("jakarta.mail.Multipart");
            if (multipartType.isInstance(messageContent)) {
                processMultipart(messageContent, content, request, customHtmlSanitizer, 0);
            }
            return;
        }

        String contentType =
                (String) message.getClass().getMethod("getContentType").invoke(message);
        boolean isHtml = contentType != null && contentType.toLowerCase().contains(TEXT_HTML);
        if (isHtml) {
            content.setHtmlBody(stringContent);
        } else {
            content.setTextBody(stringContent);
        }
    } catch (ReflectiveOperationException | ClassCastException e) {
        content.setTextBody("Email content could not be parsed with advanced processing");
    }
}
/**
 * Walks every body part of a multipart container and passes it to {@link #processPart}
 * with {@code depth + 1}.
 *
 * <p>When {@code depth} exceeds 10 the HTML body is replaced by an error message and nothing
 * is processed. A reflective failure also replaces the HTML body with an error message.
 */
private static void processMultipart(
        Object multipart,
        EmailContent content,
        EmlToPdfRequest request,
        CustomHtmlSanitizer customHtmlSanitizer,
        int depth) {
    final int depthLimit = 10;
    if (depth > depthLimit) {
        content.setHtmlBody("<div class=\"error\">Maximum multipart depth exceeded</div>");
        return;
    }
    try {
        Class<?> type = multipart.getClass();
        int partCount = (Integer) type.getMethod("getCount").invoke(multipart);
        Method getBodyPart = type.getMethod("getBodyPart", int.class);
        int index = 0;
        while (index < partCount) {
            Object bodyPart = getBodyPart.invoke(multipart, index);
            processPart(bodyPart, content, request, customHtmlSanitizer, depth + 1);
            index++;
        }
    } catch (ReflectiveOperationException | ClassCastException e) {
        content.setHtmlBody("<div class=\"error\">Error processing multipart content</div>");
    }
}
/**
 * Classifies one MIME body part and merges it into {@code content}.
 *
 * <ul>
 *   <li>{@code text/plain} without a disposition becomes the text body.
 *   <li>{@code text/html} without a disposition becomes the HTML body, sanitized when a
 *       sanitizer is supplied.
 *   <li>A part with an {@code attachment} disposition or a non-blank filename is handled by
 *       {@link #processAttachment}.
 *   <li>A nested {@code multipart/*} part is processed recursively.
 * </ul>
 *
 * <p>Failures are swallowed so that one bad part does not prevent the remaining parts from
 * being processed.
 *
 * @param depth nesting level of this part, as passed by {@link #processMultipart}
 */
private static void processPart(
        Object part,
        EmailContent content,
        EmlToPdfRequest request,
        CustomHtmlSanitizer customHtmlSanitizer,
        int depth) {
    try {
        Class<?> partClass = part.getClass();
        Method isMimeType = partClass.getMethod("isMimeType", String.class);
        Method getContent = partClass.getMethod("getContent");
        Method getDisposition = partClass.getMethod("getDisposition");
        Method getFileName = partClass.getMethod("getFileName");
        Method getContentType = partClass.getMethod("getContentType");
        Method getHeader = partClass.getMethod("getHeader", String.class);

        Object disposition = getDisposition.invoke(part);
        String filename = (String) getFileName.invoke(part);
        String contentType = (String) getContentType.invoke(part);
        String normalizedDisposition =
                disposition != null ? ((String) disposition).toLowerCase() : null;

        if ((Boolean) isMimeType.invoke(part, TEXT_PLAIN) && normalizedDisposition == null) {
            Object partContent = getContent.invoke(part);
            if (partContent instanceof String stringContent) {
                content.setTextBody(stringContent);
            }
        } else if ((Boolean) isMimeType.invoke(part, TEXT_HTML)
                && normalizedDisposition == null) {
            Object partContent = getContent.invoke(part);
            if (partContent instanceof String stringContent) {
                String htmlBody =
                        customHtmlSanitizer != null
                                ? customHtmlSanitizer.sanitize(stringContent)
                                : stringContent;
                content.setHtmlBody(htmlBody);
            }
        } else if ((normalizedDisposition != null
                        && normalizedDisposition.contains(DISPOSITION_ATTACHMENT))
                || (filename != null && !filename.trim().isEmpty())) {
            processAttachment(
                    part, content, request, getHeader, getContent, filename, contentType);
        } else if ((Boolean) isMimeType.invoke(part, "multipart/*")) {
            Object multipartContent = getContent.invoke(part);
            if (multipartContent != null) {
                Class<?> multipartClass = Class.forName("jakarta.mail.Multipart");
                if (multipartClass.isInstance(multipartContent)) {
                    // processMultipart already added 1 when it called this method, so the
                    // depth is passed through unchanged. Adding 1 again here would count
                    // every nesting level twice and halve the effective depth limit.
                    processMultipart(
                            multipartContent, content, request, customHtmlSanitizer, depth);
                }
            }
        }
    } catch (ReflectiveOperationException | RuntimeException ignored) {
        // Continue processing other parts if one fails
    }
}
/**
 * Records an attachment part on {@code content}.
 *
 * <p>The attachment counter is always incremented. An {@link EmailAttachment} entry is only
 * created when the filename is non-blank. A non-blank {@code Content-ID} header marks the
 * attachment as embedded. Data is loaded when the request asks for attachments or when the
 * attachment is embedded.
 */
private static void processAttachment(
        Object part,
        EmailContent content,
        EmlToPdfRequest request,
        Method getHeader,
        Method getContent,
        String filename,
        String contentType) {
    content.setAttachmentCount(content.getAttachmentCount() + 1);
    if (filename == null || filename.trim().isEmpty()) {
        return;
    }

    EmailAttachment attachment = new EmailAttachment();
    attachment.setFilename(safeMimeDecode(filename));
    attachment.setContentType(contentType);

    try {
        String[] headerValues = (String[]) getHeader.invoke(part, HEADER_CONTENT_ID);
        if (headerValues != null) {
            for (String headerValue : headerValues) {
                if (headerValue == null || headerValue.trim().isEmpty()) {
                    continue;
                }
                // First usable Content-ID wins; angle brackets are stripped.
                attachment.setEmbedded(true);
                attachment.setContentId(headerValue.trim().replaceAll("[<>]", ""));
                break;
            }
        }
    } catch (ReflectiveOperationException ignored) {
        // Without a readable Content-ID the attachment is treated as not embedded.
    }

    boolean loadData =
            (request != null && request.isIncludeAttachments()) || attachment.isEmbedded();
    if (loadData) {
        extractAttachmentData(part, attachment, getContent, request);
    }
    content.getAttachments().add(attachment);
}
/**
 * Loads the bytes of an attachment part and stores them on {@code attachment}.
 *
 * <p>Content is accepted as an {@link InputStream}, a {@code byte[]} or a {@code String}
 * (encoded as UTF-8). The size is always recorded. The data itself is only kept when it is
 * within the size limit (the request's limit in MB, or 10 MB without a request) or when the
 * attachment is embedded. Any failure leaves the attachment without data.
 */
private static void extractAttachmentData(
        Object part, EmailAttachment attachment, Method getContent, EmlToPdfRequest request) {
    try {
        Object raw = getContent.invoke(part);
        byte[] payload = null;

        if (raw instanceof InputStream source) {
            try (InputStream stream = source) {
                payload = stream.readAllBytes();
            } catch (IOException e) {
                if (!attachment.isEmbedded()) {
                    throw new RuntimeException(e);
                }
                payload = new byte[0];
            }
        } else if (raw instanceof byte[] bytes) {
            payload = bytes;
        } else if (raw instanceof String text) {
            payload = text.getBytes(StandardCharsets.UTF_8);
        }

        if (payload == null) {
            return;
        }

        long limitMb = request != null ? request.getMaxAttachmentSizeMB() : 10L;
        long limitBytes = limitMb * 1024 * 1024;
        boolean keepData = payload.length <= limitBytes || attachment.isEmbedded();
        if (keepData) {
            attachment.setData(payload);
        }
        attachment.setSizeBytes(payload.length);
    } catch (ReflectiveOperationException | RuntimeException e) {
        // Continue without attachment data
    }
}
/**
 * Returns the MIME-decoded value of the first header starting with {@code headerName}
 * (case-insensitive), with folded continuation lines joined by a single space.
 *
 * <p>The search stops at the first blank line.
 *
 * @return the header value, or the empty string when the header is absent or parsing fails
 */
private static String extractBasicHeader(String emlContent, String headerName) {
    try {
        String[] lines = emlContent.split("\r?\n");
        String wanted = headerName.toLowerCase();
        for (int index = 0; index < lines.length; index++) {
            String current = lines[index];
            if (!current.toLowerCase().startsWith(wanted)) {
                if (current.trim().isEmpty()) {
                    break;
                }
                continue;
            }

            StringBuilder unfolded =
                    new StringBuilder(current.substring(headerName.length()).trim());
            int next = index + 1;
            while (next < lines.length
                    && (lines[next].startsWith(" ") || lines[next].startsWith("\t"))) {
                unfolded.append(" ").append(lines[next].trim());
                next++;
            }
            return safeMimeDecode(unfolded.toString());
        }
    } catch (RuntimeException e) {
        // Ignore errors in header extraction
    }
    return "";
}
/**
 * Extracts the body that follows the first {@code content-type: text/html} header.
 *
 * <p>The body starts after the first blank line following that header and ends at the
 * position reported by {@link #findPartEnd}.
 *
 * @return the trimmed HTML body, or {@code null} when no HTML part is found or parsing fails
 */
private static String extractHtmlBody(String emlContent) {
    try {
        int htmlStart =
                emlContent.toLowerCase().indexOf(HEADER_CONTENT_TYPE + " " + TEXT_HTML);
        if (htmlStart == -1) {
            return null;
        }

        // Track which separator matched so exactly that many characters are skipped. The
        // previous check inspected the character after the separator start, which is '\n'
        // for both forms, so a CRLF separator was only half skipped.
        int separator = emlContent.indexOf("\r\n\r\n", htmlStart);
        int separatorLength = 4;
        if (separator == -1) {
            separator = emlContent.indexOf("\n\n", htmlStart);
            separatorLength = 2;
        }
        if (separator == -1) {
            return null;
        }

        int bodyStart = separator + separatorLength;
        // Clamp the end so a message without a trailing newline cannot overrun the string.
        int bodyEnd = Math.min(findPartEnd(emlContent, bodyStart), emlContent.length());
        return emlContent.substring(bodyStart, bodyEnd).trim();
    } catch (RuntimeException e) {
        return null;
    }
}
/**
 * Extracts a plain-text body.
 *
 * <p>When a {@code content-type: text/plain} header exists, the body is the text after the
 * first blank line following it. Otherwise the text after the first blank line of the whole
 * message is used. The body ends at the position reported by {@link #findPartEnd}.
 *
 * @return the trimmed text body, or {@code null} when no body is found or parsing fails
 */
private static String extractTextBody(String emlContent) {
    try {
        int textStart =
                emlContent.toLowerCase().indexOf(HEADER_CONTENT_TYPE + " " + TEXT_PLAIN);
        // Without a text/plain header, search for the header/body separator from the start.
        int searchFrom = Math.max(textStart, 0);

        // Track which separator matched so exactly that many characters are skipped. The
        // previous check inspected the character after the separator start, which is '\n'
        // for both forms, so a CRLF separator was only half skipped.
        int separator = emlContent.indexOf("\r\n\r\n", searchFrom);
        int separatorLength = 4;
        if (separator == -1) {
            separator = emlContent.indexOf("\n\n", searchFrom);
            separatorLength = 2;
        }
        if (separator == -1) {
            return null;
        }

        int bodyStart = separator + separatorLength;
        // Clamp the end so a message without a trailing newline cannot overrun the string.
        int bodyEnd = Math.min(findPartEnd(emlContent, bodyStart), emlContent.length());
        return emlContent.substring(bodyStart, bodyEnd).trim();
    } catch (RuntimeException e) {
        return null;
    }
}
/**
 * Finds where the MIME part beginning at {@code start} ends.
 *
 * <p>The part ends at the start of the first line that begins with {@code --} and is longer
 * than 10 characters (a trailing {@code \r} is not counted), or at the end of
 * {@code content} when no such line exists.
 *
 * <p>Offsets are taken from {@code content} itself, so CRLF line endings and a missing final
 * newline are handled correctly. The previous implementation summed line lengths plus one
 * character per line, which truncated CRLF bodies and could return an offset past the end of
 * the string.
 *
 * @param content full message text
 * @param start offset of the first character of the part body
 * @return end offset (exclusive) of the part body, never greater than
 *     {@code content.length()}
 */
private static int findPartEnd(String content, int start) {
    int length = content.length();
    int lineStart = start;
    while (lineStart < length) {
        int newline = content.indexOf('\n', lineStart);
        int lineEnd = newline == -1 ? length : newline;
        // Exclude the '\r' of a CRLF terminator from the line text.
        int textEnd =
                (lineEnd > lineStart && content.charAt(lineEnd - 1) == '\r')
                        ? lineEnd - 1
                        : lineEnd;
        if (textEnd - lineStart > 10 && content.startsWith("--", lineStart)) {
            return lineStart;
        }
        if (newline == -1) {
            break;
        }
        lineStart = newline + 1;
    }
    return length;
}
/**
 * Collects attachment metadata (filename, content type, transfer encoding) from the raw
 * message text without Jakarta Mail. Attachment data is not extracted.
 *
 * <p>The message is scanned as alternating header sections and bodies. A header section ends
 * at the first blank line; a body ends at the next line starting with {@code --} (a
 * multipart boundary). Folded header lines (continuations starting with a space or tab) are
 * joined to the header they belong to, so a {@code filename=} parameter placed on a
 * continuation line is recognised.
 *
 * @return the attachments found; empty when none are found or parsing fails
 */
private static List<EmailAttachment> extractAttachmentsBasic(String emlContent) {
    List<EmailAttachment> attachments = new ArrayList<>();
    try {
        List<String> headerLines = new ArrayList<>();
        boolean inHeaders = true;
        for (String line : emlContent.split("\r?\n")) {
            if (!inHeaders) {
                // Body text is skipped until the next boundary line opens a new part.
                if (line.startsWith("--")) {
                    inHeaders = true;
                }
                continue;
            }
            if (line.trim().isEmpty()) {
                EmailAttachment attachment = buildAttachmentFromPartHeaders(headerLines);
                if (attachment != null) {
                    attachments.add(attachment);
                }
                headerLines.clear();
                inHeaders = false;
            } else if ((line.startsWith(" ") || line.startsWith("\t"))
                    && !headerLines.isEmpty()) {
                // Unfold: append the continuation to the previous header line.
                int last = headerLines.size() - 1;
                headerLines.set(last, headerLines.get(last) + " " + line.trim());
            } else {
                headerLines.add(line);
            }
        }
    } catch (RuntimeException e) {
        // Continue with whatever was collected so far
    }
    return attachments;
}

/**
 * Builds an attachment entry from the unfolded headers of one MIME part.
 *
 * @return the attachment, or {@code null} when the headers do not describe an attachment
 */
private static EmailAttachment buildAttachmentFromPartHeaders(List<String> headerLines) {
    String contentType = "";
    String disposition = "";
    String filename = "";
    String encoding = "";
    for (String headerLine : headerLines) {
        String header = headerLine.trim();
        String lowerHeader = header.toLowerCase();
        if (lowerHeader.startsWith(HEADER_CONTENT_TYPE)) {
            contentType = header.substring(HEADER_CONTENT_TYPE.length()).trim();
        } else if (lowerHeader.startsWith(HEADER_CONTENT_DISPOSITION)) {
            disposition = header.substring(HEADER_CONTENT_DISPOSITION.length()).trim();
            filename = extractFilenameFromDisposition(disposition);
        } else if (lowerHeader.startsWith(HEADER_CONTENT_TRANSFER_ENCODING)) {
            encoding = header.substring(HEADER_CONTENT_TRANSFER_ENCODING.length()).trim();
        }
    }
    if (!isAttachment(disposition, filename, contentType)) {
        return null;
    }
    EmailAttachment attachment = new EmailAttachment();
    attachment.setFilename(filename);
    attachment.setContentType(contentType);
    attachment.setTransferEncoding(encoding);
    return attachment;
}
/**
 * Decides whether a part's headers describe an attachment.
 *
 * <p>A non-empty filename is always required. In addition, the disposition must contain
 * {@code attachment}, or the content type must not start with {@code text/}, or the content
 * type must contain {@code application/}.
 */
private static boolean isAttachment(String disposition, String filename, String contentType) {
    String lowerDisposition = disposition.toLowerCase();
    boolean hasFilename = !filename.isEmpty();
    if (lowerDisposition.contains(DISPOSITION_ATTACHMENT) && hasFilename) {
        return true;
    }
    String lowerType = contentType.toLowerCase();
    boolean nonTextOrApplication =
            !lowerType.startsWith("text/") || lowerType.contains("application/");
    return hasFilename && nonTextOrApplication;
}
/**
 * Extracts the filename from a {@code Content-Disposition} header value.
 *
 * <p>An RFC 2231 extended parameter ({@code filename*=charset'lang'value}) is preferred and
 * percent-decoded as UTF-8. Otherwise the plain {@code filename=} parameter is used and
 * MIME-decoded. Parameter names are matched case-insensitively, and a value carrying only
 * {@code filename*=} is accepted.
 *
 * @param disposition header value, may be {@code null}
 * @return the decoded filename, or the empty string when none is present
 */
private static String extractFilenameFromDisposition(String disposition) {
    if (disposition == null) {
        return "";
    }
    String lowerDisposition = disposition.toLowerCase();

    // Handle filename*= (RFC 2231 encoded filename)
    int extendedIndex = lowerDisposition.indexOf("filename*=");
    if (extendedIndex != -1) {
        String extendedValue =
                readDispositionParameter(disposition, extendedIndex + "filename*=".length());
        String[] parts = extendedValue.split("'", 3);
        if (parts.length == 3) {
            // '+' is a literal character in an RFC 2231 value, but the URL decoder would
            // turn it into a space; protect it before decoding.
            return EmlProcessingUtils.decodeUrlEncoded(parts[2].replace("+", "%2B"));
        }
    }

    // Handle regular filename=
    int plainIndex = lowerDisposition.indexOf("filename=");
    if (plainIndex == -1) {
        return "";
    }
    String filename = readDispositionParameter(disposition, plainIndex + "filename=".length());
    return safeMimeDecode(filename);
}

/**
 * Reads one parameter value starting at {@code valueStart}. A value in double quotes is
 * returned without the quotes and may contain {@code ;}. An unquoted value ends at the next
 * {@code ;} or at the end of the header.
 */
private static String readDispositionParameter(String header, int valueStart) {
    if (valueStart > header.length()) {
        return "";
    }
    int cursor = valueStart;
    while (cursor < header.length() && Character.isWhitespace(header.charAt(cursor))) {
        cursor++;
    }
    if (cursor < header.length() && header.charAt(cursor) == '"') {
        int closingQuote = header.indexOf('"', cursor + 1);
        if (closingQuote != -1) {
            return header.substring(cursor + 1, closingQuote);
        }
    }
    int valueEnd = header.indexOf(';', valueStart);
    if (valueEnd == -1) {
        valueEnd = header.length();
    }
    return header.substring(valueStart, valueEnd).trim().replaceAll("^\"|\"$", "");
}
/**
 * Decodes MIME encoded-words in a header value.
 *
 * <p>{@code MimeUtility.decodeText} is used when it was found reflectively; the lookup is
 * performed once. If it is unavailable or fails, {@link EmlProcessingUtils#decodeMimeHeader}
 * is used instead.
 *
 * @param headerValue raw header value, may be {@code null}
 * @return the decoded value; the empty string for {@code null} or blank input
 */
public static String safeMimeDecode(String headerValue) {
    if (headerValue == null) {
        return "";
    }
    String trimmed = headerValue.trim();
    if (trimmed.isEmpty()) {
        return "";
    }

    if (!mimeUtilityChecked) {
        synchronized (EmlParser.class) {
            if (!mimeUtilityChecked) {
                initializeMimeUtilityDecoding();
            }
        }
    }

    Method decodeText = mimeUtilityDecodeTextMethod;
    if (decodeText != null) {
        try {
            return (String) decodeText.invoke(null, trimmed);
        } catch (ReflectiveOperationException | RuntimeException e) {
            // Fall through to custom implementation
        }
    }
    return EmlProcessingUtils.decodeMimeHeader(trimmed);
}
/**
 * Looks up {@code jakarta.mail.internet.MimeUtility.decodeText(String)} reflectively and
 * stores the result (or {@code null} when unavailable), then marks the lookup as done.
 */
private static void initializeMimeUtilityDecoding() {
    Method decodeText = null;
    try {
        decodeText =
                Class.forName("jakarta.mail.internet.MimeUtility")
                        .getMethod("decodeText", String.class);
    } catch (ClassNotFoundException | NoSuchMethodException e) {
        // Leave the method handle null; callers use the fallback decoder.
    }
    mimeUtilityDecodeTextMethod = decodeText;
    mimeUtilityChecked = true;
}
/** Parsed representation of one email: headers, one body and attachment metadata. */
@Data
public static class EmailContent {
    private String subject;
    private String from;
    private String to;
    private String cc;
    private String bcc;
    /** Sent date as a {@link Date}; set by the Jakarta Mail based parser. */
    private Date date;
    /** Raw {@code Date:} header text; set by the basic parser. */
    private String dateString; // For basic parsing fallback
    private String htmlBody;
    private String textBody;
    private int attachmentCount;
    private List<EmailAttachment> attachments = new ArrayList<>();

    /** Stores the HTML body with every carriage return removed. */
    public void setHtmlBody(String htmlBody) {
        this.htmlBody = withoutCarriageReturns(htmlBody);
    }

    /** Stores the text body with every carriage return removed. */
    public void setTextBody(String textBody) {
        this.textBody = withoutCarriageReturns(textBody);
    }

    private static String withoutCarriageReturns(String value) {
        if (value == null) {
            return null;
        }
        return value.replace("\r", "");
    }
}
/** Metadata and optional data of one email attachment. */
@Data
public static class EmailAttachment {
    private String filename;
    private String contentType;
    private byte[] data;
    private boolean embedded;
    private String embeddedFilename;
    private long sizeBytes;
    private String contentId;
    private String disposition;
    private String transferEncoding;

    /**
     * Stores the attachment bytes. For non-null data the recorded size is updated to the
     * array length; for {@code null} the recorded size is left unchanged.
     */
    public void setData(byte[] data) {
        this.data = data;
        if (data == null) {
            return;
        }
        this.sizeBytes = data.length;
    }
}
}

View File

@ -0,0 +1,601 @@
package stirling.software.common.util;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.experimental.UtilityClass;
import stirling.software.common.model.api.converters.EmlToPdfRequest;
import stirling.software.common.model.api.converters.HTMLToPdfRequest;
@UtilityClass
public class EmlProcessingUtils {
// Style constants
// Values substituted into the stylesheet produced by appendEnhancedStyles.
private static final int DEFAULT_FONT_SIZE = 12;
private static final String DEFAULT_FONT_FAMILY = "Helvetica, sans-serif";
private static final float DEFAULT_LINE_HEIGHT = 1.4f;
// Parsed with Float.parseFloat in createHtmlRequest.
private static final String DEFAULT_ZOOM = "1.0";
private static final String DEFAULT_TEXT_COLOR = "#202124";
private static final String DEFAULT_BACKGROUND_COLOR = "#ffffff";
private static final String DEFAULT_BORDER_COLOR = "#e8eaed";
private static final String ATTACHMENT_BACKGROUND_COLOR = "#f9f9f9";
private static final String ATTACHMENT_BORDER_COLOR = "#eeeeee";
// Number of leading bytes inspected by isInvalidEmlFormat.
private static final int EML_CHECK_LENGTH = 8192;
// Minimum number of recognised header groups for input to count as EML without MIME markers.
private static final int MIN_HEADER_COUNT_FOR_VALID_EML = 2;
// MIME type detection
// Keys are lower-case file extensions including the leading dot.
private static final Map<String, String> EXTENSION_TO_MIME_TYPE =
Map.of(
".png", "image/png",
".jpg", "image/jpeg",
".jpeg", "image/jpeg",
".gif", "image/gif",
".bmp", "image/bmp",
".webp", "image/webp",
".svg", "image/svg+xml",
".ico", "image/x-icon",
".tiff", "image/tiff",
".tif", "image/tiff");
/**
 * Rejects input that cannot be an EML message.
 *
 * @param emlBytes raw file content
 * @throws IllegalArgumentException when the input is {@code null}, empty, or does not look
 *     like an email according to {@link #isInvalidEmlFormat}
 */
public static void validateEmlInput(byte[] emlBytes) {
    boolean missing = emlBytes == null || emlBytes.length == 0;
    if (missing) {
        throw new IllegalArgumentException("EML file is empty or null");
    }
    boolean malformed = isInvalidEmlFormat(emlBytes);
    if (malformed) {
        throw new IllegalArgumentException("Invalid EML file format");
    }
}
/**
 * Heuristically checks whether the input does not look like an email.
 *
 * <p>Only the first {@code EML_CHECK_LENGTH} bytes are inspected, decoded as UTF-8 or, when
 * that yields replacement characters, as ISO-8859-1. The input is considered invalid when
 * fewer than {@code MIN_HEADER_COUNT_FOR_VALID_EML} header groups are found and no MIME
 * marker is present. The checks are substring matches on the lower-cased text.
 *
 * @return {@code true} when the input looks invalid; {@code false} otherwise, including when
 *     the check itself fails
 */
private static boolean isInvalidEmlFormat(byte[] emlBytes) {
    try {
        int prefixLength = Math.min(emlBytes.length, EML_CHECK_LENGTH);
        String prefix;
        try {
            prefix = new String(emlBytes, 0, prefixLength, StandardCharsets.UTF_8);
            if (prefix.contains("\uFFFD")) {
                prefix = new String(emlBytes, 0, prefixLength, StandardCharsets.ISO_8859_1);
            }
        } catch (Exception e) {
            prefix = new String(emlBytes, 0, prefixLength, StandardCharsets.ISO_8859_1);
        }
        String lower = prefix.toLowerCase(Locale.ROOT);

        boolean[] headerSignals = {
            lower.contains("from:") || lower.contains("return-path:"),
            lower.contains("subject:"),
            lower.contains("message-id:"),
            lower.contains("date:"),
            lower.contains("to:") || lower.contains("cc:") || lower.contains("bcc:")
        };
        int headerCount = 0;
        for (boolean present : headerSignals) {
            if (present) {
                headerCount++;
            }
        }
        if (headerCount >= MIN_HEADER_COUNT_FOR_VALID_EML) {
            return false;
        }

        boolean hasMimeStructure =
                lower.contains("multipart/")
                        || lower.contains("text/plain")
                        || lower.contains("text/html")
                        || lower.contains("boundary=");
        return !hasMimeStructure;
    } catch (RuntimeException e) {
        return false;
    }
}
/**
 * Renders a parsed email as a complete HTML document.
 *
 * <p>The document contains a header (subject, From, To, and CC/BCC/Date when present), the
 * body (HTML body preferred, then text body, then a "no content" placeholder) and, when the
 * email has attachments, an attachments section.
 *
 * @param content parsed email to render
 * @param request conversion options, forwarded to the attachments section
 * @param customHtmlSanitizer sanitizer forwarded to the text and body helpers
 * @return the HTML document as a string
 */
public static String generateEnhancedEmailHtml(
EmlParser.EmailContent content,
EmlToPdfRequest request,
CustomHtmlSanitizer customHtmlSanitizer) {
StringBuilder html = new StringBuilder();
// Document head: the subject is used as the page title.
html.append(
String.format(
"""
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8">
<title>%s</title>
<style>
""",
sanitizeText(content.getSubject(), customHtmlSanitizer)));
appendEnhancedStyles(html);
html.append(
"""
</style>
</head><body>
""");
// Header block with subject, sender and primary recipients.
html.append(
String.format(
"""
<div class="email-container">
<div class="email-header">
<h1>%s</h1>
<div class="email-meta">
<div><strong>From:</strong> %s</div>
<div><strong>To:</strong> %s</div>
""",
sanitizeText(content.getSubject(), customHtmlSanitizer),
sanitizeText(content.getFrom(), customHtmlSanitizer),
sanitizeText(content.getTo(), customHtmlSanitizer)));
// CC and BCC rows are only emitted when non-blank.
if (content.getCc() != null && !content.getCc().trim().isEmpty()) {
html.append(
String.format(
"<div><strong>CC:</strong> %s</div>\n",
sanitizeText(content.getCc(), customHtmlSanitizer)));
}
if (content.getBcc() != null && !content.getBcc().trim().isEmpty()) {
html.append(
String.format(
"<div><strong>BCC:</strong> %s</div>\n",
sanitizeText(content.getBcc(), customHtmlSanitizer)));
}
// Prefer the parsed Date; fall back to the raw date string.
if (content.getDate() != null) {
html.append(
String.format(
"<div><strong>Date:</strong> %s</div>\n",
PdfAttachmentHandler.formatEmailDate(content.getDate())));
} else if (content.getDateString() != null && !content.getDateString().trim().isEmpty()) {
html.append(
String.format(
"<div><strong>Date:</strong> %s</div>\n",
sanitizeText(content.getDateString(), customHtmlSanitizer)));
}
// Closes email-meta and email-header.
html.append("</div></div>\n");
html.append("<div class=\"email-body\">\n");
// Body: HTML first, then plain text converted to HTML, then a placeholder.
if (content.getHtmlBody() != null && !content.getHtmlBody().trim().isEmpty()) {
String processedHtml =
processEmailHtmlBody(content.getHtmlBody(), content, customHtmlSanitizer);
html.append(processedHtml);
} else if (content.getTextBody() != null && !content.getTextBody().trim().isEmpty()) {
html.append(
String.format(
"<div class=\"text-body\">%s</div>",
convertTextToHtml(content.getTextBody(), customHtmlSanitizer)));
} else {
html.append("<div class=\"no-content\"><p><em>No content available</em></p></div>");
}
html.append("</div>\n");
// The attachments section is shown when either the counter or the list is non-empty.
if (content.getAttachmentCount() > 0 || !content.getAttachments().isEmpty()) {
appendAttachmentsSection(html, content, request, customHtmlSanitizer);
}
html.append("</div>\n</body></html>");
return html.toString();
}
/**
 * Prepares an email's HTML body for rendering.
 *
 * <p>The body is sanitized when a sanitizer is supplied, {@code position: fixed} and
 * {@code position: absolute} declarations are removed, and inline images are processed when
 * the email has attachments.
 *
 * @param htmlBody raw HTML body, may be {@code null}
 * @param emailContent the email the body belongs to, may be {@code null}
 * @param customHtmlSanitizer optional sanitizer
 * @return the processed HTML; the empty string when {@code htmlBody} is {@code null}
 */
public static String processEmailHtmlBody(
        String htmlBody,
        EmlParser.EmailContent emailContent,
        CustomHtmlSanitizer customHtmlSanitizer) {
    if (htmlBody == null) {
        return "";
    }

    String cleaned = htmlBody;
    if (customHtmlSanitizer != null) {
        cleaned = customHtmlSanitizer.sanitize(htmlBody);
    }
    cleaned =
            cleaned.replaceAll("(?i)\\s*position\\s*:\\s*fixed[^;]*;?", "")
                    .replaceAll("(?i)\\s*position\\s*:\\s*absolute[^;]*;?", "");

    boolean hasAttachments = emailContent != null && !emailContent.getAttachments().isEmpty();
    return hasAttachments
            ? PdfAttachmentHandler.processInlineImages(cleaned, emailContent)
            : cleaned;
}
/** Matches an http(s) URL (group 1) or a bare e-mail address (group 2). */
private static final Pattern TEXT_LINK_PATTERN =
        Pattern.compile(
                "(https?://[\\w\\-._~:/?#\\[\\]@!$&'()*+,;=%]+)"
                        + "|([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,63})");

/**
 * Converts a plain-text body to HTML.
 *
 * <p>The text is sanitized (or HTML-escaped when no sanitizer is supplied), line breaks
 * become {@code <br>}, and URLs and e-mail addresses become links.
 *
 * <p>URLs and addresses are linked in a single pass. Running the address pattern as a second
 * pass would also match addresses inside the anchors generated for URLs (for example
 * {@code https://host/?user=a@b.com}) and corrupt the {@code href} attribute.
 *
 * @param textBody plain-text body, may be {@code null}
 * @param customHtmlSanitizer optional sanitizer
 * @return the HTML fragment; the empty string when {@code textBody} is {@code null}
 */
public static String convertTextToHtml(
        String textBody, CustomHtmlSanitizer customHtmlSanitizer) {
    if (textBody == null) return "";
    String html =
            customHtmlSanitizer != null
                    ? customHtmlSanitizer.sanitize(textBody)
                    : escapeHtml(textBody);
    html = html.replace("\r\n", "\n").replace("\r", "\n");
    html = html.replace("\n", "<br>\n");

    Matcher linkMatcher = TEXT_LINK_PATTERN.matcher(html);
    StringBuilder linked = new StringBuilder(html.length() + 64);
    int copiedUpTo = 0;
    while (linkMatcher.find()) {
        linked.append(html, copiedUpTo, linkMatcher.start());
        String url = linkMatcher.group(1);
        if (url != null) {
            linked.append("<a href=\"")
                    .append(url)
                    .append("\" style=\"color: #1a73e8; text-decoration: underline;\">")
                    .append(url)
                    .append("</a>");
        } else {
            String address = linkMatcher.group(2);
            linked.append("<a href=\"mailto:")
                    .append(address)
                    .append("\" style=\"color: #1a73e8; text-decoration: underline;\">")
                    .append(address)
                    .append("</a>");
        }
        copiedUpTo = linkMatcher.end();
    }
    linked.append(html, copiedUpTo, html.length());
    return linked.toString();
}
/**
 * Appends the stylesheet for the generated email document to {@code html}.
 *
 * <p>The CSS is a format template. The arguments after the template are substituted
 * positionally, so their order must match the order of the placeholders in the CSS. Literal
 * percent signs in the CSS are written as {@code %%}.
 */
private static void appendEnhancedStyles(StringBuilder html) {
String css =
String.format(
"""
body {
font-family: %s;
font-size: %dpx;
line-height: %s;
color: %s;
margin: 0;
padding: 16px;
background-color: %s;
}
.email-container {
width: 100%%;
max-width: 100%%;
margin: 0 auto;
}
.email-header {
padding-bottom: 10px;
border-bottom: 1px solid %s;
margin-bottom: 10px;
}
.email-header h1 {
margin: 0 0 10px 0;
font-size: %dpx;
font-weight: bold;
}
.email-meta div {
margin-bottom: 2px;
font-size: %dpx;
}
.email-body {
word-wrap: break-word;
}
.attachment-section {
margin-top: 15px;
padding: 10px;
background-color: %s;
border: 1px solid %s;
border-radius: 3px;
}
.attachment-section h3 {
margin: 0 0 8px 0;
font-size: %dpx;
}
.attachment-item {
padding: 5px 0;
}
.attachment-icon {
margin-right: 5px;
}
.attachment-details, .attachment-type {
font-size: %dpx;
color: #555555;
}
.attachment-inclusion-note, .attachment-info-note {
margin-top: 8px;
padding: 6px;
font-size: %dpx;
border-radius: 3px;
}
.attachment-inclusion-note {
background-color: #e6ffed;
border: 1px solid #d4f7dc;
color: #006420;
}
.attachment-info-note {
background-color: #fff9e6;
border: 1px solid #fff0c2;
color: #664d00;
}
.attachment-link-container {
display: flex;
align-items: center;
padding: 8px;
background-color: #f8f9fa;
border: 1px solid #dee2e6;
border-radius: 4px;
margin: 4px 0;
}
.attachment-link-container:hover {
background-color: #e9ecef;
}
.attachment-note {
font-size: %dpx;
color: #6c757d;
font-style: italic;
margin-left: 8px;
}
.no-content {
padding: 20px;
text-align: center;
color: #666;
font-style: italic;
}
.text-body {
white-space: pre-wrap;
}
img {
max-width: 100%%;
height: auto;
display: block;
}
""",
DEFAULT_FONT_FAMILY, // body font-family
DEFAULT_FONT_SIZE, // body font-size
DEFAULT_LINE_HEIGHT, // body line-height
DEFAULT_TEXT_COLOR, // body color
DEFAULT_BACKGROUND_COLOR, // body background-color
DEFAULT_BORDER_COLOR, // .email-header border-bottom
DEFAULT_FONT_SIZE + 4, // .email-header h1 font-size
DEFAULT_FONT_SIZE - 1, // .email-meta div font-size
ATTACHMENT_BACKGROUND_COLOR, // .attachment-section background-color
ATTACHMENT_BORDER_COLOR, // .attachment-section border
DEFAULT_FONT_SIZE + 1, // .attachment-section h3 font-size
DEFAULT_FONT_SIZE - 2, // .attachment-details, .attachment-type font-size
DEFAULT_FONT_SIZE - 2, // attachment note boxes font-size
DEFAULT_FONT_SIZE - 3); // .attachment-note font-size
html.append(css);
}
/**
 * Appends the attachments section to {@code html}.
 *
 * <p>The heading shows the attachment counter, or the list size when the counter is not
 * positive. Each listed attachment gets an item with its name, size and content type, and
 * its embedded filename is set as a side effect. A closing note states whether attachments
 * are embedded in the file. {@code customHtmlSanitizer} is not used by this method.
 */
private static void appendAttachmentsSection(
        StringBuilder html,
        EmlParser.EmailContent content,
        EmlToPdfRequest request,
        CustomHtmlSanitizer customHtmlSanitizer) {
    html.append("<div class=\"attachment-section\">\n");

    int shownCount = content.getAttachmentCount();
    if (shownCount <= 0) {
        shownCount = content.getAttachments().size();
    }
    html.append("<h3>Attachments (").append(shownCount).append(")</h3>\n");

    for (int index = 0; index < content.getAttachments().size(); index++) {
        EmlParser.EmailAttachment attachment = content.getAttachments().get(index);

        String embeddedFilename = attachment.getFilename();
        if (embeddedFilename == null) {
            embeddedFilename = "attachment_" + index;
        }
        attachment.setEmbeddedFilename(embeddedFilename);

        String sizeText = GeneralUtils.formatBytes(attachment.getSizeBytes());

        String typeSuffix = "";
        if (attachment.getContentType() != null && !attachment.getContentType().isEmpty()) {
            typeSuffix = ", " + escapeHtml(attachment.getContentType());
        }

        String itemId = "attachment_" + index;
        html.append(
                """
                <div class="attachment-item" id="%s">
                <span class="attachment-icon" data-filename="%s">@</span>
                <span class="attachment-name">%s</span>
                <span class="attachment-details">(%s%s)</span>
                </div>
                """
                        .formatted(
                                itemId,
                                escapeHtml(embeddedFilename),
                                escapeHtml(EmlParser.safeMimeDecode(attachment.getFilename())),
                                sizeText,
                                typeSuffix));
    }

    boolean attachmentsIncluded = request != null && request.isIncludeAttachments();
    String note =
            attachmentsIncluded
                    ? """
                    <div class="attachment-info-note">
                    <p><em>Attachments are embedded in the file.</em></p>
                    </div>
                    """
                    : """
                    <div class="attachment-info-note">
                    <p><em>Attachment information displayed - files not included in PDF.</em></p>
                    </div>
                    """;
    html.append(note);
    html.append("</div>\n");
}
/**
 * Creates the HTML-to-PDF request used to render the generated email document.
 *
 * @param request EML conversion request whose file input is copied, may be {@code null}
 * @return a new request with the default zoom applied
 */
public static HTMLToPdfRequest createHtmlRequest(EmlToPdfRequest request) {
    HTMLToPdfRequest htmlRequest = new HTMLToPdfRequest();
    boolean hasSource = request != null;
    if (hasSource) {
        htmlRequest.setFileInput(request.getFileInput());
    }
    float zoom = Float.parseFloat(DEFAULT_ZOOM);
    htmlRequest.setZoom(zoom);
    return htmlRequest;
}
/**
 * Picks a MIME type for an attachment.
 *
 * @param filename attachment filename, may be {@code null}
 * @param existingMimeType MIME type already known for the attachment, may be {@code null}
 * @return {@code existingMimeType} when it is non-empty; otherwise the value mapped in
 *     {@code EXTENSION_TO_MIME_TYPE} for the first key the lower-cased filename ends with;
 *     otherwise {@code "image/png"}
 */
public static String detectMimeType(String filename, String existingMimeType) {
    if (existingMimeType != null && !existingMimeType.isEmpty()) {
        return existingMimeType;
    }
    if (filename != null) {
        // Locale.ROOT keeps the comparison stable regardless of the JVM default locale
        // (e.g. "PIC.GIF" must lower-case to "pic.gif" even under a Turkish locale).
        String lowerFilename = filename.toLowerCase(java.util.Locale.ROOT);
        for (Map.Entry<String, String> entry : EXTENSION_TO_MIME_TYPE.entrySet()) {
            if (lowerFilename.endsWith(entry.getKey())) {
                return entry.getValue();
            }
        }
    }
    return "image/png";
}
/**
 * URL-decodes {@code encoded} as UTF-8.
 *
 * @param encoded percent-encoded text, may be {@code null}
 * @return the decoded text; {@code null} for {@code null} input; the input unchanged when it
 *     contains a malformed escape sequence
 */
public static String decodeUrlEncoded(String encoded) {
    if (encoded == null) {
        return null;
    }
    try {
        return java.net.URLDecoder.decode(encoded, StandardCharsets.UTF_8);
    } catch (IllegalArgumentException e) {
        // Malformed %-escape: keep the original text rather than failing the conversion.
        return encoded;
    }
}
/**
 * Decodes MIME encoded-words (RFC 2047 {@code =?charset?B|Q?text?=}) inside a header value.
 *
 * <p>Whitespace between directly adjacent encoded-words is dropped before decoding. An
 * encoded-word that cannot be decoded is copied to the output unchanged, and text outside
 * encoded-words is preserved.
 *
 * @param encodedText header value, may be {@code null}
 * @return the decoded value; the input itself when it is {@code null}, blank, or cannot be
 *     processed at all
 */
public static String decodeMimeHeader(String encodedText) {
    if (encodedText == null || encodedText.trim().isEmpty()) {
        return encodedText;
    }
    try {
        Pattern concatenatedPattern =
                Pattern.compile(
                        "(=\\?[^?]+\\?[BbQq]\\?[^?]*\\?=)(\\s*=\\?[^?]+\\?[BbQq]\\?[^?]*\\?=)+");
        // The lambda result is interpreted as a replacement template, so it must be quoted;
        // otherwise a literal '$' or '\' inside an encoded-word aborts the whole decode.
        String processedText =
                concatenatedPattern
                        .matcher(encodedText)
                        .replaceAll(
                                match ->
                                        Matcher.quoteReplacement(
                                                match.group()
                                                        .replaceAll("\\s+(?==\\?)", "")));

        Pattern mimePattern = Pattern.compile("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?=");
        Matcher matcher = mimePattern.matcher(processedText);
        StringBuilder result = new StringBuilder();
        int lastEnd = 0;
        while (matcher.find()) {
            result.append(processedText, lastEnd, matcher.start());
            String charsetLabel = matcher.group(1);
            String encoding = matcher.group(2).toUpperCase(java.util.Locale.ROOT);
            String encodedValue = matcher.group(3);
            try {
                String decodedValue =
                        switch (encoding) {
                            case "B" -> {
                                byte[] decodedBytes =
                                        Base64.getDecoder()
                                                .decode(encodedValue.replaceAll("\\s", ""));
                                yield new String(decodedBytes, resolveMimeCharset(charsetLabel));
                            }
                            case "Q" -> decodeQuotedPrintable(encodedValue, charsetLabel);
                            default -> matcher.group(0); // Return original if unknown encoding
                        };
                result.append(decodedValue);
            } catch (RuntimeException e) {
                result.append(matcher.group(0)); // Keep original on decode error
            }
            lastEnd = matcher.end();
        }
        result.append(processedText.substring(lastEnd));
        return result.toString();
    } catch (RuntimeException e) {
        return encodedText; // Return original on any parsing error
    }
}

/**
 * Resolves the charset named in an encoded-word. An RFC 2231 language suffix
 * ({@code charset*language}) is ignored; unknown, illegal or missing names fall back to UTF-8.
 */
private static Charset resolveMimeCharset(String charsetLabel) {
    if (charsetLabel == null) {
        return StandardCharsets.UTF_8;
    }
    int languageSeparator = charsetLabel.indexOf('*');
    String charsetName =
            languageSeparator >= 0
                    ? charsetLabel.substring(0, languageSeparator)
                    : charsetLabel;
    try {
        return Charset.forName(charsetName.trim());
    } catch (IllegalArgumentException e) {
        return StandardCharsets.UTF_8;
    }
}

/**
 * Decodes the payload of a "Q"-encoded word.
 *
 * <p>{@code =XX} with two hex digits becomes the byte {@code XX}, {@code _} becomes a space,
 * a trailing {@code =} (optionally followed by a newline) is dropped, and any other
 * {@code =} is kept literally. The resulting bytes are interpreted in {@code charset}.
 *
 * @param encodedText payload between the last two {@code ?} of the encoded-word
 * @param charset charset label of the encoded-word; UTF-8 is used when it cannot be resolved
 * @return the decoded text
 */
private static String decodeQuotedPrintable(String encodedText, String charset) {
    int length = encodedText.length();
    StringBuilder rawBytes = new StringBuilder(length);
    for (int i = 0; i < length; i++) {
        char c = encodedText.charAt(i);
        if (c == '_') {
            rawBytes.append(' '); // Space encoding in Q encoding
        } else if (c != '=') {
            rawBytes.append(c);
        } else if (i + 2 < length) {
            // Character.digit rejects '+'/'-', which Integer.parseInt would accept as a sign.
            int high = Character.digit(encodedText.charAt(i + 1), 16);
            int low = Character.digit(encodedText.charAt(i + 2), 16);
            if (high >= 0 && low >= 0) {
                rawBytes.append((char) ((high << 4) | low));
                i += 2;
            } else {
                rawBytes.append(c);
            }
        } else if (i + 1 == length) {
            // Trailing '=' is a soft line break: emit nothing.
        } else if (encodedText.charAt(i + 1) == '\n') {
            i++; // Skip the newline too
        } else {
            rawBytes.append(c);
        }
    }
    // Each appended char carries one byte value; ISO-8859-1 maps them back 1:1.
    byte[] bytes = rawBytes.toString().getBytes(StandardCharsets.ISO_8859_1);
    return new String(bytes, resolveMimeCharset(charset));
}
/**
 * Escapes the five HTML-significant characters {@code & < > " '}.
 *
 * @param text text to escape, may be {@code null}
 * @return the escaped text, or an empty string for {@code null}
 */
public static String escapeHtml(String text) {
    if (text == null) {
        return "";
    }
    StringBuilder escaped = new StringBuilder(text.length() + 16);
    for (int idx = 0; idx < text.length(); idx++) {
        char ch = text.charAt(idx);
        switch (ch) {
            case '&' -> escaped.append("&amp;");
            case '<' -> escaped.append("&lt;");
            case '>' -> escaped.append("&gt;");
            case '"' -> escaped.append("&quot;");
            case '\'' -> escaped.append("&#39;");
            default -> escaped.append(ch);
        }
    }
    return escaped.toString();
}
/**
 * Makes {@code text} safe for inclusion in HTML.
 *
 * @param text text to sanitize
 * @param customHtmlSanitizer sanitizer to delegate to; when {@code null} the text is
 *     HTML-escaped instead
 * @return the sanitized or escaped text
 */
public static String sanitizeText(String text, CustomHtmlSanitizer customHtmlSanitizer) {
    return customHtmlSanitizer == null
            ? escapeHtml(text)
            : customHtmlSanitizer.sanitize(text);
}
/**
 * Removes {@code <script>} and {@code <style>} elements, including their content, from an
 * HTML string.
 *
 * <p>Matching is case-insensitive and spans line breaks, so multi-line blocks are removed as
 * well.
 *
 * @param htmlContent HTML to simplify, may be {@code null}
 * @return the HTML without script/style elements; the input itself when it is {@code null}
 *     or empty
 */
public static String simplifyHtmlContent(String htmlContent) {
    if (htmlContent == null || htmlContent.isEmpty()) {
        return htmlContent;
    }
    // (?s) lets '.' cross newlines; \b stops "<script" from matching longer tag names.
    String simplified =
            htmlContent.replaceAll("(?is)<script\\b[^>]*>.*?</script\\s*>", "");
    return simplified.replaceAll("(?is)<style\\b[^>]*>.*?</style\\s*>", "");
}
}

View File

@ -0,0 +1,667 @@
package stirling.software.common.util;
import static stirling.software.common.util.AttachmentUtils.setCatalogViewerPreferences;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Base64;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PageMode;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationFileAttachment;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import org.jetbrains.annotations.NotNull;
import org.springframework.web.multipart.MultipartFile;
import lombok.Data;
import lombok.Getter;
import lombok.experimental.UtilityClass;
import stirling.software.common.service.CustomPDFDocumentFactory;
@UtilityClass
public class PdfAttachmentHandler {
// Note: This class is designed for EML attachments, not general PDF attachments.
// Glyph searched for in the extracted page text to locate each attachment row.
private static final String ATTACHMENT_MARKER = "@";
// Width and height of the file-attachment annotation rectangle placed over a marker.
private static final float ATTACHMENT_ICON_WIDTH = 12f;
private static final float ATTACHMENT_ICON_HEIGHT = 14f;
// Offsets added to the marker position when the annotation rectangle is computed.
private static final float ANNOTATION_X_OFFSET = 2f;
private static final float ANNOTATION_Y_OFFSET = 10f;
/**
 * Embeds email attachments into a PDF and links them from the attachment markers found in
 * the page text.
 *
 * <p>Only attachments that carry data are embedded. Each of those gets its embedded filename
 * set to its filename, or to {@code attachment_<index>} when it has none.
 *
 * @param pdfBytes the PDF to extend
 * @param attachments attachments to embed; when {@code null} or empty {@code pdfBytes} is
 *     returned as is
 * @param pdfDocumentFactory factory used to load the PDF
 * @return the bytes of the saved document
 * @throws IOException when loading, modifying or saving the document fails; runtime failures
 *     are wrapped as well
 */
public static byte[] attachFilesToPdf(
        byte[] pdfBytes,
        List<EmlParser.EmailAttachment> attachments,
        CustomPDFDocumentFactory pdfDocumentFactory)
        throws IOException {
    boolean nothingToAttach = attachments == null || attachments.isEmpty();
    if (nothingToAttach) {
        return pdfBytes;
    }

    try (PDDocument document = pdfDocumentFactory.load(pdfBytes);
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
        List<MultipartFile> embeddable = new ArrayList<>();
        int nextIndex = 0;
        for (EmlParser.EmailAttachment candidate : attachments) {
            int index = nextIndex++;
            byte[] payload = candidate.getData();
            if (payload == null || payload.length == 0) {
                continue;
            }
            String originalName = candidate.getFilename();
            candidate.setEmbeddedFilename(
                    originalName != null ? originalName : ("attachment_" + index));
            embeddable.add(createMultipartFile(candidate));
        }

        if (!embeddable.isEmpty()) {
            Map<Integer, String> embeddedNames =
                    addAttachmentsToDocumentWithMapping(document, embeddable, attachments);
            setCatalogViewerPreferences(document, PageMode.USE_ATTACHMENTS);
            addAttachmentAnnotationsToDocumentWithMapping(
                    document, attachments, embeddedNames);
        }

        document.save(outputStream);
        return outputStream.toByteArray();
    } catch (RuntimeException e) {
        throw new IOException(
                "Invalid PDF structure or processing error: " + e.getMessage(), e);
    } catch (Exception e) {
        throw new IOException("Error attaching files to PDF: " + e.getMessage(), e);
    }
}
/**
 * Wraps an email attachment in a {@link MultipartFile} view.
 *
 * <p>The view reads the attachment on every call; missing data is treated as empty content,
 * a missing filename as {@code attachment_<current millis>} and a missing content type as
 * {@code application/octet-stream}.
 *
 * @param attachment attachment to expose
 * @return the multipart view of {@code attachment}
 */
private static MultipartFile createMultipartFile(EmlParser.EmailAttachment attachment) {
    return new MultipartFile() {
        /** Current attachment data, or {@code null} when there is none. */
        private byte[] payload() {
            return attachment.getData();
        }

        @Override
        public @NotNull String getName() {
            return "attachment";
        }

        @Override
        public String getOriginalFilename() {
            String name = attachment.getFilename();
            if (name == null) {
                return "attachment_" + System.currentTimeMillis();
            }
            return name;
        }

        @Override
        public String getContentType() {
            String type = attachment.getContentType();
            if (type == null) {
                return "application/octet-stream";
            }
            return type;
        }

        @Override
        public boolean isEmpty() {
            byte[] data = payload();
            return data == null || data.length == 0;
        }

        @Override
        public long getSize() {
            byte[] data = payload();
            return data == null ? 0 : data.length;
        }

        @Override
        public byte @NotNull [] getBytes() {
            byte[] data = payload();
            return data == null ? new byte[0] : data;
        }

        @Override
        public @NotNull InputStream getInputStream() {
            return new ByteArrayInputStream(getBytes());
        }

        @Override
        public void transferTo(@NotNull File dest) throws IOException, IllegalStateException {
            // The target file is created (or truncated) even when there is no data.
            try (FileOutputStream target = new FileOutputStream(dest)) {
                byte[] data = payload();
                if (data != null) {
                    target.write(data);
                }
            }
        }
    };
}
/**
 * Returns a filename that is not contained in {@code existingNames}.
 *
 * <p>When {@code filename} is already taken, {@code _1}, {@code _2}, ... is inserted before
 * the last dot (or appended when there is no dot after the first character) until a free
 * name is found. The set itself is not modified.
 *
 * @param filename desired filename
 * @param existingNames names that are already in use
 * @return {@code filename} or the first free numbered variant
 */
private static String ensureUniqueFilename(String filename, Set<String> existingNames) {
    if (!existingNames.contains(filename)) {
        return filename;
    }

    int dot = filename.lastIndexOf('.');
    boolean hasExtension = dot > 0;
    String stem = hasExtension ? filename.substring(0, dot) : filename;
    String suffix = hasExtension ? filename.substring(dot) : "";

    for (int n = 1; ; n++) {
        String candidate = stem + "_" + n + suffix;
        if (!existingNames.contains(candidate)) {
            return candidate;
        }
    }
}
/**
 * Computes the rectangle of the file-attachment annotation for a marker found at
 * ({@code x}, {@code y}) in text-extraction coordinates.
 *
 * <p>The y value is flipped against the crop box height, the point is remapped for page
 * rotations of 90, 180 and 270 degrees, the icon offsets and padding are applied, and the
 * result is moved back inside the media box when it would stick out.
 *
 * @param page page the marker was found on
 * @param x marker x position as reported by the text stripper
 * @param y marker y position as reported by the text stripper
 * @return the annotation rectangle
 */
private static @NotNull PDRectangle calculateAnnotationRectangle(
        PDPage page, float x, float y) {
    PDRectangle crop = page.getCropBox();

    // ISO 32000-1:2008 Section 8.3: PDF coordinate system transforms
    int rotation = page.getRotation();
    float anchorX = x;
    float anchorY = crop.getHeight() - y;
    if (rotation == 90) {
        float previousX = anchorX;
        anchorX = anchorY;
        anchorY = crop.getWidth() - previousX;
    } else if (rotation == 180) {
        anchorX = crop.getWidth() - anchorX;
        anchorY = y;
    } else if (rotation == 270) {
        float previousX = anchorX;
        anchorX = crop.getHeight() - anchorY;
        anchorY = previousX;
    }

    float horizontalPadding = 2.0f;
    float verticalPadding = 2.0f;
    float height = ATTACHMENT_ICON_HEIGHT;
    PDRectangle placed =
            new PDRectangle(
                    anchorX + ANNOTATION_X_OFFSET + horizontalPadding,
                    anchorY - height + ANNOTATION_Y_OFFSET + verticalPadding,
                    ATTACHMENT_ICON_WIDTH,
                    height);

    PDRectangle media = page.getMediaBox();
    boolean sticksOut =
            placed.getLowerLeftX() < media.getLowerLeftX()
                    || placed.getLowerLeftY() < media.getLowerLeftY()
                    || placed.getUpperRightX() > media.getUpperRightX()
                    || placed.getUpperRightY() > media.getUpperRightY();
    if (!sticksOut) {
        return placed;
    }

    // Clamp the lower-left corner so the whole rectangle fits inside the media box.
    float maxX = media.getUpperRightX() - placed.getWidth();
    float maxY = media.getUpperRightY() - placed.getHeight();
    float clampedX = Math.max(media.getLowerLeftX(), Math.min(placed.getLowerLeftX(), maxX));
    float clampedY = Math.max(media.getLowerLeftY(), Math.min(placed.getLowerLeftY(), maxY));
    return new PDRectangle(clampedX, clampedY, placed.getWidth(), placed.getHeight());
}
/**
 * Replaces {@code cid:} image sources in the email HTML with inline {@code data:} URIs built
 * from the matching embedded attachments.
 *
 * <p>Attachments are looked up by Content-ID, both as stored and with surrounding angle
 * brackets removed. Image tags whose Content-ID has no matching attachment are left
 * unchanged.
 *
 * @param htmlContent HTML body of the email, may be {@code null}
 * @param emailContent parsed email providing the attachments, may be {@code null}
 * @return the HTML with resolvable {@code cid:} sources inlined; {@code htmlContent} itself
 *     when either argument is {@code null} or no embedded attachment has a Content-ID
 */
public static String processInlineImages(
        String htmlContent, EmlParser.EmailContent emailContent) {
    if (htmlContent == null || emailContent == null) return htmlContent;

    Map<String, EmlParser.EmailAttachment> contentIdMap = new HashMap<>();
    for (EmlParser.EmailAttachment attachment : emailContent.getAttachments()) {
        if (attachment.isEmbedded()
                && attachment.getContentId() != null
                && attachment.getData() != null) {
            String contentId = attachment.getContentId();
            contentIdMap.put(contentId, attachment);
            // A Content-ID header value is written as "<id>", while cid: URLs carry the bare
            // id, so register the bare form as well.
            String trimmedId = contentId.trim();
            if (trimmedId.length() > 2
                    && trimmedId.startsWith("<")
                    && trimmedId.endsWith(">")) {
                contentIdMap.putIfAbsent(
                        trimmedId.substring(1, trimmedId.length() - 1), attachment);
            }
        }
    }
    if (contentIdMap.isEmpty()) return htmlContent;

    Pattern cidPattern =
            Pattern.compile("(?i)<img[^>]*\\ssrc\\s*=\\s*['\"]cid:([^'\"]+)['\"][^>]*>");
    int cidSchemeLength = "cid:".length();
    Matcher matcher = cidPattern.matcher(htmlContent);
    StringBuilder result = new StringBuilder();
    while (matcher.find()) {
        String tag = matcher.group(0);
        EmlParser.EmailAttachment attachment = contentIdMap.get(matcher.group(1));
        String replacement = tag;
        if (attachment != null && attachment.getData() != null) {
            String mimeType =
                    EmlProcessingUtils.detectMimeType(
                            attachment.getFilename(), attachment.getContentType());
            // Drop content-type parameters (e.g. name="..."); their quotes would break the
            // surrounding attribute.
            int parameterStart = mimeType.indexOf(';');
            if (parameterStart >= 0) {
                mimeType = mimeType.substring(0, parameterStart).trim();
            }
            String dataUri =
                    "data:"
                            + mimeType
                            + ";base64,"
                            + Base64.getEncoder().encodeToString(attachment.getData());
            // Splice by position: this replaces exactly the matched src value, works for any
            // casing of "cid:", and never treats the data URI as a regex replacement.
            int idStart = matcher.start(1) - matcher.start();
            int idEnd = matcher.end(1) - matcher.start();
            replacement =
                    tag.substring(0, idStart - cidSchemeLength)
                            + dataUri
                            + tag.substring(idEnd);
        }
        matcher.appendReplacement(result, Matcher.quoteReplacement(replacement));
    }
    matcher.appendTail(result);
    return result.toString();
}
/**
 * Formats an email date for display, in English and in UTC, using the pattern
 * {@code EEE, MMM d, yyyy 'at' h:mm a z}.
 *
 * @param date date to format, may be {@code null}
 * @return the formatted date, or an empty string for {@code null}
 */
public static String formatEmailDate(Date date) {
    if (date != null) {
        // A fresh formatter per call: SimpleDateFormat instances must not be shared.
        SimpleDateFormat utcFormat =
                new SimpleDateFormat("EEE, MMM d, yyyy 'at' h:mm a z", Locale.ENGLISH);
        utcFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
        return utcFormat.format(date);
    }
    return "";
}
/**
 * Location of one attachment marker found in the page text, together with the filename text
 * extracted next to it.
 */
@Data
public static class MarkerPosition {
    /** Zero-based index of the page the marker was found on. */
    private int pageIndex;

    /** Marker coordinates as reported by the text stripper. */
    private float x;

    private float y;

    /** The marker text itself. */
    private String character;

    /** Filename extracted after the marker, or {@code null} when none was recognised. */
    private String filename;

    public MarkerPosition(
            int page, float markerX, float markerY, String marker, String nearbyFilename) {
        pageIndex = page;
        x = markerX;
        y = markerY;
        character = marker;
        filename = nearbyFilename;
    }
}
/**
 * Text stripper that records where attachment markers appear inside the "Attachments (n)"
 * section of a rendered email PDF.
 *
 * <p>Run {@link #getText(PDDocument)} and then read the recorded markers from
 * {@code positions}. The extracted text itself is discarded.
 */
public static class AttachmentMarkerPositionFinder extends PDFTextStripper {
    @Getter private final List<MarkerPosition> positions = new ArrayList<>();
    protected boolean sortByPosition;
    private boolean isInAttachmentSection;
    private boolean attachmentSectionFound;
    private static final Pattern ATTACHMENT_SECTION_PATTERN =
            Pattern.compile("attachments\\s*\\(\\d+\\)", Pattern.CASE_INSENSITIVE);
    private static final Pattern FILENAME_PATTERN =
            Pattern.compile("@\\s*([^\\s\\(]+(?:\\.[a-zA-Z0-9]+)?)");

    public AttachmentMarkerPositionFinder() {
        super();
        this.sortByPosition = false; // Disable sorting to preserve document order
        this.isInAttachmentSection = false;
        this.attachmentSectionFound = false;
    }

    /**
     * Scans {@code document} and records the marker positions.
     *
     * <p>State from a previous run is discarded first, so an instance can be reused.
     *
     * @return always an empty string; only the recorded positions are of interest
     */
    @Override
    public String getText(PDDocument document) throws IOException {
        positions.clear();
        isInAttachmentSection = false;
        attachmentSectionFound = false;

        super.getText(document);
        if (sortByPosition) {
            positions.sort(
                    (a, b) -> {
                        int pageCompare = Integer.compare(a.getPageIndex(), b.getPageIndex());
                        if (pageCompare != 0) return pageCompare;
                        return Float.compare(
                                b.getY(), a.getY()); // Descending Y per PDF coordinate system
                    });
        }
        return ""; // Return empty string as we only need positions
    }

    @Override
    protected void writeString(String string, List<TextPosition> textPositions)
            throws IOException {
        String lowerString = string.toLowerCase(Locale.ROOT);
        if (ATTACHMENT_SECTION_PATTERN.matcher(lowerString).find()) {
            isInAttachmentSection = true;
            attachmentSectionFound = true;
        }
        if (isInAttachmentSection
                && (lowerString.contains("</body>")
                        || lowerString.contains("</html>")
                        || (attachmentSectionFound
                                && lowerString.trim().isEmpty()
                                && string.length() > 50))) {
            isInAttachmentSection = false;
        }
        if (isInAttachmentSection) {
            // Ask the stripper for the page number instead of counting endPage() calls:
            // pages that are skipped during extraction would otherwise shift every later
            // marker onto the wrong page.
            int pageIndex = getCurrentPageNo() - 1;
            for (int i = 0; (i = string.indexOf(ATTACHMENT_MARKER, i)) != -1; i++) {
                // The string index is used as the text-position index; skip markers for
                // which no position entry exists.
                if (i < textPositions.size()) {
                    TextPosition textPosition = textPositions.get(i);
                    positions.add(
                            new MarkerPosition(
                                    pageIndex,
                                    textPosition.getXDirAdj(),
                                    textPosition.getYDirAdj(),
                                    ATTACHMENT_MARKER,
                                    extractFilenameAfterMarker(string, i)));
                }
            }
        }
        super.writeString(string, textPositions);
    }

    /**
     * Controls whether the recorded positions are sorted by page and descending y after
     * extraction. NOTE(review): this does not forward to the superclass setter, so the
     * stripper's own ordering is left as is; confirm that this is intended.
     */
    @Override
    public void setSortByPosition(boolean sortByPosition) {
        this.sortByPosition = sortByPosition;
    }

    /**
     * Extracts the filename-like token that follows the marker at {@code markerIndex}.
     *
     * @return the token, or {@code null} when nothing suitable follows the marker
     */
    private String extractFilenameAfterMarker(String text, int markerIndex) {
        String afterMarker = text.substring(markerIndex + 1);
        Matcher matcher = FILENAME_PATTERN.matcher("@" + afterMarker);
        if (matcher.find()) {
            return matcher.group(1);
        }
        // Fallback: first token longer than three characters that contains a dot.
        String[] parts = afterMarker.split("[\\s\\(\\)]+");
        for (String part : parts) {
            part = part.trim();
            if (part.length() > 3 && part.contains(".")) {
                return part;
            }
        }
        return null;
    }
}
/**
 * Embeds the given files into the document's embedded-files name tree.
 *
 * <p>Filenames are NFC-normalized and made unique against the names already present. Blank
 * filenames become {@code attachment_<index>}.
 *
 * @param document document receiving the embedded files
 * @param attachments files to embed
 * @param originalAttachments email attachments the files were created from (not used here)
 * @return map from the index in {@code attachments} to the name the file was embedded under
 * @throws IOException when the catalog is missing or a file cannot be read or embedded
 */
private static Map<Integer, String> addAttachmentsToDocumentWithMapping(
        PDDocument document,
        List<MultipartFile> attachments,
        List<EmlParser.EmailAttachment> originalAttachments)
        throws IOException {
    PDDocumentCatalog catalog = document.getDocumentCatalog();
    if (catalog == null) {
        throw new IOException("PDF document catalog is not accessible");
    }
    PDDocumentNameDictionary documentNames = catalog.getNames();
    if (documentNames == null) {
        documentNames = new PDDocumentNameDictionary(catalog);
        catalog.setNames(documentNames);
    }
    PDEmbeddedFilesNameTreeNode embeddedFilesTree = documentNames.getEmbeddedFiles();
    if (embeddedFilesTree == null) {
        embeddedFilesTree = new PDEmbeddedFilesNameTreeNode();
        documentNames.setEmbeddedFiles(embeddedFilesTree);
    }

    // Always work on our own copy: the map handed back by getNames() may be read-only, in
    // which case adding to it directly fails for PDFs that already contain embedded files.
    Map<String, PDComplexFileSpecification> currentNames = embeddedFilesTree.getNames();
    Map<String, PDComplexFileSpecification> existingNames =
            currentNames != null ? new HashMap<>(currentNames) : new HashMap<>();

    Map<Integer, String> indexToFilenameMap = new HashMap<>();
    for (int i = 0; i < attachments.size(); i++) {
        MultipartFile attachment = attachments.get(i);
        String filename = attachment.getOriginalFilename();
        if (filename == null || filename.trim().isEmpty()) {
            filename = "attachment_" + i;
        }
        String normalizedFilename =
                java.text.Normalizer.normalize(filename, java.text.Normalizer.Form.NFC);
        String uniqueFilename =
                ensureUniqueFilename(normalizedFilename, existingNames.keySet());
        indexToFilenameMap.put(i, uniqueFilename);

        PDEmbeddedFile embeddedFile;
        try (InputStream content = attachment.getInputStream()) {
            embeddedFile = new PDEmbeddedFile(document, content);
        }
        embeddedFile.setSize((int) attachment.getSize());
        GregorianCalendar currentTime = new GregorianCalendar();
        embeddedFile.setCreationDate(currentTime);
        embeddedFile.setModDate(currentTime);
        String contentType = attachment.getContentType();
        if (contentType != null && !contentType.trim().isEmpty()) {
            embeddedFile.setSubtype(contentType);
        }

        PDComplexFileSpecification fileSpecification = new PDComplexFileSpecification();
        fileSpecification.setFile(uniqueFilename);
        fileSpecification.setFileUnicode(uniqueFilename);
        fileSpecification.setEmbeddedFile(embeddedFile);
        fileSpecification.setEmbeddedFileUnicode(embeddedFile);
        existingNames.put(uniqueFilename, fileSpecification);
    }

    embeddedFilesTree.setNames(existingNames);
    documentNames.setEmbeddedFiles(embeddedFilesTree);
    catalog.setNames(documentNames);
    return indexToFilenameMap;
}
/**
 * Adds a file-attachment annotation at each attachment marker found in the document text.
 *
 * <p>Markers are processed in document order, at most as many as there are attachments. A
 * marker is skipped when the filename next to it matches no attachment or when no embedded
 * file name can be resolved for the matched attachment.
 *
 * @param document document to annotate
 * @param attachments email attachments the markers refer to
 * @param indexToFilenameMap names under which the files were embedded
 * @throws IOException when text extraction or annotation creation fails
 */
private static void addAttachmentAnnotationsToDocumentWithMapping(
        PDDocument document,
        List<EmlParser.EmailAttachment> attachments,
        Map<Integer, String> indexToFilenameMap)
        throws IOException {
    boolean nothingToAnnotate =
            document.getNumberOfPages() == 0 || attachments == null || attachments.isEmpty();
    if (nothingToAnnotate) {
        return;
    }

    AttachmentMarkerPositionFinder finder = new AttachmentMarkerPositionFinder();
    finder.setSortByPosition(false); // Keep document order to maintain pairing
    finder.getText(document);

    List<MarkerPosition> markers = finder.getPositions();
    int limit = Math.min(markers.size(), attachments.size());
    for (int markerIndex = 0; markerIndex < limit; markerIndex++) {
        MarkerPosition marker = markers.get(markerIndex);

        EmlParser.EmailAttachment matched =
                findAttachmentByFilename(attachments, marker.getFilename());
        if (matched == null) {
            continue; // No matching attachment found for filename near marker
        }

        String embeddedName = findEmbeddedFilenameForAttachment(matched, indexToFilenameMap);
        if (embeddedName == null) {
            continue; // No embedded filename found for attachment
        }

        addAttachmentAnnotationToPageWithMapping(
                document,
                document.getPage(marker.getPageIndex()),
                matched,
                embeddedName,
                marker.getX(),
                marker.getY(),
                markerIndex);
    }
}
/**
 * Finds the attachment whose filename best matches the text found next to a marker.
 *
 * <p>Filenames are compared in normalized form. An exact match anywhere in the list wins;
 * otherwise the first attachment whose normalized name contains, or is contained in, the
 * normalized target is returned. Names that normalize to an empty string take no part in
 * the containment check, because an empty string is contained in every name.
 *
 * @param attachments candidate attachments
 * @param targetFilename filename text to look for, may be {@code null}
 * @return the matching attachment, or {@code null} when there is none
 */
private static EmlParser.EmailAttachment findAttachmentByFilename(
        List<EmlParser.EmailAttachment> attachments, String targetFilename) {
    if (targetFilename == null || targetFilename.trim().isEmpty()) {
        return null;
    }
    String normalizedTarget = normalizeFilename(targetFilename);

    EmlParser.EmailAttachment partialMatch = null;
    for (EmlParser.EmailAttachment attachment : attachments) {
        if (attachment.getFilename() == null) {
            continue;
        }
        String normalizedAttachment = normalizeFilename(attachment.getFilename());
        if (normalizedAttachment.equals(normalizedTarget)) {
            return attachment;
        }
        if (partialMatch == null
                && !normalizedTarget.isEmpty()
                && !normalizedAttachment.isEmpty()
                && (normalizedAttachment.contains(normalizedTarget)
                        || normalizedTarget.contains(normalizedAttachment))) {
            // Remember the first partial match, but keep looking for an exact one.
            partialMatch = attachment;
        }
    }
    return partialMatch;
}
/**
 * Resolves the name under which an attachment was embedded into the PDF.
 *
 * <p>An embedded name equal to the attachment's filename (as is, or NFC-normalized) is
 * preferred. Only when there is none does a containment match in either direction count, so
 * that {@code a.pdf} cannot resolve to {@code aa.pdf} merely because of map iteration order.
 *
 * @param attachment attachment to resolve
 * @param indexToFilenameMap embedded names keyed by embedding index
 * @return the embedded name, or {@code null} when the attachment has no filename or nothing
 *     matches
 */
private static String findEmbeddedFilenameForAttachment(
        EmlParser.EmailAttachment attachment, Map<Integer, String> indexToFilenameMap) {
    String attachmentFilename = attachment.getFilename();
    if (attachmentFilename == null) {
        return null;
    }
    String normalizedFilename =
            java.text.Normalizer.normalize(
                    attachmentFilename, java.text.Normalizer.Form.NFC);

    for (String embeddedFilename : indexToFilenameMap.values()) {
        if (embeddedFilename != null
                && (embeddedFilename.equals(attachmentFilename)
                        || embeddedFilename.equals(normalizedFilename))) {
            return embeddedFilename;
        }
    }

    if (attachmentFilename.isEmpty()) {
        return null; // An empty name would "match" every embedded file below.
    }
    for (String embeddedFilename : indexToFilenameMap.values()) {
        if (embeddedFilename != null
                && !embeddedFilename.isEmpty()
                && (embeddedFilename.contains(attachmentFilename)
                        || attachmentFilename.contains(embeddedFilename))) {
            return embeddedFilename;
        }
    }
    return null;
}
/**
 * Reduces a filename to a comparison key: lower-cased, with every character other than ASCII
 * letters, digits, {@code .}, {@code _} and {@code -} removed.
 *
 * @param filename filename to normalize, may be {@code null}
 * @return the comparison key, or an empty string for {@code null}
 */
private static String normalizeFilename(String filename) {
    if (filename == null) return "";
    // Locale.ROOT: under e.g. a Turkish default locale 'I' would lower-case to a non-ASCII
    // letter and then be stripped. Whitespace needs no separate handling because the filter
    // below removes it anyway.
    return filename.toLowerCase(Locale.ROOT).replaceAll("[^a-zA-Z0-9._-]", "");
}
/**
 * Adds a file-attachment annotation for one embedded file to {@code page}.
 *
 * <p>The annotation is placed at the rectangle derived from ({@code x}, {@code y}) and gets
 * an empty appearance stream. Nothing is added when the embedded file cannot be found in the
 * document's embedded-files name tree, because the annotation would have no file to refer
 * to.
 *
 * @param document document that contains the embedded file
 * @param page page receiving the annotation
 * @param attachment email attachment the annotation describes
 * @param embeddedFilename name under which the file was embedded
 * @param x marker x position as reported by the text stripper
 * @param y marker y position as reported by the text stripper
 * @param attachmentIndex zero-based index used in the annotation text and name
 * @throws IOException when the name tree or the page annotations cannot be read
 */
private static void addAttachmentAnnotationToPageWithMapping(
        PDDocument document,
        PDPage page,
        EmlParser.EmailAttachment attachment,
        String embeddedFilename,
        float x,
        float y,
        int attachmentIndex)
        throws IOException {
    // Resolve the file specification first; every step is null-safe.
    PDDocumentNameDictionary names = document.getDocumentCatalog().getNames();
    PDEmbeddedFilesNameTreeNode efTree = names != null ? names.getEmbeddedFiles() : null;
    Map<String, PDComplexFileSpecification> efMap =
            efTree != null ? efTree.getNames() : null;
    PDComplexFileSpecification fileSpec = efMap != null ? efMap.get(embeddedFilename) : null;
    if (fileSpec == null) {
        return; // Could not find embedded file
    }

    PDAnnotationFileAttachment fileAnnotation = new PDAnnotationFileAttachment();
    PDRectangle rect = calculateAnnotationRectangle(page, x, y);
    fileAnnotation.setRectangle(rect);
    fileAnnotation.setPrinted(false);
    fileAnnotation.setHidden(false);
    fileAnnotation.setNoView(false);
    fileAnnotation.setNoZoom(true);
    fileAnnotation.setNoRotate(true);

    try {
        // Empty appearance stream sized to the annotation rectangle.
        PDAppearanceDictionary appearance = new PDAppearanceDictionary();
        PDAppearanceStream normalAppearance = new PDAppearanceStream(document);
        normalAppearance.setBBox(new PDRectangle(0, 0, rect.getWidth(), rect.getHeight()));
        appearance.setNormalAppearance(normalAppearance);
        fileAnnotation.setAppearance(appearance);
    } catch (RuntimeException e) {
        // Best effort: fall back to an annotation without an explicit appearance.
        fileAnnotation.setAppearance(null);
    }

    fileAnnotation.setFile(fileSpec);
    fileAnnotation.setContents(
            "Attachment " + (attachmentIndex + 1) + ": " + attachment.getFilename());
    fileAnnotation.setAnnotationName(
            "EmbeddedFile_" + attachmentIndex + "_" + embeddedFilename);
    page.getAnnotations().add(fileAnnotation);
}
}