diff --git a/app/common/build.gradle b/app/common/build.gradle
index e5bc8b274..169f7a503 100644
--- a/app/common/build.gradle
+++ b/app/common/build.gradle
@@ -44,6 +44,9 @@ dependencies {
api 'jakarta.servlet:jakarta.servlet-api:6.1.0'
api 'org.snakeyaml:snakeyaml-engine:2.10'
api "org.springdoc:springdoc-openapi-starter-webmvc-ui:2.8.14"
+ // Simple Java Mail for EML/MSG parsing (replaces direct Angus Mail usage)
+ api 'org.simplejavamail:simple-java-mail:8.12.6'
+ api 'org.simplejavamail:outlook-module:8.12.6' // MSG file support
api 'jakarta.mail:jakarta.mail-api:2.1.5'
runtimeOnly 'org.eclipse.angus:angus-mail:2.0.5'
}
diff --git a/app/common/src/main/java/stirling/software/common/util/EmlParser.java b/app/common/src/main/java/stirling/software/common/util/EmlParser.java
index 642bc3a5e..bdc5cc3d0 100644
--- a/app/common/src/main/java/stirling/software/common/util/EmlParser.java
+++ b/app/common/src/main/java/stirling/software/common/util/EmlParser.java
@@ -1,651 +1,417 @@
package stirling.software.common.util;
import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.Method;
-import java.nio.charset.StandardCharsets;
-import java.time.ZoneId;
+import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;
-import java.util.Properties;
-import java.util.regex.Pattern;
+import java.util.Objects;
+import java.util.stream.Collectors;
-import org.springframework.http.MediaType;
+import org.simplejavamail.api.email.AttachmentResource;
+import org.simplejavamail.api.email.Email;
+import org.simplejavamail.api.email.Recipient;
+import org.simplejavamail.converter.EmailConverter;
+
+import jakarta.activation.DataSource;
+import jakarta.mail.Message.RecipientType;
import lombok.Data;
import lombok.experimental.UtilityClass;
+import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.api.converters.EmlToPdfRequest;
+@Slf4j
@UtilityClass
public class EmlParser {
- private static volatile Boolean jakartaMailAvailable = null;
- private static volatile Method mimeUtilityDecodeTextMethod = null;
- private static volatile boolean mimeUtilityChecked = false;
+ // Configuration constants
+ private final int DEFAULT_MAX_ATTACHMENT_MB = 10;
+ private final long MAX_SIZE_ESTIMATION_BYTES = 500L * 1024 * 1024; // 500MB
- private static final Pattern MIME_ENCODED_PATTERN =
- RegexPatternUtils.getInstance().getMimeEncodedWordPattern();
+ // Message constants
+ private final String NO_CONTENT_MESSAGE = "Email content could not be parsed";
+ private final String ATTACHMENT_PREFIX = "attachment-";
- private static final String DISPOSITION_ATTACHMENT = "attachment";
- private static final String TEXT_PLAIN = MediaType.TEXT_PLAIN_VALUE;
- private static final String TEXT_HTML = MediaType.TEXT_HTML_VALUE;
- private static final String MULTIPART_PREFIX = "multipart/";
-
- private static final String HEADER_CONTENT_TYPE = "content-type:";
- private static final String HEADER_CONTENT_DISPOSITION = "content-disposition:";
- private static final String HEADER_CONTENT_TRANSFER_ENCODING = "content-transfer-encoding:";
- private static final String HEADER_CONTENT_ID = "Content-ID";
- private static final String HEADER_SUBJECT = "Subject:";
- private static final String HEADER_FROM = "From:";
- private static final String HEADER_TO = "To:";
- private static final String HEADER_CC = "Cc:";
- private static final String HEADER_BCC = "Bcc:";
- private static final String HEADER_DATE = "Date:";
-
- private static synchronized boolean isJakartaMailAvailable() {
- if (jakartaMailAvailable == null) {
- try {
- Class.forName("jakarta.mail.internet.MimeMessage");
- Class.forName("jakarta.mail.Session");
- Class.forName("jakarta.mail.internet.MimeUtility");
- Class.forName("jakarta.mail.internet.MimePart");
- Class.forName("jakarta.mail.internet.MimeMultipart");
- Class.forName("jakarta.mail.Multipart");
- Class.forName("jakarta.mail.Part");
- jakartaMailAvailable = true;
- } catch (ClassNotFoundException e) {
- jakartaMailAvailable = false;
- }
- }
- return jakartaMailAvailable;
- }
-
- public static EmailContent extractEmailContent(
+ /**
+  * Parses raw email bytes and converts them into an {@link EmailContent}.
+  *
+  * <p>The input is first checked with {@code EmlProcessingUtils.validateEmlInput}, then parsed
+  * by {@code parseEmail} and mapped by {@code buildEmailContent}.
+  *
+  * @param emlBytes raw bytes of the email file
+  * @param request conversion options, forwarded to {@code buildEmailContent}
+  * @param customHtmlSanitizer sanitizer forwarded to {@code buildEmailContent}
+  * @return the extracted email content
+  * @throws IOException declared by this method; {@code parseEmail} and
+  *     {@code buildEmailContent} both declare it
+  */
+ public EmailContent extractEmailContent(
byte[] emlBytes, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer)
throws IOException {
+
EmlProcessingUtils.validateEmlInput(emlBytes);
- if (isJakartaMailAvailable()) {
- return extractEmailContentAdvanced(emlBytes, request, customHtmlSanitizer);
- } else {
- return extractEmailContentBasic(emlBytes, customHtmlSanitizer);
- }
+ Email email = parseEmail(emlBytes);
+ return buildEmailContent(email, request, customHtmlSanitizer);
}
- private static EmailContent extractEmailContentBasic(
- byte[] emlBytes, CustomHtmlSanitizer customHtmlSanitizer) {
- String emlContent = new String(emlBytes, StandardCharsets.UTF_8);
- EmailContent content = new EmailContent();
-
- content.setSubject(extractBasicHeader(emlContent, HEADER_SUBJECT));
- content.setFrom(extractBasicHeader(emlContent, HEADER_FROM));
- content.setTo(extractBasicHeader(emlContent, HEADER_TO));
- content.setCc(extractBasicHeader(emlContent, HEADER_CC));
- content.setBcc(extractBasicHeader(emlContent, HEADER_BCC));
-
- String dateStr = extractBasicHeader(emlContent, HEADER_DATE);
- if (!dateStr.isEmpty()) {
- content.setDateString(dateStr);
- }
-
- String htmlBody = extractHtmlBody(emlContent);
- if (htmlBody != null) {
- content.setHtmlBody(htmlBody);
- } else {
- String textBody = extractTextBody(emlContent);
- content.setTextBody(textBody != null ? textBody : "Email content could not be parsed");
- }
-
- content.getAttachments().addAll(extractAttachmentsBasic(emlContent));
-
- return content;
- }
-
- private static EmailContent extractEmailContentAdvanced(
- byte[] emlBytes, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer) {
- try {
- Class> sessionClass = Class.forName("jakarta.mail.Session");
- Class> mimeMessageClass = Class.forName("jakarta.mail.internet.MimeMessage");
-
- Method getDefaultInstance =
- sessionClass.getMethod("getDefaultInstance", Properties.class);
- Object session = getDefaultInstance.invoke(null, new Properties());
-
- Class>[] constructorArgs = new Class>[] {sessionClass, InputStream.class};
- Constructor> mimeMessageConstructor =
- mimeMessageClass.getConstructor(constructorArgs);
- Object message =
- mimeMessageConstructor.newInstance(session, new ByteArrayInputStream(emlBytes));
-
- return extractFromMimeMessage(message, request, customHtmlSanitizer);
-
- } catch (ReflectiveOperationException e) {
- return extractEmailContentBasic(emlBytes, customHtmlSanitizer);
- }
- }
-
- private static EmailContent extractFromMimeMessage(
- Object message, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer) {
- EmailContent content = new EmailContent();
-
- try {
- Class> messageClass = message.getClass();
-
- Method getSubject = messageClass.getMethod("getSubject");
- String subject = (String) getSubject.invoke(message);
- content.setSubject(subject != null ? safeMimeDecode(subject) : "No Subject");
-
- Method getFrom = messageClass.getMethod("getFrom");
- Object[] fromAddresses = (Object[]) getFrom.invoke(message);
- content.setFrom(buildAddressString(fromAddresses));
-
- extractRecipients(message, messageClass, content);
-
- Method getSentDate = messageClass.getMethod("getSentDate");
- Date legacyDate = (Date) getSentDate.invoke(message);
- if (legacyDate != null) {
- content.setDate(
- ZonedDateTime.ofInstant(legacyDate.toInstant(), ZoneId.systemDefault()));
- }
-
- Method getContent = messageClass.getMethod("getContent");
- Object messageContent = getContent.invoke(message);
-
- processMessageContent(message, messageContent, content, request, customHtmlSanitizer);
-
- } catch (ReflectiveOperationException | RuntimeException e) {
- content.setSubject("Email Conversion");
- content.setFrom("Unknown");
- content.setTo("Unknown");
- content.setCc("");
- content.setBcc("");
- content.setTextBody("Email content could not be parsed with advanced processing");
- }
-
- return content;
- }
-
- private static void extractRecipients(
- Object message, Class> messageClass, EmailContent content) {
- try {
- Method getRecipients =
- messageClass.getMethod(
- "getRecipients", Class.forName("jakarta.mail.Message$RecipientType"));
- Class> recipientTypeClass = Class.forName("jakarta.mail.Message$RecipientType");
-
- Object toType = recipientTypeClass.getField("TO").get(null);
- Object[] toRecipients = (Object[]) getRecipients.invoke(message, toType);
- content.setTo(buildAddressString(toRecipients));
-
- Object ccType = recipientTypeClass.getField("CC").get(null);
- Object[] ccRecipients = (Object[]) getRecipients.invoke(message, ccType);
- content.setCc(buildAddressString(ccRecipients));
-
- Object bccType = recipientTypeClass.getField("BCC").get(null);
- Object[] bccRecipients = (Object[]) getRecipients.invoke(message, bccType);
- content.setBcc(buildAddressString(bccRecipients));
-
- } catch (ReflectiveOperationException e) {
- try {
- Method getAllRecipients = messageClass.getMethod("getAllRecipients");
- Object[] recipients = (Object[]) getAllRecipients.invoke(message);
- content.setTo(buildAddressString(recipients));
- content.setCc("");
- content.setBcc("");
- } catch (ReflectiveOperationException ex) {
- content.setTo("");
- content.setCc("");
- content.setBcc("");
- }
- }
- }
-
- private static String buildAddressString(Object[] addresses) {
- if (addresses == null || addresses.length == 0) {
- return "";
- }
-
- StringBuilder builder = new StringBuilder();
- for (int i = 0; i < addresses.length; i++) {
- if (i > 0) builder.append(", ");
- builder.append(safeMimeDecode(addresses[i].toString()));
- }
- return builder.toString();
- }
-
- private static void processMessageContent(
- Object message,
- Object messageContent,
- EmailContent content,
- EmlToPdfRequest request,
- CustomHtmlSanitizer customHtmlSanitizer) {
- try {
- if (messageContent instanceof String stringContent) {
- Method getContentType = message.getClass().getMethod("getContentType");
- String contentType = (String) getContentType.invoke(message);
-
- if (contentType != null
- && contentType.toLowerCase(Locale.ROOT).contains(TEXT_HTML)) {
- content.setHtmlBody(stringContent);
- } else {
- content.setTextBody(stringContent);
+ private Email parseEmail(byte[] emlBytes) throws IOException {
+ boolean isMsgFile = EmlProcessingUtils.isMsgFile(emlBytes);
+ try (ByteArrayInputStream input = new ByteArrayInputStream(emlBytes)) {
+ Email email;
+ if (isMsgFile) {
+ try {
+ email = EmailConverter.outlookMsgToEmail(input);
+ } catch (Exception e) {
+ // OLE2 magic bytes match but parsing failed - might be DOC/XLS/other OLE2 file
+ throw new IOException(
+ "The file appears to be an OLE2 file (MSG/DOC/XLS) but could not be "
+ + "parsed as an Outlook email. Ensure it is a valid .msg file: "
+ + e.getMessage(),
+ e);
}
} else {
- Class> multipartClass = Class.forName("jakarta.mail.Multipart");
- if (multipartClass.isInstance(messageContent)) {
- processMultipart(messageContent, content, request, customHtmlSanitizer, 0);
- }
- }
- } catch (ReflectiveOperationException | ClassCastException e) {
- content.setTextBody("Email content could not be parsed with advanced processing");
- }
- }
-
- private static void processMultipart(
- Object multipart,
- EmailContent content,
- EmlToPdfRequest request,
- CustomHtmlSanitizer customHtmlSanitizer,
- int depth) {
-
- final int MAX_MULTIPART_DEPTH = 10;
- if (depth > MAX_MULTIPART_DEPTH) {
- content.setHtmlBody("
Maximum multipart depth exceeded
");
- return;
- }
-
- try {
- Class> multipartClass = multipart.getClass();
- Method getCount = multipartClass.getMethod("getCount");
- int count = (Integer) getCount.invoke(multipart);
-
- Method getBodyPart = multipartClass.getMethod("getBodyPart", int.class);
-
- for (int i = 0; i < count; i++) {
- Object part = getBodyPart.invoke(multipart, i);
- processPart(part, content, request, customHtmlSanitizer, depth + 1);
+ email = EmailConverter.emlToEmail(input);
}
- } catch (ReflectiveOperationException | ClassCastException e) {
- content.setHtmlBody("Error processing multipart content
");
- }
- }
-
- private static void processPart(
- Object part,
- EmailContent content,
- EmlToPdfRequest request,
- CustomHtmlSanitizer customHtmlSanitizer,
- int depth) {
- try {
- Class> partClass = part.getClass();
-
- Method isMimeType = partClass.getMethod("isMimeType", String.class);
- Method getContent = partClass.getMethod("getContent");
- Method getDisposition = partClass.getMethod("getDisposition");
- Method getFileName = partClass.getMethod("getFileName");
- Method getContentType = partClass.getMethod("getContentType");
- Method getHeader = partClass.getMethod("getHeader", String.class);
-
- Object disposition = getDisposition.invoke(part);
- String filename = (String) getFileName.invoke(part);
- String contentType = (String) getContentType.invoke(part);
-
- String normalizedDisposition =
- disposition != null ? ((String) disposition).toLowerCase(Locale.ROOT) : null;
-
- if ((Boolean) isMimeType.invoke(part, TEXT_PLAIN) && normalizedDisposition == null) {
- Object partContent = getContent.invoke(part);
- if (partContent instanceof String stringContent) {
- content.setTextBody(stringContent);
- }
- } else if ((Boolean) isMimeType.invoke(part, TEXT_HTML)
- && normalizedDisposition == null) {
- Object partContent = getContent.invoke(part);
- if (partContent instanceof String stringContent) {
- String htmlBody =
- customHtmlSanitizer != null
- ? customHtmlSanitizer.sanitize(stringContent)
- : stringContent;
- content.setHtmlBody(htmlBody);
- }
- } else if ((normalizedDisposition != null
- && normalizedDisposition.contains(DISPOSITION_ATTACHMENT))
- || (filename != null && !filename.trim().isEmpty())) {
-
- processAttachment(
- part, content, request, getHeader, getContent, filename, contentType);
- } else if ((Boolean) isMimeType.invoke(part, "multipart/*")) {
- Object multipartContent = getContent.invoke(part);
- if (multipartContent != null) {
- Class> multipartClass = Class.forName("jakarta.mail.Multipart");
- if (multipartClass.isInstance(multipartContent)) {
- processMultipart(
- multipartContent, content, request, customHtmlSanitizer, depth + 1);
- }
- }
- }
-
- } catch (ReflectiveOperationException | RuntimeException e) {
- // Continue processing other parts if one fails
- }
- }
-
- private static void processAttachment(
- Object part,
- EmailContent content,
- EmlToPdfRequest request,
- Method getHeader,
- Method getContent,
- String filename,
- String contentType) {
-
- content.setAttachmentCount(content.getAttachmentCount() + 1);
-
- if (filename != null && !filename.trim().isEmpty()) {
- EmailAttachment attachment = new EmailAttachment();
- attachment.setFilename(safeMimeDecode(filename));
- attachment.setContentType(contentType);
-
- try {
- String[] contentIdHeaders = (String[]) getHeader.invoke(part, HEADER_CONTENT_ID);
- if (contentIdHeaders != null) {
- for (String contentIdHeader : contentIdHeaders) {
- if (contentIdHeader != null && !contentIdHeader.trim().isEmpty()) {
- attachment.setEmbedded(true);
- String contentId =
- RegexPatternUtils.getInstance()
- .getAngleBracketsPattern()
- .matcher(contentIdHeader.trim())
- .replaceAll("");
- attachment.setContentId(contentId);
- break;
- }
- }
- }
- } catch (ReflectiveOperationException e) {
- }
-
- if ((request != null && request.isIncludeAttachments()) || attachment.isEmbedded()) {
- extractAttachmentData(part, attachment, getContent, request);
- }
-
- content.getAttachments().add(attachment);
- }
- }
-
- private static void extractAttachmentData(
- Object part, EmailAttachment attachment, Method getContent, EmlToPdfRequest request) {
- try {
- Object attachmentContent = getContent.invoke(part);
- byte[] attachmentData = null;
-
- if (attachmentContent instanceof InputStream inputStream) {
- try (InputStream stream = inputStream) {
- attachmentData = stream.readAllBytes();
- } catch (IOException e) {
- if (attachment.isEmbedded()) {
- attachmentData = new byte[0];
- } else {
- throw new RuntimeException(e);
- }
- }
- } else if (attachmentContent instanceof byte[] byteArray) {
- attachmentData = byteArray;
- } else if (attachmentContent instanceof String stringContent) {
- attachmentData = stringContent.getBytes(StandardCharsets.UTF_8);
- }
-
- if (attachmentData != null) {
- long maxSizeMB = request != null ? request.getMaxAttachmentSizeMB() : 10L;
- long maxSizeBytes = maxSizeMB * 1024 * 1024;
-
- if (attachmentData.length <= maxSizeBytes || attachment.isEmbedded()) {
- attachment.setData(attachmentData);
- attachment.setSizeBytes(attachmentData.length);
- } else {
- attachment.setSizeBytes(attachmentData.length);
- }
- }
- } catch (ReflectiveOperationException | RuntimeException e) {
- // Continue without attachment data
- }
- }
-
- private static String extractBasicHeader(String emlContent, String headerName) {
- try {
- String[] lines =
- RegexPatternUtils.getInstance().getNewlineSplitPattern().split(emlContent);
- for (int i = 0; i < lines.length; i++) {
- String line = lines[i];
- if (line.toLowerCase(Locale.ROOT).startsWith(headerName.toLowerCase(Locale.ROOT))) {
- StringBuilder value =
- new StringBuilder(line.substring(headerName.length()).trim());
- for (int j = i + 1; j < lines.length; j++) {
- if (lines[j].startsWith(" ") || lines[j].startsWith("\t")) {
- value.append(" ").append(lines[j].trim());
- } else {
- break;
- }
- }
- return safeMimeDecode(value.toString());
- }
- if (line.trim().isEmpty()) break;
- }
- } catch (RuntimeException e) {
- // Ignore errors in header extraction
- }
- return "";
- }
-
- private static String extractHtmlBody(String emlContent) {
- try {
- String lowerContent = emlContent.toLowerCase(Locale.ROOT);
- int htmlStart = lowerContent.indexOf(HEADER_CONTENT_TYPE + " " + TEXT_HTML);
- if (htmlStart == -1) return null;
-
- int bodyStart = emlContent.indexOf("\r\n\r\n", htmlStart);
- if (bodyStart == -1) bodyStart = emlContent.indexOf("\n\n", htmlStart);
- if (bodyStart == -1) return null;
-
- bodyStart += (emlContent.charAt(bodyStart + 1) == '\r') ? 4 : 2;
- int bodyEnd = findPartEnd(emlContent, bodyStart);
-
- return emlContent.substring(bodyStart, bodyEnd).trim();
+ return email;
+ } catch (IOException e) {
+ throw e; // Re-throw IOException as-is
} catch (Exception e) {
- return null;
+ throw new IOException(
+ String.format(
+ "Failed to parse EML file with Simple Java Mail: %s", e.getMessage()),
+ e);
}
}
- private static String extractTextBody(String emlContent) {
- try {
- String lowerContent = emlContent.toLowerCase(Locale.ROOT);
- int textStart = lowerContent.indexOf(HEADER_CONTENT_TYPE + " " + TEXT_PLAIN);
- if (textStart == -1) {
- int bodyStart = emlContent.indexOf("\r\n\r\n");
- if (bodyStart == -1) bodyStart = emlContent.indexOf("\n\n");
- if (bodyStart != -1) {
- bodyStart += (emlContent.charAt(bodyStart + 1) == '\r') ? 4 : 2;
- int bodyEnd = findPartEnd(emlContent, bodyStart);
- return emlContent.substring(bodyStart, bodyEnd).trim();
- }
- return null;
- }
+ /**
+  * Maps a parsed {@link Email} onto an {@link EmailContent}.
+  *
+  * <p>Copies subject, sender and TO/CC/BCC recipients, converts the sent date to UTC, runs
+  * both bodies through the sanitizer when one is supplied, and collects embedded images
+  * followed by regular attachments. If both bodies end up blank, the text body is set to
+  * {@code NO_CONTENT_MESSAGE}.
+  *
+  * @param email parsed message
+  * @param request conversion options, forwarded to {@code mapResources}
+  * @param customHtmlSanitizer sanitizer applied to the bodies; skipped when {@code null}
+  * @return the populated content object
+  * @throws IOException propagated from {@code mapResources}
+  */
+ private EmailContent buildEmailContent(
+ Email email, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer)
+ throws IOException {
- int bodyStart = emlContent.indexOf("\r\n\r\n", textStart);
- if (bodyStart == -1) bodyStart = emlContent.indexOf("\n\n", textStart);
- if (bodyStart == -1) return null;
+ EmailContent content = new EmailContent();
+ content.setSubject(defaultString(email.getSubject()));
+ content.setFrom(formatRecipient(email.getFromRecipient()));
+ content.setTo(formatRecipients(email.getRecipients(), RecipientType.TO));
+ content.setCc(formatRecipients(email.getRecipients(), RecipientType.CC));
+ content.setBcc(formatRecipients(email.getRecipients(), RecipientType.BCC));
- bodyStart += (emlContent.charAt(bodyStart + 1) == '\r') ? 4 : 2;
- int bodyEnd = findPartEnd(emlContent, bodyStart);
-
- return emlContent.substring(bodyStart, bodyEnd).trim();
- } catch (RuntimeException e) {
- return null;
- }
- }
-
- private static int findPartEnd(String content, int start) {
- String[] lines =
- RegexPatternUtils.getInstance()
- .getNewlineSplitPattern()
- .split(content.substring(start));
- StringBuilder result = new StringBuilder();
-
- for (String line : lines) {
- if (line.startsWith("--") && line.length() > 10) break;
- result.append(line).append("\n");
+ Date sentDate = email.getSentDate();
+ if (sentDate != null) {
+ // Use UTC for consistent timezone handling across deployments
+ content.setDate(ZonedDateTime.ofInstant(sentDate.toInstant(), ZoneOffset.UTC));
}
- return start + result.length();
- }
+ String htmlBody = email.getHTMLText();
+ if (customHtmlSanitizer != null && htmlBody != null) {
+ htmlBody = customHtmlSanitizer.sanitize(htmlBody);
+ }
+ content.setHtmlBody(htmlBody);
+
+ // NOTE(review): the plain-text body goes through the same sanitizer as the HTML body;
+ // confirm this is intended for text that is not HTML.
+ String textBody = email.getPlainText();
+ if (customHtmlSanitizer != null && textBody != null) {
+ textBody = customHtmlSanitizer.sanitize(textBody);
+ }
+ content.setTextBody(textBody);
+
+ // Fall back to a placeholder message when neither body has any content.
+ if (isBlank(content.getHtmlBody()) && isBlank(content.getTextBody())) {
+ content.setTextBody(NO_CONTENT_MESSAGE);
+ }
- private static List extractAttachmentsBasic(String emlContent) {
List attachments = new ArrayList<>();
- try {
- String[] lines =
- RegexPatternUtils.getInstance().getNewlineSplitPattern().split(emlContent);
- boolean inHeaders = true;
- String currentContentType = "";
- String currentDisposition = "";
- String currentFilename = "";
- String currentEncoding = "";
+ // Embedded images come first, then regular attachments; the count covers both.
+ attachments.addAll(mapResources(email.getEmbeddedImages(), request, true));
+ attachments.addAll(mapResources(email.getAttachments(), request, false));
+ content.setAttachments(attachments);
+ content.setAttachmentCount(attachments.size());
- for (String line : lines) {
- String lowerLine = line.toLowerCase(Locale.ROOT).trim();
+ return content;
+ }
- if (line.trim().isEmpty()) {
- inHeaders = false;
- if (isAttachment(currentDisposition, currentFilename, currentContentType)) {
- EmailAttachment attachment = new EmailAttachment();
- attachment.setFilename(currentFilename);
- attachment.setContentType(currentContentType);
- attachment.setTransferEncoding(currentEncoding);
- attachments.add(attachment);
+    /**
+     * Converts Simple Java Mail resources into {@link EmailAttachment} objects.
+     *
+     * <p>{@code null} entries are skipped. For regular (non-embedded) resources without a
+     * usable name, a running counter is incremented and handed to {@code toEmailAttachment}
+     * so each generated filename is unique within the list.
+     *
+     * @param resources resources to convert; may be {@code null} or empty
+     * @param request conversion options, forwarded to {@code toEmailAttachment}
+     * @param embedded {@code true} when the resources are inline (embedded) images
+     * @return the converted attachments; an empty immutable list when there is nothing to map
+     * @throws IOException if reading a resource's data fails
+     */
+    private List<EmailAttachment> mapResources(
+            List<AttachmentResource> resources, EmlToPdfRequest request, boolean embedded)
+            throws IOException {
+
+        if (resources == null || resources.isEmpty()) {
+            return List.of();
+        }
+
+        List<EmailAttachment> mapped = new ArrayList<>(resources.size());
+        int unnamedCounter = 0; // Incremented before use, so generated names start at 1
+
+        for (AttachmentResource resource : resources) {
+            if (resource == null) {
+                continue;
+            }
+
+            // Only regular attachments receive generated names; embedded ones keep their own.
+            boolean needsGeneratedName = !embedded && needsGeneratedFilename(resource);
+            if (needsGeneratedName) {
+                unnamedCounter++;
+            }
+
+            EmailAttachment attachment =
+                    toEmailAttachment(resource, request, embedded, unnamedCounter);
+            if (attachment != null) {
+                mapped.add(attachment);
+            }
+        }
+        return mapped;
+    }
+
+    /**
+     * Tells whether a resource has no usable name of its own.
+     *
+     * @param resource resource to inspect; {@code null} yields {@code false}
+     * @return {@code true} only when both the resource name and its data source name are blank
+     */
+    private boolean needsGeneratedFilename(AttachmentResource resource) {
+        if (resource == null || !isBlank(resource.getName())) {
+            return false;
+        }
+        return isBlank(resource.getDataSource().getName());
+    }
+
+    /**
+     * Builds an {@link EmailAttachment} from a single mail resource.
+     *
+     * <p>The filename is taken from the data source when it has one, otherwise from the
+     * resource itself, and is MIME-decoded. If the result is blank, embedded resources fall
+     * back to the raw resource name and regular attachments get
+     * {@code ATTACHMENT_PREFIX + counter + extension}. Embedded resources also receive a
+     * content id derived from the resource name.
+     *
+     * @param resource resource to convert; {@code null} yields {@code null}
+     * @param request conversion options used for the size limit and inclusion decision
+     * @param embedded {@code true} for inline images
+     * @param counter number used when a filename has to be generated
+     * @return the populated attachment, or {@code null} when {@code resource} is {@code null}
+     * @throws IOException if the attachment data cannot be read
+     */
+    private EmailAttachment toEmailAttachment(
+            AttachmentResource resource, EmlToPdfRequest request, boolean embedded, int counter)
+            throws IOException {
+
+        if (resource == null) {
+            return null;
+        }
+
+        String rawName = defaultString(resource.getName());
+        DataSource source = resource.getDataSource();
+        String declaredType = source.getContentType();
+
+        // Prefer the data source's own name over the resource name.
+        String candidate = isBlank(source.getName()) ? rawName : source.getName();
+        String filename = safeMimeDecode(candidate);
+
+        if (isBlank(filename)) {
+            String extension = detectExtensionFromMimeType(declaredType);
+            filename = embedded ? rawName : (ATTACHMENT_PREFIX + counter + extension);
+        }
+
+        EmailAttachment result = new EmailAttachment();
+        result.setEmbedded(embedded);
+        result.setFilename(filename);
+        result.setContentId(embedded ? stripCid(rawName) : null);
+        result.setContentType(EmlProcessingUtils.detectMimeType(filename, declaredType));
+
+        // The size is always recorded; the bytes are attached only when allowed.
+        ReadResult payload = readData(source, embedded, request);
+        if (payload != null) {
+            result.setSizeBytes(payload.totalSize());
+            if (shouldIncludeAttachmentData(embedded, request, payload)) {
+                result.setData(payload.data());
+            }
+        }
+
+        return result;
+    }
+
+    /**
+     * Decides whether the bytes that were read should be stored on the attachment.
+     *
+     * <p>Embedded images are kept whenever data is present. Regular attachments are kept only
+     * when the request asks for attachments and the data fits within the configured limit.
+     *
+     * @param embedded {@code true} for inline images
+     * @param request conversion options; {@code null} means regular attachments are excluded
+     * @param readResult outcome of reading the attachment; may be {@code null}
+     * @return {@code true} when the data should be attached
+     */
+    private boolean shouldIncludeAttachmentData(
+            boolean embedded, EmlToPdfRequest request, ReadResult readResult) {
+        boolean hasData = readResult != null && readResult.data() != null;
+        if (embedded) {
+            return hasData;
+        }
+
+        boolean attachmentsRequested = request != null && request.isIncludeAttachments();
+        if (!attachmentsRequested || !hasData) {
+            return false;
+        }
+        return readResult.data().length <= getMaxAttachmentSizeBytes(request);
+    }
+
+    /**
+     * Picks a file extension (including the leading dot) for a MIME type.
+     *
+     * <p>Parameters such as {@code ; charset=utf-8} and surrounding whitespace are ignored and
+     * matching is case-insensitive. Exact matches are tried first; a few substring checks then
+     * cover Office Open XML and OpenDocument variants.
+     *
+     * @param mimeType MIME type, possibly with parameters; may be {@code null}
+     * @return the extension, or an empty string when the type is {@code null} or unknown
+     */
+    private String detectExtensionFromMimeType(String mimeType) {
+        if (mimeType == null) {
+            return "";
+        }
+
+        String lower = mimeType.toLowerCase(Locale.ROOT);
+
+        // Remove any parameters (e.g., "text/plain; charset=utf-8" -> "text/plain")
+        int semicolon = lower.indexOf(';');
+        if (semicolon >= 0) {
+            lower = lower.substring(0, semicolon);
+        }
+        // Trim unconditionally so " text/plain " matches even without parameters.
+        lower = lower.trim();
+
+        return switch (lower) {
+            case "application/pdf" -> ".pdf";
+            case "image/png" -> ".png";
+            case "image/jpeg", "image/jpg" -> ".jpg";
+            case "image/gif" -> ".gif";
+            case "image/webp" -> ".webp";
+            case "image/bmp" -> ".bmp";
+            case "text/plain" -> ".txt";
+            case "text/html" -> ".html";
+            case "text/xml", "application/xml" -> ".xml";
+            case "application/json" -> ".json";
+            case "application/zip" -> ".zip";
+            case "application/octet-stream" -> ".bin";
+            // Legacy binary Office formats are not Office Open XML, so they must not be
+            // given the .docx/.xlsx/.pptx extensions.
+            case "application/msword" -> ".doc";
+            case "application/vnd.ms-excel" -> ".xls";
+            case "application/vnd.ms-powerpoint" -> ".ppt";
+            default -> {
+                if (lower.contains("wordprocessingml") || lower.contains("msword")) {
+                    yield ".docx";
+                }
+                if (lower.contains("spreadsheetml") || lower.contains("excel")) {
+                    yield ".xlsx";
+                }
+                if (lower.contains("presentationml") || lower.contains("powerpoint")) {
+                    yield ".pptx";
+                }
+                if (lower.contains("opendocument.text")) {
+                    yield ".odt";
+                }
+                if (lower.contains("opendocument.spreadsheet")) {
+                    yield ".ods";
+                }
+                yield "";
+            }
+        };
+    }
+
+ /**
+  * Reads the bytes of an attachment's data source.
+  *
+  * <p>Regular attachments are read in chunks and abandoned once they exceed the configured
+  * limit; the result then carries no data and a size obtained from
+  * {@code countRemainingBytes}. Embedded resources are read in full. A read failure on an
+  * embedded resource yields an empty placeholder instead of an exception.
+  *
+  * @param dataSource source to read; {@code null} yields {@code null}
+  * @param embedded {@code true} for inline images
+  * @param request conversion options supplying the size limit
+  * @return the data and its size, or {@code null} when {@code dataSource} is {@code null}
+  * @throws IOException if reading a regular (non-embedded) attachment fails
+  */
+ private ReadResult readData(DataSource dataSource, boolean embedded, EmlToPdfRequest request)
+ throws IOException {
+ if (dataSource == null) {
+ return null;
+ }
+
+ long maxBytes = getMaxAttachmentSizeBytes(request);
+
+ try (InputStream input = dataSource.getInputStream()) {
+ // Embedded images are usually needed for display regardless of size,
+ // but regular attachments should be guarded against OOM
+ // NOTE(review): with a null request a regular attachment takes the unbounded branch
+ // below, so maxBytes is never applied in that case; confirm this is intended.
+ if (!embedded && request != null) {
+ byte[] buffer = new byte[8192];
+ ByteArrayOutputStream output = new ByteArrayOutputStream();
+ int bytesRead;
+ long totalBytes = 0;
+ while ((bytesRead = input.read(buffer)) != -1) {
+ totalBytes += bytesRead;
+ if (totalBytes > maxBytes) {
+ // Attachment too large - skip remaining data but estimate total size
+ // (the value returned by countRemainingBytes includes the bytes already read)
+ long remainingBytes = countRemainingBytes(input, totalBytes);
+ log.debug(
+ "Attachment exceeds size limit: {} bytes (max: {} bytes), skipping",
+ remainingBytes,
+ maxBytes);
+ return new ReadResult(null, remainingBytes);
}
- currentContentType = "";
- currentDisposition = "";
- currentFilename = "";
- currentEncoding = "";
- inHeaders = true;
- continue;
- }
-
- if (!inHeaders) continue;
-
- if (lowerLine.startsWith(HEADER_CONTENT_TYPE)) {
- currentContentType = line.substring(HEADER_CONTENT_TYPE.length()).trim();
- } else if (lowerLine.startsWith(HEADER_CONTENT_DISPOSITION)) {
- currentDisposition = line.substring(HEADER_CONTENT_DISPOSITION.length()).trim();
- currentFilename = extractFilenameFromDisposition(currentDisposition);
- } else if (lowerLine.startsWith(HEADER_CONTENT_TRANSFER_ENCODING)) {
- currentEncoding =
- line.substring(HEADER_CONTENT_TRANSFER_ENCODING.length()).trim();
+ output.write(buffer, 0, bytesRead);
}
+ byte[] data = output.toByteArray();
+ return new ReadResult(data, data.length);
+ } else {
+ byte[] data = input.readAllBytes();
+ return new ReadResult(data, data.length);
}
- } catch (RuntimeException e) {
- // Continue with empty list
+ } catch (IOException e) {
+ // A broken inline image is replaced by an empty placeholder; other failures propagate.
+ if (embedded) {
+ log.debug(
+ "Failed to read embedded image, using empty placeholder: {}",
+ e.getMessage());
+ return new ReadResult(new byte[0], 0);
+ }
+ throw e;
}
- return attachments;
}
- private static boolean isAttachment(String disposition, String filename, String contentType) {
- return (disposition.toLowerCase(Locale.ROOT).contains(DISPOSITION_ATTACHMENT)
- && !filename.isEmpty())
- || (!filename.isEmpty()
- && !contentType.toLowerCase(Locale.ROOT).startsWith("text/"))
- || (contentType.toLowerCase(Locale.ROOT).contains("application/")
- && !filename.isEmpty());
+    /**
+     * Estimates the total size of a stream that is being abandoned part-way through.
+     *
+     * <p>Starting from {@code alreadyRead}, the rest of the stream is consumed without being
+     * buffered. Counting stops once {@code MAX_SIZE_ESTIMATION_BYTES} is reached, so the
+     * result is a lower bound for very large streams.
+     *
+     * @param input stream positioned just after the bytes already consumed
+     * @param alreadyRead number of bytes consumed before this call
+     * @return {@code alreadyRead} plus the number of further bytes skipped or read
+     * @throws IOException if skipping or reading fails
+     */
+    private long countRemainingBytes(InputStream input, long alreadyRead) throws IOException {
+        long count = alreadyRead;
+
+        // Fast path: let the stream discard bytes without copying them.
+        long skipped;
+        while (count < MAX_SIZE_ESTIMATION_BYTES
+                && (skipped = input.skip(MAX_SIZE_ESTIMATION_BYTES - count)) > 0) {
+            count += skipped;
+        }
+
+        // skip() may return 0 before end of stream, and available() is only an estimate that
+        // can be 0 while data remains, so drain with read() until EOF or the cap is reached.
+        byte[] buffer = new byte[8192];
+        int read;
+        while (count < MAX_SIZE_ESTIMATION_BYTES && (read = input.read(buffer)) != -1) {
+            count += read;
+        }
+
+        return count;
}
- private static String extractFilenameFromDisposition(String disposition) {
- if (disposition == null || !disposition.contains("filename=")) {
+ /**
+  * Formats all recipients of one type as a comma-separated string.
+  *
+  * <p>{@code null} entries and recipients that format to a blank string are left out.
+  *
+  * @param recipients all recipients of the message; may be {@code null}
+  * @param type recipient type to select (TO, CC or BCC); may be {@code null}
+  * @return the formatted recipients joined with {@code ", "}, or an empty string when either
+  *     argument is {@code null} or nothing matches
+  */
+ private String formatRecipients(List<Recipient> recipients, RecipientType type) {
+ if (recipients == null || type == null) {
return "";
}
- // Handle filename*= (RFC 2231 encoded filename)
- if (disposition.toLowerCase(Locale.ROOT).contains("filename*=")) {
- int filenameStarStart = disposition.toLowerCase(Locale.ROOT).indexOf("filename*=") + 10;
- int filenameStarEnd = disposition.indexOf(";", filenameStarStart);
- if (filenameStarEnd == -1) filenameStarEnd = disposition.length();
- String extendedFilename =
- disposition.substring(filenameStarStart, filenameStarEnd).trim();
- extendedFilename =
- RegexPatternUtils.getInstance()
- .getQuotesRemovalPattern()
- .matcher(extendedFilename)
- .replaceAll("");
-
- if (extendedFilename.contains("'")) {
- String[] parts = extendedFilename.split("'", 3);
- if (parts.length == 3) {
- return EmlProcessingUtils.decodeUrlEncoded(parts[2]);
- }
- }
- }
-
- // Handle regular filename=
- int filenameStart = disposition.toLowerCase(Locale.ROOT).indexOf("filename=") + 9;
- int filenameEnd = disposition.indexOf(";", filenameStart);
- if (filenameEnd == -1) filenameEnd = disposition.length();
- String filename = disposition.substring(filenameStart, filenameEnd).trim();
- filename =
- RegexPatternUtils.getInstance()
- .getQuotesRemovalPattern()
- .matcher(filename)
- .replaceAll("");
- return safeMimeDecode(filename);
+ return recipients.stream()
+ .filter(Objects::nonNull)
+ // Use type.equals() for null-safe comparison (recipient.getType() may be null)
+ .filter(recipient -> type.equals(recipient.getType()))
+ .map(EmlParser::formatRecipient)
+ .filter(string -> !isBlank(string))
+ .collect(Collectors.joining(", "));
}
- public static String safeMimeDecode(String headerValue) {
- if (headerValue == null || headerValue.trim().isEmpty()) {
+ /**
+  * Formats a single recipient for display.
+  *
+  * <p>Name and address are MIME-decoded first. When both are present the result is
+  * {@code name <address>}; otherwise whichever one is non-blank is returned.
+  *
+  * @param recipient recipient to format; may be {@code null}
+  * @return the formatted recipient, or an empty string when {@code recipient} is
+  *     {@code null}
+  */
+ private String formatRecipient(Recipient recipient) {
+ if (recipient == null) {
return "";
}
- if (!mimeUtilityChecked) {
- synchronized (EmlParser.class) {
- if (!mimeUtilityChecked) {
- initializeMimeUtilityDecoding();
- }
- }
- }
+ String name = safeMimeDecode(recipient.getName());
+ String address = safeMimeDecode(recipient.getAddress());
- if (mimeUtilityDecodeTextMethod != null) {
- try {
- return (String) mimeUtilityDecodeTextMethod.invoke(null, headerValue.trim());
- } catch (ReflectiveOperationException | RuntimeException e) {
- // Fall through to custom implementation
- }
+ if (!isBlank(name) && !isBlank(address)) {
+ return name + " <" + address + ">";
}
+ // Only one of the two is usable (or neither): prefer the name, else the address.
+ return !isBlank(name) ? name : address;
+ }
+ /**
+  * Decodes a MIME-encoded header value.
+  *
+  * <p>The trimmed value is passed to {@code EmlProcessingUtils.decodeMimeHeader}.
+  *
+  * @param headerValue raw header value; may be {@code null}
+  * @return the decoded value, or an empty string when the input is {@code null} or blank
+  */
+ public String safeMimeDecode(String headerValue) {
+ if (isBlank(headerValue)) {
+ return "";
+ }
return EmlProcessingUtils.decodeMimeHeader(headerValue.trim());
}
- private static void initializeMimeUtilityDecoding() {
- try {
- Class> mimeUtilityClass = Class.forName("jakarta.mail.internet.MimeUtility");
- mimeUtilityDecodeTextMethod = mimeUtilityClass.getMethod("decodeText", String.class);
- } catch (ClassNotFoundException | NoSuchMethodException e) {
- mimeUtilityDecodeTextMethod = null;
+    /**
+     * Removes everything matched by the shared angle-brackets pattern from a content id and
+     * trims the result.
+     *
+     * @param contentId raw content id; may be {@code null}
+     * @return the cleaned content id, or {@code null} when the input is {@code null}
+     */
+    private String stripCid(String contentId) {
+        if (contentId == null) {
+            return null;
+        }
+        String withoutBrackets =
+                RegexPatternUtils.getInstance()
+                        .getAngleBracketsPattern()
+                        .matcher(contentId)
+                        .replaceAll("");
+        return withoutBrackets.trim();
+    }
+
+    /**
+     * Resolves the attachment size limit in bytes.
+     *
+     * @param request conversion options; when {@code null} the limit falls back to
+     *     {@code DEFAULT_MAX_ATTACHMENT_MB}
+     * @return the limit in megabytes multiplied by 1024 * 1024
+     */
+    private long getMaxAttachmentSizeBytes(EmlToPdfRequest request) {
+        final long megabytes;
+        if (request == null) {
+            megabytes = DEFAULT_MAX_ATTACHMENT_MB;
+        } else {
+            megabytes = request.getMaxAttachmentSizeMB();
+        }
+        return megabytes * 1024L * 1024L;
+    }
+
+    /**
+     * Tells whether a string is {@code null} or empty after {@code trim()}.
+     *
+     * @param value string to test; may be {@code null}
+     * @return {@code true} when there is no non-whitespace content
+     */
+    private boolean isBlank(String value) {
+        if (value == null) {
+            return true;
+        }
+        return value.trim().isEmpty();
+    }
+
+    /**
+     * Replaces {@code null} with an empty string.
+     *
+     * @param value string that may be {@code null}
+     * @return {@code value} itself when non-null, otherwise {@code ""}
+     */
+    private String defaultString(String value) {
+        if (value == null) {
+            return "";
+        }
+        return value;
+    }
+
+ /**
+  * Outcome of reading an attachment's data source.
+  *
+  * @param data bytes that were read; may be {@code null} when only the size is reported
+  * @param totalSize reported size in bytes; never negative and never smaller than
+  *     {@code data.length}
+  */
+ private record ReadResult(byte[] data, long totalSize) {
+ // Compact constructor: rejects a negative size and data longer than the reported size.
+ public ReadResult {
+ if (totalSize < 0) {
+ throw new IllegalArgumentException("Size cannot be negative: " + totalSize);
+ }
+ if (data != null && data.length > totalSize) {
+ throw new IllegalArgumentException(
+ "Data length (" + data.length + ") exceeds total size (" + totalSize + ")");
+ }
}
- mimeUtilityChecked = true;
}
@Data
- public static class EmailContent {
+ public class EmailContent {
private String subject;
private String from;
private String to;
private String cc;
private String bcc;
private ZonedDateTime date;
- private String dateString; // For basic parsing fallback
+ private String dateString; // Maintained for compatibility
private String htmlBody;
private String textBody;
private int attachmentCount;
@@ -673,7 +439,7 @@ public class EmlParser {
}
@Data
- public static class EmailAttachment {
+ public class EmailAttachment {
private String filename;
private String contentType;
private byte[] data;
diff --git a/app/common/src/main/java/stirling/software/common/util/EmlProcessingUtils.java b/app/common/src/main/java/stirling/software/common/util/EmlProcessingUtils.java
index 69b181161..25af604c8 100644
--- a/app/common/src/main/java/stirling/software/common/util/EmlProcessingUtils.java
+++ b/app/common/src/main/java/stirling/software/common/util/EmlProcessingUtils.java
@@ -1,5 +1,7 @@
package stirling.software.common.util;
+import java.io.IOException;
+import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
@@ -8,32 +10,41 @@ import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.springframework.core.io.ClassPathResource;
import org.springframework.http.MediaType;
+import lombok.Synchronized;
import lombok.experimental.UtilityClass;
+import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.api.converters.EmlToPdfRequest;
import stirling.software.common.model.api.converters.HTMLToPdfRequest;
+@Slf4j
@UtilityClass
public class EmlProcessingUtils {
// Style constants
- private static final int DEFAULT_FONT_SIZE = 12;
- private static final String DEFAULT_FONT_FAMILY = "Helvetica, sans-serif";
- private static final float DEFAULT_LINE_HEIGHT = 1.4f;
- private static final String DEFAULT_ZOOM = "1.0";
- private static final String DEFAULT_TEXT_COLOR = "#202124";
- private static final String DEFAULT_BACKGROUND_COLOR = "#ffffff";
- private static final String DEFAULT_BORDER_COLOR = "#e8eaed";
- private static final String ATTACHMENT_BACKGROUND_COLOR = "#f9f9f9";
- private static final String ATTACHMENT_BORDER_COLOR = "#eeeeee";
+ private final int DEFAULT_FONT_SIZE = 12;
+ private final String DEFAULT_FONT_FAMILY = "Helvetica, sans-serif";
+ private final float DEFAULT_LINE_HEIGHT = 1.4f;
+ private final String DEFAULT_ZOOM = "1.0";
+ private final String DEFAULT_TEXT_COLOR = "#202124";
+ private final String DEFAULT_BACKGROUND_COLOR = "#ffffff";
+ private final String DEFAULT_BORDER_COLOR = "#e8eaed";
+ private final String ATTACHMENT_BACKGROUND_COLOR = "#f9f9f9";
+ private final String ATTACHMENT_BORDER_COLOR = "#eeeeee";
- private static final int EML_CHECK_LENGTH = 8192;
- private static final int MIN_HEADER_COUNT_FOR_VALID_EML = 2;
-
- // MIME type detection
- private static final Map EXTENSION_TO_MIME_TYPE =
+ private final String CSS_RESOURCE_PATH = "templates/email-pdf-styles.css";
+ private final int EML_CHECK_LENGTH = 8192;
+ private final int MIN_HEADER_COUNT_FOR_VALID_EML = 2;
+ // MSG file magic bytes (Compound File Binary Format / OLE2)
+ // D0 CF 11 E0 A1 B1 1A E1
+ private final byte[] MSG_MAGIC_BYTES = {
+ (byte) 0xD0, (byte) 0xCF, (byte) 0x11, (byte) 0xE0,
+ (byte) 0xA1, (byte) 0xB1, (byte) 0x1A, (byte) 0xE1
+ };
+ private final Map EXTENSION_TO_MIME_TYPE =
Map.of(
".png", MediaType.IMAGE_PNG_VALUE,
".jpg", MediaType.IMAGE_JPEG_VALUE,
@@ -45,18 +56,36 @@ public class EmlProcessingUtils {
".ico", "image/x-icon",
".tiff", "image/tiff",
".tif", "image/tiff");
+ private volatile String cachedCssContent = null;
- public static void validateEmlInput(byte[] emlBytes) {
+ /**
+  * Validates raw email bytes before parsing.
+  *
+  * <p>Throws the exception built by {@code ExceptionUtils.createEmlEmptyException()} when
+  * the input is {@code null} or empty. Input recognised by {@link #isMsgFile(byte[])} is
+  * accepted without further checks. Anything else must pass the EML format check, or the
+  * exception built by {@code ExceptionUtils.createEmlInvalidFormatException()} is thrown.
+  *
+  * @param emlBytes raw bytes of the uploaded EML or MSG file
+  */
+ public void validateEmlInput(byte[] emlBytes) {
if (emlBytes == null || emlBytes.length == 0) {
throw ExceptionUtils.createEmlEmptyException();
}
+ if (isMsgFile(emlBytes)) {
+ return; // Valid MSG file, no further EML validation needed
+ }
+
if (isInvalidEmlFormat(emlBytes)) {
throw ExceptionUtils.createEmlInvalidFormatException();
}
}
- private static boolean isInvalidEmlFormat(byte[] emlBytes) {
+ /**
+  * Checks whether the given bytes begin with the {@code MSG_MAGIC_BYTES} signature.
+  *
+  * <p>Only the leading signature bytes are compared; nothing after them is inspected.
+  *
+  * @param fileBytes file content to inspect; may be {@code null}
+  * @return {@code true} when {@code fileBytes} is at least as long as the signature and
+  *     starts with it; {@code false} for {@code null}, shorter, or non-matching input
+  */
+ public boolean isMsgFile(byte[] fileBytes) {
+     final int signatureLength = MSG_MAGIC_BYTES.length;
+     if (fileBytes == null || fileBytes.length < signatureLength) {
+         return false;
+     }
+
+     // Advance while the prefix keeps matching; a full match consumes the whole signature.
+     int matched = 0;
+     while (matched < signatureLength && fileBytes[matched] == MSG_MAGIC_BYTES[matched]) {
+         matched++;
+     }
+     return matched == signatureLength;
+ }
+
+ private boolean isInvalidEmlFormat(byte[] emlBytes) {
try {
int checkLength = Math.min(emlBytes.length, EML_CHECK_LENGTH);
String content;
@@ -101,7 +130,7 @@ public class EmlProcessingUtils {
}
}
- public static String generateEnhancedEmailHtml(
+ public String generateEnhancedEmailHtml(
EmlParser.EmailContent content,
EmlToPdfRequest request,
CustomHtmlSanitizer customHtmlSanitizer) {
@@ -145,7 +174,7 @@ public class EmlProcessingUtils {
html.append(
String.format(
Locale.ROOT,
- "CC: %s
\n",
+ "CC: %s
%n",
sanitizeText(content.getCc(), customHtmlSanitizer)));
}
@@ -153,7 +182,7 @@ public class EmlProcessingUtils {
html.append(
String.format(
Locale.ROOT,
- "BCC: %s
\n",
+ "BCC: %s
%n",
sanitizeText(content.getBcc(), customHtmlSanitizer)));
}
@@ -161,19 +190,19 @@ public class EmlProcessingUtils {
html.append(
String.format(
Locale.ROOT,
- "Date: %s
\n",
+ "Date: %s
%n",
PdfAttachmentHandler.formatEmailDate(content.getDate())));
} else if (content.getDateString() != null && !content.getDateString().trim().isEmpty()) {
html.append(
String.format(
Locale.ROOT,
- "Date: %s
\n",
+ "Date: %s
%n",
sanitizeText(content.getDateString(), customHtmlSanitizer)));
}
- html.append("\n");
+ html.append(String.format(Locale.ROOT, "%n"));
- html.append("\n");
+ html.append(String.format(Locale.ROOT, "
%n"));
if (content.getHtmlBody() != null && !content.getHtmlBody().trim().isEmpty()) {
String processedHtml =
processEmailHtmlBody(content.getHtmlBody(), content, customHtmlSanitizer);
@@ -187,17 +216,17 @@ public class EmlProcessingUtils {
} else {
html.append("
");
}
- html.append("
\n");
+ html.append(String.format(Locale.ROOT, "
%n"));
if (content.getAttachmentCount() > 0 || !content.getAttachments().isEmpty()) {
- appendAttachmentsSection(html, content, request, customHtmlSanitizer);
+ appendAttachmentsSection(html, content, request);
}
- html.append("\n