mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-08-11 13:48:37 +02:00
refactor logic into multiple files
This commit is contained in:
parent
c5eee8cad9
commit
18cc10eab7
@ -0,0 +1,652 @@
|
|||||||
|
package stirling.software.common.util;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.lang.reflect.Constructor;
|
||||||
|
import java.lang.reflect.Method;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Properties;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.experimental.UtilityClass;
|
||||||
|
|
||||||
|
import stirling.software.common.model.api.converters.EmlToPdfRequest;
|
||||||
|
|
||||||
|
@UtilityClass
|
||||||
|
public class EmlParser {
|
||||||
|
|
||||||
|
private static volatile Boolean jakartaMailAvailable = null;
|
||||||
|
private static volatile Method mimeUtilityDecodeTextMethod = null;
|
||||||
|
private static volatile boolean mimeUtilityChecked = false;
|
||||||
|
|
||||||
|
private static final Pattern MIME_ENCODED_PATTERN =
|
||||||
|
Pattern.compile("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?=");
|
||||||
|
|
||||||
|
private static final String DISPOSITION_ATTACHMENT = "attachment";
|
||||||
|
private static final String TEXT_PLAIN = "text/plain";
|
||||||
|
private static final String TEXT_HTML = "text/html";
|
||||||
|
private static final String MULTIPART_PREFIX = "multipart/";
|
||||||
|
|
||||||
|
private static final String HEADER_CONTENT_TYPE = "content-type:";
|
||||||
|
private static final String HEADER_CONTENT_DISPOSITION = "content-disposition:";
|
||||||
|
private static final String HEADER_CONTENT_TRANSFER_ENCODING = "content-transfer-encoding:";
|
||||||
|
private static final String HEADER_CONTENT_ID = "Content-ID";
|
||||||
|
private static final String HEADER_SUBJECT = "Subject:";
|
||||||
|
private static final String HEADER_FROM = "From:";
|
||||||
|
private static final String HEADER_TO = "To:";
|
||||||
|
private static final String HEADER_CC = "Cc:";
|
||||||
|
private static final String HEADER_BCC = "Bcc:";
|
||||||
|
private static final String HEADER_DATE = "Date:";
|
||||||
|
|
||||||
|
private static synchronized boolean isJakartaMailAvailable() {
|
||||||
|
if (jakartaMailAvailable == null) {
|
||||||
|
try {
|
||||||
|
Class.forName("jakarta.mail.internet.MimeMessage");
|
||||||
|
Class.forName("jakarta.mail.Session");
|
||||||
|
Class.forName("jakarta.mail.internet.MimeUtility");
|
||||||
|
Class.forName("jakarta.mail.internet.MimePart");
|
||||||
|
Class.forName("jakarta.mail.internet.MimeMultipart");
|
||||||
|
Class.forName("jakarta.mail.Multipart");
|
||||||
|
Class.forName("jakarta.mail.Part");
|
||||||
|
jakartaMailAvailable = true;
|
||||||
|
} catch (ClassNotFoundException e) {
|
||||||
|
jakartaMailAvailable = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return jakartaMailAvailable;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static EmailContent extractEmailContent(
|
||||||
|
byte[] emlBytes, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer)
|
||||||
|
throws IOException {
|
||||||
|
EmlProcessingUtils.validateEmlInput(emlBytes);
|
||||||
|
|
||||||
|
if (isJakartaMailAvailable()) {
|
||||||
|
return extractEmailContentAdvanced(emlBytes, request, customHtmlSanitizer);
|
||||||
|
} else {
|
||||||
|
return extractEmailContentBasic(emlBytes, request, customHtmlSanitizer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static EmailContent extractEmailContentBasic(
|
||||||
|
byte[] emlBytes, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer) {
|
||||||
|
String emlContent = new String(emlBytes, StandardCharsets.UTF_8);
|
||||||
|
EmailContent content = new EmailContent();
|
||||||
|
|
||||||
|
content.setSubject(extractBasicHeader(emlContent, HEADER_SUBJECT));
|
||||||
|
content.setFrom(extractBasicHeader(emlContent, HEADER_FROM));
|
||||||
|
content.setTo(extractBasicHeader(emlContent, HEADER_TO));
|
||||||
|
content.setCc(extractBasicHeader(emlContent, HEADER_CC));
|
||||||
|
content.setBcc(extractBasicHeader(emlContent, HEADER_BCC));
|
||||||
|
|
||||||
|
String dateStr = extractBasicHeader(emlContent, HEADER_DATE);
|
||||||
|
if (!dateStr.isEmpty()) {
|
||||||
|
content.setDateString(dateStr);
|
||||||
|
}
|
||||||
|
|
||||||
|
String htmlBody = extractHtmlBody(emlContent);
|
||||||
|
if (htmlBody != null) {
|
||||||
|
content.setHtmlBody(htmlBody);
|
||||||
|
} else {
|
||||||
|
String textBody = extractTextBody(emlContent);
|
||||||
|
content.setTextBody(textBody != null ? textBody : "Email content could not be parsed");
|
||||||
|
}
|
||||||
|
|
||||||
|
content.getAttachments().addAll(extractAttachmentsBasic(emlContent));
|
||||||
|
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static EmailContent extractEmailContentAdvanced(
|
||||||
|
byte[] emlBytes, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer) {
|
||||||
|
try {
|
||||||
|
Class<?> sessionClass = Class.forName("jakarta.mail.Session");
|
||||||
|
Class<?> mimeMessageClass = Class.forName("jakarta.mail.internet.MimeMessage");
|
||||||
|
|
||||||
|
Method getDefaultInstance =
|
||||||
|
sessionClass.getMethod("getDefaultInstance", Properties.class);
|
||||||
|
Object session = getDefaultInstance.invoke(null, new Properties());
|
||||||
|
|
||||||
|
Class<?>[] constructorArgs = new Class<?>[] {sessionClass, InputStream.class};
|
||||||
|
Constructor<?> mimeMessageConstructor =
|
||||||
|
mimeMessageClass.getConstructor(constructorArgs);
|
||||||
|
Object message =
|
||||||
|
mimeMessageConstructor.newInstance(session, new ByteArrayInputStream(emlBytes));
|
||||||
|
|
||||||
|
return extractFromMimeMessage(message, request, customHtmlSanitizer);
|
||||||
|
|
||||||
|
} catch (ReflectiveOperationException e) {
|
||||||
|
return extractEmailContentBasic(emlBytes, request, customHtmlSanitizer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static EmailContent extractFromMimeMessage(
|
||||||
|
Object message, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer) {
|
||||||
|
EmailContent content = new EmailContent();
|
||||||
|
|
||||||
|
try {
|
||||||
|
Class<?> messageClass = message.getClass();
|
||||||
|
|
||||||
|
Method getSubject = messageClass.getMethod("getSubject");
|
||||||
|
String subject = (String) getSubject.invoke(message);
|
||||||
|
content.setSubject(subject != null ? safeMimeDecode(subject) : "No Subject");
|
||||||
|
|
||||||
|
Method getFrom = messageClass.getMethod("getFrom");
|
||||||
|
Object[] fromAddresses = (Object[]) getFrom.invoke(message);
|
||||||
|
content.setFrom(buildAddressString(fromAddresses));
|
||||||
|
|
||||||
|
extractRecipients(message, messageClass, content);
|
||||||
|
|
||||||
|
Method getSentDate = messageClass.getMethod("getSentDate");
|
||||||
|
content.setDate((Date) getSentDate.invoke(message));
|
||||||
|
|
||||||
|
Method getContent = messageClass.getMethod("getContent");
|
||||||
|
Object messageContent = getContent.invoke(message);
|
||||||
|
|
||||||
|
processMessageContent(message, messageContent, content, request, customHtmlSanitizer);
|
||||||
|
|
||||||
|
} catch (ReflectiveOperationException | RuntimeException e) {
|
||||||
|
content.setSubject("Email Conversion");
|
||||||
|
content.setFrom("Unknown");
|
||||||
|
content.setTo("Unknown");
|
||||||
|
content.setCc("");
|
||||||
|
content.setBcc("");
|
||||||
|
content.setTextBody("Email content could not be parsed with advanced processing");
|
||||||
|
}
|
||||||
|
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void extractRecipients(
|
||||||
|
Object message, Class<?> messageClass, EmailContent content) {
|
||||||
|
try {
|
||||||
|
Method getRecipients =
|
||||||
|
messageClass.getMethod(
|
||||||
|
"getRecipients", Class.forName("jakarta.mail.Message$RecipientType"));
|
||||||
|
Class<?> recipientTypeClass = Class.forName("jakarta.mail.Message$RecipientType");
|
||||||
|
|
||||||
|
Object toType = recipientTypeClass.getField("TO").get(null);
|
||||||
|
Object[] toRecipients = (Object[]) getRecipients.invoke(message, toType);
|
||||||
|
content.setTo(buildAddressString(toRecipients));
|
||||||
|
|
||||||
|
Object ccType = recipientTypeClass.getField("CC").get(null);
|
||||||
|
Object[] ccRecipients = (Object[]) getRecipients.invoke(message, ccType);
|
||||||
|
content.setCc(buildAddressString(ccRecipients));
|
||||||
|
|
||||||
|
Object bccType = recipientTypeClass.getField("BCC").get(null);
|
||||||
|
Object[] bccRecipients = (Object[]) getRecipients.invoke(message, bccType);
|
||||||
|
content.setBcc(buildAddressString(bccRecipients));
|
||||||
|
|
||||||
|
} catch (ReflectiveOperationException e) {
|
||||||
|
try {
|
||||||
|
Method getAllRecipients = messageClass.getMethod("getAllRecipients");
|
||||||
|
Object[] recipients = (Object[]) getAllRecipients.invoke(message);
|
||||||
|
content.setTo(buildAddressString(recipients));
|
||||||
|
content.setCc("");
|
||||||
|
content.setBcc("");
|
||||||
|
} catch (ReflectiveOperationException ex) {
|
||||||
|
content.setTo("");
|
||||||
|
content.setCc("");
|
||||||
|
content.setBcc("");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String buildAddressString(Object[] addresses) {
|
||||||
|
if (addresses == null || addresses.length == 0) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
StringBuilder builder = new StringBuilder();
|
||||||
|
for (int i = 0; i < addresses.length; i++) {
|
||||||
|
if (i > 0) builder.append(", ");
|
||||||
|
builder.append(safeMimeDecode(addresses[i].toString()));
|
||||||
|
}
|
||||||
|
return builder.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void processMessageContent(
|
||||||
|
Object message,
|
||||||
|
Object messageContent,
|
||||||
|
EmailContent content,
|
||||||
|
EmlToPdfRequest request,
|
||||||
|
CustomHtmlSanitizer customHtmlSanitizer) {
|
||||||
|
try {
|
||||||
|
if (messageContent instanceof String stringContent) {
|
||||||
|
Method getContentType = message.getClass().getMethod("getContentType");
|
||||||
|
String contentType = (String) getContentType.invoke(message);
|
||||||
|
|
||||||
|
if (contentType != null && contentType.toLowerCase().contains(TEXT_HTML)) {
|
||||||
|
content.setHtmlBody(stringContent);
|
||||||
|
} else {
|
||||||
|
content.setTextBody(stringContent);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Class<?> multipartClass = Class.forName("jakarta.mail.Multipart");
|
||||||
|
if (multipartClass.isInstance(messageContent)) {
|
||||||
|
processMultipart(messageContent, content, request, customHtmlSanitizer, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (ReflectiveOperationException | ClassCastException e) {
|
||||||
|
content.setTextBody("Email content could not be parsed with advanced processing");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void processMultipart(
|
||||||
|
Object multipart,
|
||||||
|
EmailContent content,
|
||||||
|
EmlToPdfRequest request,
|
||||||
|
CustomHtmlSanitizer customHtmlSanitizer,
|
||||||
|
int depth) {
|
||||||
|
|
||||||
|
final int MAX_MULTIPART_DEPTH = 10;
|
||||||
|
if (depth > MAX_MULTIPART_DEPTH) {
|
||||||
|
content.setHtmlBody("<div class=\"error\">Maximum multipart depth exceeded</div>");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
Class<?> multipartClass = multipart.getClass();
|
||||||
|
Method getCount = multipartClass.getMethod("getCount");
|
||||||
|
int count = (Integer) getCount.invoke(multipart);
|
||||||
|
|
||||||
|
Method getBodyPart = multipartClass.getMethod("getBodyPart", int.class);
|
||||||
|
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
Object part = getBodyPart.invoke(multipart, i);
|
||||||
|
processPart(part, content, request, customHtmlSanitizer, depth + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (ReflectiveOperationException | ClassCastException e) {
|
||||||
|
content.setHtmlBody("<div class=\"error\">Error processing multipart content</div>");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void processPart(
|
||||||
|
Object part,
|
||||||
|
EmailContent content,
|
||||||
|
EmlToPdfRequest request,
|
||||||
|
CustomHtmlSanitizer customHtmlSanitizer,
|
||||||
|
int depth) {
|
||||||
|
try {
|
||||||
|
Class<?> partClass = part.getClass();
|
||||||
|
|
||||||
|
Method isMimeType = partClass.getMethod("isMimeType", String.class);
|
||||||
|
Method getContent = partClass.getMethod("getContent");
|
||||||
|
Method getDisposition = partClass.getMethod("getDisposition");
|
||||||
|
Method getFileName = partClass.getMethod("getFileName");
|
||||||
|
Method getContentType = partClass.getMethod("getContentType");
|
||||||
|
Method getHeader = partClass.getMethod("getHeader", String.class);
|
||||||
|
|
||||||
|
Object disposition = getDisposition.invoke(part);
|
||||||
|
String filename = (String) getFileName.invoke(part);
|
||||||
|
String contentType = (String) getContentType.invoke(part);
|
||||||
|
|
||||||
|
String normalizedDisposition =
|
||||||
|
disposition != null ? ((String) disposition).toLowerCase() : null;
|
||||||
|
|
||||||
|
if ((Boolean) isMimeType.invoke(part, TEXT_PLAIN) && normalizedDisposition == null) {
|
||||||
|
Object partContent = getContent.invoke(part);
|
||||||
|
if (partContent instanceof String stringContent) {
|
||||||
|
content.setTextBody(stringContent);
|
||||||
|
}
|
||||||
|
} else if ((Boolean) isMimeType.invoke(part, TEXT_HTML)
|
||||||
|
&& normalizedDisposition == null) {
|
||||||
|
Object partContent = getContent.invoke(part);
|
||||||
|
if (partContent instanceof String stringContent) {
|
||||||
|
String htmlBody =
|
||||||
|
customHtmlSanitizer != null
|
||||||
|
? customHtmlSanitizer.sanitize(stringContent)
|
||||||
|
: stringContent;
|
||||||
|
content.setHtmlBody(htmlBody);
|
||||||
|
}
|
||||||
|
} else if ((normalizedDisposition != null
|
||||||
|
&& normalizedDisposition.contains(DISPOSITION_ATTACHMENT))
|
||||||
|
|| (filename != null && !filename.trim().isEmpty())) {
|
||||||
|
|
||||||
|
processAttachment(
|
||||||
|
part, content, request, getHeader, getContent, filename, contentType);
|
||||||
|
} else if ((Boolean) isMimeType.invoke(part, "multipart/*")) {
|
||||||
|
Object multipartContent = getContent.invoke(part);
|
||||||
|
if (multipartContent != null) {
|
||||||
|
Class<?> multipartClass = Class.forName("jakarta.mail.Multipart");
|
||||||
|
if (multipartClass.isInstance(multipartContent)) {
|
||||||
|
processMultipart(
|
||||||
|
multipartContent, content, request, customHtmlSanitizer, depth + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (ReflectiveOperationException | RuntimeException e) {
|
||||||
|
// Continue processing other parts if one fails
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void processAttachment(
|
||||||
|
Object part,
|
||||||
|
EmailContent content,
|
||||||
|
EmlToPdfRequest request,
|
||||||
|
Method getHeader,
|
||||||
|
Method getContent,
|
||||||
|
String filename,
|
||||||
|
String contentType) {
|
||||||
|
|
||||||
|
content.setAttachmentCount(content.getAttachmentCount() + 1);
|
||||||
|
|
||||||
|
if (filename != null && !filename.trim().isEmpty()) {
|
||||||
|
EmailAttachment attachment = new EmailAttachment();
|
||||||
|
attachment.setFilename(safeMimeDecode(filename));
|
||||||
|
attachment.setContentType(contentType);
|
||||||
|
|
||||||
|
try {
|
||||||
|
String[] contentIdHeaders = (String[]) getHeader.invoke(part, HEADER_CONTENT_ID);
|
||||||
|
if (contentIdHeaders != null) {
|
||||||
|
for (String contentIdHeader : contentIdHeaders) {
|
||||||
|
if (contentIdHeader != null && !contentIdHeader.trim().isEmpty()) {
|
||||||
|
attachment.setEmbedded(true);
|
||||||
|
String contentId = contentIdHeader.trim().replaceAll("[<>]", "");
|
||||||
|
attachment.setContentId(contentId);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (ReflectiveOperationException e) {
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((request != null && request.isIncludeAttachments()) || attachment.isEmbedded()) {
|
||||||
|
extractAttachmentData(part, attachment, getContent, request);
|
||||||
|
}
|
||||||
|
|
||||||
|
content.getAttachments().add(attachment);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void extractAttachmentData(
|
||||||
|
Object part, EmailAttachment attachment, Method getContent, EmlToPdfRequest request) {
|
||||||
|
try {
|
||||||
|
Object attachmentContent = getContent.invoke(part);
|
||||||
|
byte[] attachmentData = null;
|
||||||
|
|
||||||
|
if (attachmentContent instanceof InputStream inputStream) {
|
||||||
|
try (InputStream stream = inputStream) {
|
||||||
|
attachmentData = stream.readAllBytes();
|
||||||
|
} catch (IOException e) {
|
||||||
|
if (attachment.isEmbedded()) {
|
||||||
|
attachmentData = new byte[0];
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (attachmentContent instanceof byte[] byteArray) {
|
||||||
|
attachmentData = byteArray;
|
||||||
|
} else if (attachmentContent instanceof String stringContent) {
|
||||||
|
attachmentData = stringContent.getBytes(StandardCharsets.UTF_8);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (attachmentData != null) {
|
||||||
|
long maxSizeMB = request != null ? request.getMaxAttachmentSizeMB() : 10L;
|
||||||
|
long maxSizeBytes = maxSizeMB * 1024 * 1024;
|
||||||
|
|
||||||
|
if (attachmentData.length <= maxSizeBytes || attachment.isEmbedded()) {
|
||||||
|
attachment.setData(attachmentData);
|
||||||
|
attachment.setSizeBytes(attachmentData.length);
|
||||||
|
} else {
|
||||||
|
attachment.setSizeBytes(attachmentData.length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (ReflectiveOperationException | RuntimeException e) {
|
||||||
|
// Continue without attachment data
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String extractBasicHeader(String emlContent, String headerName) {
|
||||||
|
try {
|
||||||
|
String[] lines = emlContent.split("\r?\n");
|
||||||
|
for (int i = 0; i < lines.length; i++) {
|
||||||
|
String line = lines[i];
|
||||||
|
if (line.toLowerCase().startsWith(headerName.toLowerCase())) {
|
||||||
|
StringBuilder value =
|
||||||
|
new StringBuilder(line.substring(headerName.length()).trim());
|
||||||
|
for (int j = i + 1; j < lines.length; j++) {
|
||||||
|
if (lines[j].startsWith(" ") || lines[j].startsWith("\t")) {
|
||||||
|
value.append(" ").append(lines[j].trim());
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return safeMimeDecode(value.toString());
|
||||||
|
}
|
||||||
|
if (line.trim().isEmpty()) break;
|
||||||
|
}
|
||||||
|
} catch (RuntimeException e) {
|
||||||
|
// Ignore errors in header extraction
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String extractHtmlBody(String emlContent) {
|
||||||
|
try {
|
||||||
|
String lowerContent = emlContent.toLowerCase();
|
||||||
|
int htmlStart = lowerContent.indexOf(HEADER_CONTENT_TYPE + " " + TEXT_HTML);
|
||||||
|
if (htmlStart == -1) return null;
|
||||||
|
|
||||||
|
int bodyStart = emlContent.indexOf("\r\n\r\n", htmlStart);
|
||||||
|
if (bodyStart == -1) bodyStart = emlContent.indexOf("\n\n", htmlStart);
|
||||||
|
if (bodyStart == -1) return null;
|
||||||
|
|
||||||
|
bodyStart += (emlContent.charAt(bodyStart + 1) == '\r') ? 4 : 2;
|
||||||
|
int bodyEnd = findPartEnd(emlContent, bodyStart);
|
||||||
|
|
||||||
|
return emlContent.substring(bodyStart, bodyEnd).trim();
|
||||||
|
} catch (Exception e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String extractTextBody(String emlContent) {
|
||||||
|
try {
|
||||||
|
String lowerContent = emlContent.toLowerCase();
|
||||||
|
int textStart = lowerContent.indexOf(HEADER_CONTENT_TYPE + " " + TEXT_PLAIN);
|
||||||
|
if (textStart == -1) {
|
||||||
|
int bodyStart = emlContent.indexOf("\r\n\r\n");
|
||||||
|
if (bodyStart == -1) bodyStart = emlContent.indexOf("\n\n");
|
||||||
|
if (bodyStart != -1) {
|
||||||
|
bodyStart += (emlContent.charAt(bodyStart + 1) == '\r') ? 4 : 2;
|
||||||
|
int bodyEnd = findPartEnd(emlContent, bodyStart);
|
||||||
|
return emlContent.substring(bodyStart, bodyEnd).trim();
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
int bodyStart = emlContent.indexOf("\r\n\r\n", textStart);
|
||||||
|
if (bodyStart == -1) bodyStart = emlContent.indexOf("\n\n", textStart);
|
||||||
|
if (bodyStart == -1) return null;
|
||||||
|
|
||||||
|
bodyStart += (emlContent.charAt(bodyStart + 1) == '\r') ? 4 : 2;
|
||||||
|
int bodyEnd = findPartEnd(emlContent, bodyStart);
|
||||||
|
|
||||||
|
return emlContent.substring(bodyStart, bodyEnd).trim();
|
||||||
|
} catch (RuntimeException e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int findPartEnd(String content, int start) {
|
||||||
|
String[] lines = content.substring(start).split("\r?\n");
|
||||||
|
StringBuilder result = new StringBuilder();
|
||||||
|
|
||||||
|
for (String line : lines) {
|
||||||
|
if (line.startsWith("--") && line.length() > 10) break;
|
||||||
|
result.append(line).append("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
return start + result.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<EmailAttachment> extractAttachmentsBasic(String emlContent) {
|
||||||
|
List<EmailAttachment> attachments = new ArrayList<>();
|
||||||
|
try {
|
||||||
|
String[] lines = emlContent.split("\r?\n");
|
||||||
|
boolean inHeaders = true;
|
||||||
|
String currentContentType = "";
|
||||||
|
String currentDisposition = "";
|
||||||
|
String currentFilename = "";
|
||||||
|
String currentEncoding = "";
|
||||||
|
|
||||||
|
for (String line : lines) {
|
||||||
|
String lowerLine = line.toLowerCase().trim();
|
||||||
|
|
||||||
|
if (line.trim().isEmpty()) {
|
||||||
|
inHeaders = false;
|
||||||
|
if (isAttachment(currentDisposition, currentFilename, currentContentType)) {
|
||||||
|
EmailAttachment attachment = new EmailAttachment();
|
||||||
|
attachment.setFilename(currentFilename);
|
||||||
|
attachment.setContentType(currentContentType);
|
||||||
|
attachment.setTransferEncoding(currentEncoding);
|
||||||
|
attachments.add(attachment);
|
||||||
|
}
|
||||||
|
currentContentType = "";
|
||||||
|
currentDisposition = "";
|
||||||
|
currentFilename = "";
|
||||||
|
currentEncoding = "";
|
||||||
|
inHeaders = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!inHeaders) continue;
|
||||||
|
|
||||||
|
if (lowerLine.startsWith(HEADER_CONTENT_TYPE)) {
|
||||||
|
currentContentType = line.substring(HEADER_CONTENT_TYPE.length()).trim();
|
||||||
|
} else if (lowerLine.startsWith(HEADER_CONTENT_DISPOSITION)) {
|
||||||
|
currentDisposition = line.substring(HEADER_CONTENT_DISPOSITION.length()).trim();
|
||||||
|
currentFilename = extractFilenameFromDisposition(currentDisposition);
|
||||||
|
} else if (lowerLine.startsWith(HEADER_CONTENT_TRANSFER_ENCODING)) {
|
||||||
|
currentEncoding =
|
||||||
|
line.substring(HEADER_CONTENT_TRANSFER_ENCODING.length()).trim();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (RuntimeException e) {
|
||||||
|
// Continue with empty list
|
||||||
|
}
|
||||||
|
return attachments;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isAttachment(String disposition, String filename, String contentType) {
|
||||||
|
return (disposition.toLowerCase().contains(DISPOSITION_ATTACHMENT) && !filename.isEmpty())
|
||||||
|
|| (!filename.isEmpty() && !contentType.toLowerCase().startsWith("text/"))
|
||||||
|
|| (contentType.toLowerCase().contains("application/") && !filename.isEmpty());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String extractFilenameFromDisposition(String disposition) {
|
||||||
|
if (disposition == null || !disposition.contains("filename=")) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle filename*= (RFC 2231 encoded filename)
|
||||||
|
if (disposition.toLowerCase().contains("filename*=")) {
|
||||||
|
int filenameStarStart = disposition.toLowerCase().indexOf("filename*=") + 10;
|
||||||
|
int filenameStarEnd = disposition.indexOf(";", filenameStarStart);
|
||||||
|
if (filenameStarEnd == -1) filenameStarEnd = disposition.length();
|
||||||
|
String extendedFilename =
|
||||||
|
disposition.substring(filenameStarStart, filenameStarEnd).trim();
|
||||||
|
extendedFilename = extendedFilename.replaceAll("^\"|\"$", "");
|
||||||
|
|
||||||
|
if (extendedFilename.contains("'")) {
|
||||||
|
String[] parts = extendedFilename.split("'", 3);
|
||||||
|
if (parts.length == 3) {
|
||||||
|
return EmlProcessingUtils.decodeUrlEncoded(parts[2]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle regular filename=
|
||||||
|
int filenameStart = disposition.toLowerCase().indexOf("filename=") + 9;
|
||||||
|
int filenameEnd = disposition.indexOf(";", filenameStart);
|
||||||
|
if (filenameEnd == -1) filenameEnd = disposition.length();
|
||||||
|
String filename = disposition.substring(filenameStart, filenameEnd).trim();
|
||||||
|
filename = filename.replaceAll("^\"|\"$", "");
|
||||||
|
return safeMimeDecode(filename);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String safeMimeDecode(String headerValue) {
|
||||||
|
if (headerValue == null || headerValue.trim().isEmpty()) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!mimeUtilityChecked) {
|
||||||
|
synchronized (EmlParser.class) {
|
||||||
|
if (!mimeUtilityChecked) {
|
||||||
|
initializeMimeUtilityDecoding();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mimeUtilityDecodeTextMethod != null) {
|
||||||
|
try {
|
||||||
|
return (String) mimeUtilityDecodeTextMethod.invoke(null, headerValue.trim());
|
||||||
|
} catch (ReflectiveOperationException | RuntimeException e) {
|
||||||
|
// Fall through to custom implementation
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return EmlProcessingUtils.decodeMimeHeader(headerValue.trim());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void initializeMimeUtilityDecoding() {
|
||||||
|
try {
|
||||||
|
Class<?> mimeUtilityClass = Class.forName("jakarta.mail.internet.MimeUtility");
|
||||||
|
mimeUtilityDecodeTextMethod = mimeUtilityClass.getMethod("decodeText", String.class);
|
||||||
|
} catch (ClassNotFoundException | NoSuchMethodException e) {
|
||||||
|
mimeUtilityDecodeTextMethod = null;
|
||||||
|
}
|
||||||
|
mimeUtilityChecked = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Data
|
||||||
|
public static class EmailContent {
|
||||||
|
private String subject;
|
||||||
|
private String from;
|
||||||
|
private String to;
|
||||||
|
private String cc;
|
||||||
|
private String bcc;
|
||||||
|
private Date date;
|
||||||
|
private String dateString; // For basic parsing fallback
|
||||||
|
private String htmlBody;
|
||||||
|
private String textBody;
|
||||||
|
private int attachmentCount;
|
||||||
|
private List<EmailAttachment> attachments = new ArrayList<>();
|
||||||
|
|
||||||
|
public void setHtmlBody(String htmlBody) {
|
||||||
|
this.htmlBody = htmlBody != null ? htmlBody.replaceAll("\r", "") : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTextBody(String textBody) {
|
||||||
|
this.textBody = textBody != null ? textBody.replaceAll("\r", "") : null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Data
|
||||||
|
public static class EmailAttachment {
|
||||||
|
private String filename;
|
||||||
|
private String contentType;
|
||||||
|
private byte[] data;
|
||||||
|
private boolean embedded;
|
||||||
|
private String embeddedFilename;
|
||||||
|
private long sizeBytes;
|
||||||
|
private String contentId;
|
||||||
|
private String disposition;
|
||||||
|
private String transferEncoding;
|
||||||
|
|
||||||
|
public void setData(byte[] data) {
|
||||||
|
this.data = data;
|
||||||
|
if (data != null) {
|
||||||
|
this.sizeBytes = data.length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,601 @@
|
|||||||
|
package stirling.software.common.util;
|
||||||
|
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.Base64;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import lombok.experimental.UtilityClass;
|
||||||
|
|
||||||
|
import stirling.software.common.model.api.converters.EmlToPdfRequest;
|
||||||
|
import stirling.software.common.model.api.converters.HTMLToPdfRequest;
|
||||||
|
|
||||||
|
@UtilityClass
|
||||||
|
public class EmlProcessingUtils {
|
||||||
|
|
||||||
|
// Style constants
|
||||||
|
private static final int DEFAULT_FONT_SIZE = 12;
|
||||||
|
private static final String DEFAULT_FONT_FAMILY = "Helvetica, sans-serif";
|
||||||
|
private static final float DEFAULT_LINE_HEIGHT = 1.4f;
|
||||||
|
private static final String DEFAULT_ZOOM = "1.0";
|
||||||
|
private static final String DEFAULT_TEXT_COLOR = "#202124";
|
||||||
|
private static final String DEFAULT_BACKGROUND_COLOR = "#ffffff";
|
||||||
|
private static final String DEFAULT_BORDER_COLOR = "#e8eaed";
|
||||||
|
private static final String ATTACHMENT_BACKGROUND_COLOR = "#f9f9f9";
|
||||||
|
private static final String ATTACHMENT_BORDER_COLOR = "#eeeeee";
|
||||||
|
|
||||||
|
private static final int EML_CHECK_LENGTH = 8192;
|
||||||
|
private static final int MIN_HEADER_COUNT_FOR_VALID_EML = 2;
|
||||||
|
|
||||||
|
// MIME type detection
|
||||||
|
private static final Map<String, String> EXTENSION_TO_MIME_TYPE =
|
||||||
|
Map.of(
|
||||||
|
".png", "image/png",
|
||||||
|
".jpg", "image/jpeg",
|
||||||
|
".jpeg", "image/jpeg",
|
||||||
|
".gif", "image/gif",
|
||||||
|
".bmp", "image/bmp",
|
||||||
|
".webp", "image/webp",
|
||||||
|
".svg", "image/svg+xml",
|
||||||
|
".ico", "image/x-icon",
|
||||||
|
".tiff", "image/tiff",
|
||||||
|
".tif", "image/tiff");
|
||||||
|
|
||||||
|
public static void validateEmlInput(byte[] emlBytes) {
|
||||||
|
if (emlBytes == null || emlBytes.length == 0) {
|
||||||
|
throw new IllegalArgumentException("EML file is empty or null");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isInvalidEmlFormat(emlBytes)) {
|
||||||
|
throw new IllegalArgumentException("Invalid EML file format");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isInvalidEmlFormat(byte[] emlBytes) {
|
||||||
|
try {
|
||||||
|
int checkLength = Math.min(emlBytes.length, EML_CHECK_LENGTH);
|
||||||
|
String content;
|
||||||
|
|
||||||
|
try {
|
||||||
|
content = new String(emlBytes, 0, checkLength, StandardCharsets.UTF_8);
|
||||||
|
if (content.contains("\uFFFD")) {
|
||||||
|
content = new String(emlBytes, 0, checkLength, StandardCharsets.ISO_8859_1);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
content = new String(emlBytes, 0, checkLength, StandardCharsets.ISO_8859_1);
|
||||||
|
}
|
||||||
|
|
||||||
|
String lowerContent = content.toLowerCase(Locale.ROOT);
|
||||||
|
|
||||||
|
boolean hasFrom =
|
||||||
|
lowerContent.contains("from:") || lowerContent.contains("return-path:");
|
||||||
|
boolean hasSubject = lowerContent.contains("subject:");
|
||||||
|
boolean hasMessageId = lowerContent.contains("message-id:");
|
||||||
|
boolean hasDate = lowerContent.contains("date:");
|
||||||
|
boolean hasTo =
|
||||||
|
lowerContent.contains("to:")
|
||||||
|
|| lowerContent.contains("cc:")
|
||||||
|
|| lowerContent.contains("bcc:");
|
||||||
|
boolean hasMimeStructure =
|
||||||
|
lowerContent.contains("multipart/")
|
||||||
|
|| lowerContent.contains("text/plain")
|
||||||
|
|| lowerContent.contains("text/html")
|
||||||
|
|| lowerContent.contains("boundary=");
|
||||||
|
|
||||||
|
int headerCount = 0;
|
||||||
|
if (hasFrom) headerCount++;
|
||||||
|
if (hasSubject) headerCount++;
|
||||||
|
if (hasMessageId) headerCount++;
|
||||||
|
if (hasDate) headerCount++;
|
||||||
|
if (hasTo) headerCount++;
|
||||||
|
|
||||||
|
return headerCount < MIN_HEADER_COUNT_FOR_VALID_EML && !hasMimeStructure;
|
||||||
|
|
||||||
|
} catch (RuntimeException e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String generateEnhancedEmailHtml(
|
||||||
|
EmlParser.EmailContent content,
|
||||||
|
EmlToPdfRequest request,
|
||||||
|
CustomHtmlSanitizer customHtmlSanitizer) {
|
||||||
|
StringBuilder html = new StringBuilder();
|
||||||
|
|
||||||
|
html.append(
|
||||||
|
String.format(
|
||||||
|
"""
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en"><head><meta charset="UTF-8">
|
||||||
|
<title>%s</title>
|
||||||
|
<style>
|
||||||
|
""",
|
||||||
|
sanitizeText(content.getSubject(), customHtmlSanitizer)));
|
||||||
|
|
||||||
|
appendEnhancedStyles(html);
|
||||||
|
|
||||||
|
html.append(
|
||||||
|
"""
|
||||||
|
</style>
|
||||||
|
</head><body>
|
||||||
|
""");
|
||||||
|
|
||||||
|
html.append(
|
||||||
|
String.format(
|
||||||
|
"""
|
||||||
|
<div class="email-container">
|
||||||
|
<div class="email-header">
|
||||||
|
<h1>%s</h1>
|
||||||
|
<div class="email-meta">
|
||||||
|
<div><strong>From:</strong> %s</div>
|
||||||
|
<div><strong>To:</strong> %s</div>
|
||||||
|
""",
|
||||||
|
sanitizeText(content.getSubject(), customHtmlSanitizer),
|
||||||
|
sanitizeText(content.getFrom(), customHtmlSanitizer),
|
||||||
|
sanitizeText(content.getTo(), customHtmlSanitizer)));
|
||||||
|
|
||||||
|
if (content.getCc() != null && !content.getCc().trim().isEmpty()) {
|
||||||
|
html.append(
|
||||||
|
String.format(
|
||||||
|
"<div><strong>CC:</strong> %s</div>\n",
|
||||||
|
sanitizeText(content.getCc(), customHtmlSanitizer)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (content.getBcc() != null && !content.getBcc().trim().isEmpty()) {
|
||||||
|
html.append(
|
||||||
|
String.format(
|
||||||
|
"<div><strong>BCC:</strong> %s</div>\n",
|
||||||
|
sanitizeText(content.getBcc(), customHtmlSanitizer)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (content.getDate() != null) {
|
||||||
|
html.append(
|
||||||
|
String.format(
|
||||||
|
"<div><strong>Date:</strong> %s</div>\n",
|
||||||
|
PdfAttachmentHandler.formatEmailDate(content.getDate())));
|
||||||
|
} else if (content.getDateString() != null && !content.getDateString().trim().isEmpty()) {
|
||||||
|
html.append(
|
||||||
|
String.format(
|
||||||
|
"<div><strong>Date:</strong> %s</div>\n",
|
||||||
|
sanitizeText(content.getDateString(), customHtmlSanitizer)));
|
||||||
|
}
|
||||||
|
|
||||||
|
html.append("</div></div>\n");
|
||||||
|
|
||||||
|
html.append("<div class=\"email-body\">\n");
|
||||||
|
if (content.getHtmlBody() != null && !content.getHtmlBody().trim().isEmpty()) {
|
||||||
|
String processedHtml =
|
||||||
|
processEmailHtmlBody(content.getHtmlBody(), content, customHtmlSanitizer);
|
||||||
|
html.append(processedHtml);
|
||||||
|
} else if (content.getTextBody() != null && !content.getTextBody().trim().isEmpty()) {
|
||||||
|
html.append(
|
||||||
|
String.format(
|
||||||
|
"<div class=\"text-body\">%s</div>",
|
||||||
|
convertTextToHtml(content.getTextBody(), customHtmlSanitizer)));
|
||||||
|
} else {
|
||||||
|
html.append("<div class=\"no-content\"><p><em>No content available</em></p></div>");
|
||||||
|
}
|
||||||
|
html.append("</div>\n");
|
||||||
|
|
||||||
|
if (content.getAttachmentCount() > 0 || !content.getAttachments().isEmpty()) {
|
||||||
|
appendAttachmentsSection(html, content, request, customHtmlSanitizer);
|
||||||
|
}
|
||||||
|
|
||||||
|
html.append("</div>\n</body></html>");
|
||||||
|
return html.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String processEmailHtmlBody(
|
||||||
|
String htmlBody,
|
||||||
|
EmlParser.EmailContent emailContent,
|
||||||
|
CustomHtmlSanitizer customHtmlSanitizer) {
|
||||||
|
if (htmlBody == null) return "";
|
||||||
|
|
||||||
|
String processed =
|
||||||
|
customHtmlSanitizer != null ? customHtmlSanitizer.sanitize(htmlBody) : htmlBody;
|
||||||
|
|
||||||
|
processed = processed.replaceAll("(?i)\\s*position\\s*:\\s*fixed[^;]*;?", "");
|
||||||
|
processed = processed.replaceAll("(?i)\\s*position\\s*:\\s*absolute[^;]*;?", "");
|
||||||
|
|
||||||
|
if (emailContent != null && !emailContent.getAttachments().isEmpty()) {
|
||||||
|
processed = PdfAttachmentHandler.processInlineImages(processed, emailContent);
|
||||||
|
}
|
||||||
|
|
||||||
|
return processed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String convertTextToHtml(
|
||||||
|
String textBody, CustomHtmlSanitizer customHtmlSanitizer) {
|
||||||
|
if (textBody == null) return "";
|
||||||
|
|
||||||
|
String html =
|
||||||
|
customHtmlSanitizer != null
|
||||||
|
? customHtmlSanitizer.sanitize(textBody)
|
||||||
|
: escapeHtml(textBody);
|
||||||
|
|
||||||
|
html = html.replace("\r\n", "\n").replace("\r", "\n");
|
||||||
|
html = html.replace("\n", "<br>\n");
|
||||||
|
|
||||||
|
html =
|
||||||
|
html.replaceAll(
|
||||||
|
"(https?://[\\w\\-._~:/?#\\[\\]@!$&'()*+,;=%]+)",
|
||||||
|
"<a href=\"$1\" style=\"color: #1a73e8; text-decoration: underline;\">$1</a>");
|
||||||
|
|
||||||
|
html =
|
||||||
|
html.replaceAll(
|
||||||
|
"([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,63})",
|
||||||
|
"<a href=\"mailto:$1\" style=\"color: #1a73e8; text-decoration: underline;\">$1</a>");
|
||||||
|
|
||||||
|
return html;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void appendEnhancedStyles(StringBuilder html) {
|
||||||
|
String css =
|
||||||
|
String.format(
|
||||||
|
"""
|
||||||
|
body {
|
||||||
|
font-family: %s;
|
||||||
|
font-size: %dpx;
|
||||||
|
line-height: %s;
|
||||||
|
color: %s;
|
||||||
|
margin: 0;
|
||||||
|
padding: 16px;
|
||||||
|
background-color: %s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.email-container {
|
||||||
|
width: 100%%;
|
||||||
|
max-width: 100%%;
|
||||||
|
margin: 0 auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.email-header {
|
||||||
|
padding-bottom: 10px;
|
||||||
|
border-bottom: 1px solid %s;
|
||||||
|
margin-bottom: 10px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.email-header h1 {
|
||||||
|
margin: 0 0 10px 0;
|
||||||
|
font-size: %dpx;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
.email-meta div {
|
||||||
|
margin-bottom: 2px;
|
||||||
|
font-size: %dpx;
|
||||||
|
}
|
||||||
|
|
||||||
|
.email-body {
|
||||||
|
word-wrap: break-word;
|
||||||
|
}
|
||||||
|
|
||||||
|
.attachment-section {
|
||||||
|
margin-top: 15px;
|
||||||
|
padding: 10px;
|
||||||
|
background-color: %s;
|
||||||
|
border: 1px solid %s;
|
||||||
|
border-radius: 3px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.attachment-section h3 {
|
||||||
|
margin: 0 0 8px 0;
|
||||||
|
font-size: %dpx;
|
||||||
|
}
|
||||||
|
|
||||||
|
.attachment-item {
|
||||||
|
padding: 5px 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.attachment-icon {
|
||||||
|
margin-right: 5px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.attachment-details, .attachment-type {
|
||||||
|
font-size: %dpx;
|
||||||
|
color: #555555;
|
||||||
|
}
|
||||||
|
|
||||||
|
.attachment-inclusion-note, .attachment-info-note {
|
||||||
|
margin-top: 8px;
|
||||||
|
padding: 6px;
|
||||||
|
font-size: %dpx;
|
||||||
|
border-radius: 3px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.attachment-inclusion-note {
|
||||||
|
background-color: #e6ffed;
|
||||||
|
border: 1px solid #d4f7dc;
|
||||||
|
color: #006420;
|
||||||
|
}
|
||||||
|
|
||||||
|
.attachment-info-note {
|
||||||
|
background-color: #fff9e6;
|
||||||
|
border: 1px solid #fff0c2;
|
||||||
|
color: #664d00;
|
||||||
|
}
|
||||||
|
|
||||||
|
.attachment-link-container {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
padding: 8px;
|
||||||
|
background-color: #f8f9fa;
|
||||||
|
border: 1px solid #dee2e6;
|
||||||
|
border-radius: 4px;
|
||||||
|
margin: 4px 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.attachment-link-container:hover {
|
||||||
|
background-color: #e9ecef;
|
||||||
|
}
|
||||||
|
|
||||||
|
.attachment-note {
|
||||||
|
font-size: %dpx;
|
||||||
|
color: #6c757d;
|
||||||
|
font-style: italic;
|
||||||
|
margin-left: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.no-content {
|
||||||
|
padding: 20px;
|
||||||
|
text-align: center;
|
||||||
|
color: #666;
|
||||||
|
font-style: italic;
|
||||||
|
}
|
||||||
|
|
||||||
|
.text-body {
|
||||||
|
white-space: pre-wrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
img {
|
||||||
|
max-width: 100%%;
|
||||||
|
height: auto;
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
""",
|
||||||
|
DEFAULT_FONT_FAMILY,
|
||||||
|
DEFAULT_FONT_SIZE,
|
||||||
|
DEFAULT_LINE_HEIGHT,
|
||||||
|
DEFAULT_TEXT_COLOR,
|
||||||
|
DEFAULT_BACKGROUND_COLOR,
|
||||||
|
DEFAULT_BORDER_COLOR,
|
||||||
|
DEFAULT_FONT_SIZE + 4,
|
||||||
|
DEFAULT_FONT_SIZE - 1,
|
||||||
|
ATTACHMENT_BACKGROUND_COLOR,
|
||||||
|
ATTACHMENT_BORDER_COLOR,
|
||||||
|
DEFAULT_FONT_SIZE + 1,
|
||||||
|
DEFAULT_FONT_SIZE - 2,
|
||||||
|
DEFAULT_FONT_SIZE - 2,
|
||||||
|
DEFAULT_FONT_SIZE - 3);
|
||||||
|
|
||||||
|
html.append(css);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void appendAttachmentsSection(
|
||||||
|
StringBuilder html,
|
||||||
|
EmlParser.EmailContent content,
|
||||||
|
EmlToPdfRequest request,
|
||||||
|
CustomHtmlSanitizer customHtmlSanitizer) {
|
||||||
|
html.append("<div class=\"attachment-section\">\n");
|
||||||
|
int displayedAttachmentCount =
|
||||||
|
content.getAttachmentCount() > 0
|
||||||
|
? content.getAttachmentCount()
|
||||||
|
: content.getAttachments().size();
|
||||||
|
html.append("<h3>Attachments (").append(displayedAttachmentCount).append(")</h3>\n");
|
||||||
|
|
||||||
|
if (!content.getAttachments().isEmpty()) {
|
||||||
|
for (int i = 0; i < content.getAttachments().size(); i++) {
|
||||||
|
EmlParser.EmailAttachment attachment = content.getAttachments().get(i);
|
||||||
|
|
||||||
|
String embeddedFilename =
|
||||||
|
attachment.getFilename() != null
|
||||||
|
? attachment.getFilename()
|
||||||
|
: ("attachment_" + i);
|
||||||
|
attachment.setEmbeddedFilename(embeddedFilename);
|
||||||
|
|
||||||
|
String sizeStr = GeneralUtils.formatBytes(attachment.getSizeBytes());
|
||||||
|
String contentType =
|
||||||
|
attachment.getContentType() != null
|
||||||
|
&& !attachment.getContentType().isEmpty()
|
||||||
|
? ", " + escapeHtml(attachment.getContentType())
|
||||||
|
: "";
|
||||||
|
|
||||||
|
String attachmentId = "attachment_" + i;
|
||||||
|
html.append(
|
||||||
|
String.format(
|
||||||
|
"""
|
||||||
|
<div class="attachment-item" id="%s">
|
||||||
|
<span class="attachment-icon" data-filename="%s">@</span>
|
||||||
|
<span class="attachment-name">%s</span>
|
||||||
|
<span class="attachment-details">(%s%s)</span>
|
||||||
|
</div>
|
||||||
|
""",
|
||||||
|
attachmentId,
|
||||||
|
escapeHtml(embeddedFilename),
|
||||||
|
escapeHtml(EmlParser.safeMimeDecode(attachment.getFilename())),
|
||||||
|
sizeStr,
|
||||||
|
contentType));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (request != null && request.isIncludeAttachments()) {
|
||||||
|
html.append(
|
||||||
|
"""
|
||||||
|
<div class="attachment-info-note">
|
||||||
|
<p><em>Attachments are embedded in the file.</em></p>
|
||||||
|
</div>
|
||||||
|
""");
|
||||||
|
} else {
|
||||||
|
html.append(
|
||||||
|
"""
|
||||||
|
<div class="attachment-info-note">
|
||||||
|
<p><em>Attachment information displayed - files not included in PDF.</em></p>
|
||||||
|
</div>
|
||||||
|
""");
|
||||||
|
}
|
||||||
|
html.append("</div>\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
public static HTMLToPdfRequest createHtmlRequest(EmlToPdfRequest request) {
|
||||||
|
HTMLToPdfRequest htmlRequest = new HTMLToPdfRequest();
|
||||||
|
|
||||||
|
if (request != null) {
|
||||||
|
htmlRequest.setFileInput(request.getFileInput());
|
||||||
|
}
|
||||||
|
|
||||||
|
htmlRequest.setZoom(Float.parseFloat(DEFAULT_ZOOM));
|
||||||
|
return htmlRequest;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String detectMimeType(String filename, String existingMimeType) {
|
||||||
|
if (existingMimeType != null && !existingMimeType.isEmpty()) {
|
||||||
|
return existingMimeType;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (filename != null) {
|
||||||
|
String lowerFilename = filename.toLowerCase();
|
||||||
|
for (Map.Entry<String, String> entry : EXTENSION_TO_MIME_TYPE.entrySet()) {
|
||||||
|
if (lowerFilename.endsWith(entry.getKey())) {
|
||||||
|
return entry.getValue();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return "image/png";
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String decodeUrlEncoded(String encoded) {
|
||||||
|
try {
|
||||||
|
return java.net.URLDecoder.decode(encoded, StandardCharsets.UTF_8);
|
||||||
|
} catch (Exception e) {
|
||||||
|
return encoded; // Return original if decoding fails
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String decodeMimeHeader(String encodedText) {
|
||||||
|
if (encodedText == null || encodedText.trim().isEmpty()) {
|
||||||
|
return encodedText;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
StringBuilder result = new StringBuilder();
|
||||||
|
Pattern concatenatedPattern =
|
||||||
|
Pattern.compile(
|
||||||
|
"(=\\?[^?]+\\?[BbQq]\\?[^?]*\\?=)(\\s*=\\?[^?]+\\?[BbQq]\\?[^?]*\\?=)+");
|
||||||
|
Matcher concatenatedMatcher = concatenatedPattern.matcher(encodedText);
|
||||||
|
String processedText =
|
||||||
|
concatenatedMatcher.replaceAll(
|
||||||
|
match -> match.group().replaceAll("\\s+(?==\\?)", ""));
|
||||||
|
|
||||||
|
Pattern mimePattern = Pattern.compile("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?=");
|
||||||
|
Matcher matcher = mimePattern.matcher(processedText);
|
||||||
|
int lastEnd = 0;
|
||||||
|
|
||||||
|
while (matcher.find()) {
|
||||||
|
result.append(processedText, lastEnd, matcher.start());
|
||||||
|
|
||||||
|
String charset = matcher.group(1);
|
||||||
|
String encoding = matcher.group(2).toUpperCase();
|
||||||
|
String encodedValue = matcher.group(3);
|
||||||
|
|
||||||
|
try {
|
||||||
|
String decodedValue =
|
||||||
|
switch (encoding) {
|
||||||
|
case "B" -> {
|
||||||
|
String cleanBase64 = encodedValue.replaceAll("\\s", "");
|
||||||
|
byte[] decodedBytes = Base64.getDecoder().decode(cleanBase64);
|
||||||
|
Charset targetCharset;
|
||||||
|
try {
|
||||||
|
targetCharset = Charset.forName(charset);
|
||||||
|
} catch (Exception e) {
|
||||||
|
targetCharset = StandardCharsets.UTF_8;
|
||||||
|
}
|
||||||
|
yield new String(decodedBytes, targetCharset);
|
||||||
|
}
|
||||||
|
case "Q" -> decodeQuotedPrintable(encodedValue, charset);
|
||||||
|
default -> matcher.group(0); // Return original if unknown encoding
|
||||||
|
};
|
||||||
|
result.append(decodedValue);
|
||||||
|
} catch (RuntimeException e) {
|
||||||
|
result.append(matcher.group(0)); // Keep original on decode error
|
||||||
|
}
|
||||||
|
|
||||||
|
lastEnd = matcher.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
result.append(processedText.substring(lastEnd));
|
||||||
|
return result.toString();
|
||||||
|
} catch (Exception e) {
|
||||||
|
return encodedText; // Return original on any parsing error
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String decodeQuotedPrintable(String encodedText, String charset) {
|
||||||
|
StringBuilder result = new StringBuilder();
|
||||||
|
for (int i = 0; i < encodedText.length(); i++) {
|
||||||
|
char c = encodedText.charAt(i);
|
||||||
|
switch (c) {
|
||||||
|
case '=' -> {
|
||||||
|
if (i + 2 < encodedText.length()) {
|
||||||
|
String hex = encodedText.substring(i + 1, i + 3);
|
||||||
|
try {
|
||||||
|
int value = Integer.parseInt(hex, 16);
|
||||||
|
result.append((char) value);
|
||||||
|
i += 2;
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
result.append(c);
|
||||||
|
}
|
||||||
|
} else if (i + 1 == encodedText.length()
|
||||||
|
|| (i + 2 == encodedText.length()
|
||||||
|
&& encodedText.charAt(i + 1) == '\n')) {
|
||||||
|
if (i + 1 < encodedText.length() && encodedText.charAt(i + 1) == '\n') {
|
||||||
|
i++; // Skip the newline too
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
result.append(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case '_' -> result.append(' '); // Space encoding in Q encoding
|
||||||
|
default -> result.append(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
byte[] bytes = result.toString().getBytes(StandardCharsets.ISO_8859_1);
|
||||||
|
try {
|
||||||
|
Charset targetCharset = Charset.forName(charset);
|
||||||
|
return new String(bytes, targetCharset);
|
||||||
|
} catch (Exception e) {
|
||||||
|
try {
|
||||||
|
return new String(bytes, StandardCharsets.UTF_8);
|
||||||
|
} catch (Exception fallbackException) {
|
||||||
|
return new String(bytes, StandardCharsets.ISO_8859_1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String escapeHtml(String text) {
|
||||||
|
if (text == null) return "";
|
||||||
|
return text.replace("&", "&")
|
||||||
|
.replace("<", "<")
|
||||||
|
.replace(">", ">")
|
||||||
|
.replace("\"", """)
|
||||||
|
.replace("'", "'");
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String sanitizeText(String text, CustomHtmlSanitizer customHtmlSanitizer) {
|
||||||
|
if (customHtmlSanitizer != null) {
|
||||||
|
return customHtmlSanitizer.sanitize(text);
|
||||||
|
} else {
|
||||||
|
return escapeHtml(text);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String simplifyHtmlContent(String htmlContent) {
|
||||||
|
String simplified = htmlContent.replaceAll("(?i)<script[^>]*>.*?</script>", "");
|
||||||
|
simplified = simplified.replaceAll("(?i)<style[^>]*>.*?</style>", "");
|
||||||
|
return simplified;
|
||||||
|
}
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,667 @@
|
|||||||
|
package stirling.software.common.util;
|
||||||
|
|
||||||
|
import static stirling.software.common.util.AttachmentUtils.setCatalogViewerPreferences;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Base64;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.GregorianCalendar;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.TimeZone;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
|
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
|
||||||
|
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
|
||||||
|
import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode;
|
||||||
|
import org.apache.pdfbox.pdmodel.PDPage;
|
||||||
|
import org.apache.pdfbox.pdmodel.PageMode;
|
||||||
|
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||||
|
import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
|
||||||
|
import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
|
||||||
|
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationFileAttachment;
|
||||||
|
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary;
|
||||||
|
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream;
|
||||||
|
import org.apache.pdfbox.text.PDFTextStripper;
|
||||||
|
import org.apache.pdfbox.text.TextPosition;
|
||||||
|
import org.jetbrains.annotations.NotNull;
|
||||||
|
import org.springframework.web.multipart.MultipartFile;
|
||||||
|
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.experimental.UtilityClass;
|
||||||
|
|
||||||
|
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||||
|
|
||||||
|
@UtilityClass
|
||||||
|
public class PdfAttachmentHandler {
|
||||||
|
// Note: This class is designed for EML attachments, not general PDF attachments.
|
||||||
|
|
||||||
|
private static final String ATTACHMENT_MARKER = "@";
|
||||||
|
private static final float ATTACHMENT_ICON_WIDTH = 12f;
|
||||||
|
private static final float ATTACHMENT_ICON_HEIGHT = 14f;
|
||||||
|
private static final float ANNOTATION_X_OFFSET = 2f;
|
||||||
|
private static final float ANNOTATION_Y_OFFSET = 10f;
|
||||||
|
|
||||||
|
public static byte[] attachFilesToPdf(
|
||||||
|
byte[] pdfBytes,
|
||||||
|
List<EmlParser.EmailAttachment> attachments,
|
||||||
|
CustomPDFDocumentFactory pdfDocumentFactory)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
if (attachments == null || attachments.isEmpty()) {
|
||||||
|
return pdfBytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
try (PDDocument document = pdfDocumentFactory.load(pdfBytes);
|
||||||
|
ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
|
||||||
|
|
||||||
|
List<MultipartFile> multipartAttachments = new ArrayList<>();
|
||||||
|
for (int i = 0; i < attachments.size(); i++) {
|
||||||
|
EmlParser.EmailAttachment attachment = attachments.get(i);
|
||||||
|
if (attachment.getData() != null && attachment.getData().length > 0) {
|
||||||
|
String embeddedFilename =
|
||||||
|
attachment.getFilename() != null
|
||||||
|
? attachment.getFilename()
|
||||||
|
: ("attachment_" + i);
|
||||||
|
attachment.setEmbeddedFilename(embeddedFilename);
|
||||||
|
multipartAttachments.add(createMultipartFile(attachment));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!multipartAttachments.isEmpty()) {
|
||||||
|
Map<Integer, String> indexToFilenameMap =
|
||||||
|
addAttachmentsToDocumentWithMapping(
|
||||||
|
document, multipartAttachments, attachments);
|
||||||
|
setCatalogViewerPreferences(document, PageMode.USE_ATTACHMENTS);
|
||||||
|
addAttachmentAnnotationsToDocumentWithMapping(
|
||||||
|
document, attachments, indexToFilenameMap);
|
||||||
|
}
|
||||||
|
|
||||||
|
document.save(outputStream);
|
||||||
|
return outputStream.toByteArray();
|
||||||
|
} catch (RuntimeException e) {
|
||||||
|
throw new IOException(
|
||||||
|
"Invalid PDF structure or processing error: " + e.getMessage(), e);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new IOException("Error attaching files to PDF: " + e.getMessage(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static MultipartFile createMultipartFile(EmlParser.EmailAttachment attachment) {
|
||||||
|
return new MultipartFile() {
|
||||||
|
@Override
|
||||||
|
public @NotNull String getName() {
|
||||||
|
return "attachment";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getOriginalFilename() {
|
||||||
|
return attachment.getFilename() != null
|
||||||
|
? attachment.getFilename()
|
||||||
|
: "attachment_" + System.currentTimeMillis();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getContentType() {
|
||||||
|
return attachment.getContentType() != null
|
||||||
|
? attachment.getContentType()
|
||||||
|
: "application/octet-stream";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isEmpty() {
|
||||||
|
return attachment.getData() == null || attachment.getData().length == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getSize() {
|
||||||
|
return attachment.getData() != null ? attachment.getData().length : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public byte @NotNull [] getBytes() {
|
||||||
|
return attachment.getData() != null ? attachment.getData() : new byte[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public @NotNull InputStream getInputStream() {
|
||||||
|
byte[] data = attachment.getData();
|
||||||
|
return new ByteArrayInputStream(data != null ? data : new byte[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void transferTo(@NotNull File dest) throws IOException, IllegalStateException {
|
||||||
|
try (FileOutputStream fos = new FileOutputStream(dest)) {
|
||||||
|
byte[] data = attachment.getData();
|
||||||
|
if (data != null) {
|
||||||
|
fos.write(data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String ensureUniqueFilename(String filename, Set<String> existingNames) {
|
||||||
|
if (!existingNames.contains(filename)) {
|
||||||
|
return filename;
|
||||||
|
}
|
||||||
|
|
||||||
|
String baseName;
|
||||||
|
String extension = "";
|
||||||
|
int lastDot = filename.lastIndexOf('.');
|
||||||
|
if (lastDot > 0) {
|
||||||
|
baseName = filename.substring(0, lastDot);
|
||||||
|
extension = filename.substring(lastDot);
|
||||||
|
} else {
|
||||||
|
baseName = filename;
|
||||||
|
}
|
||||||
|
|
||||||
|
int counter = 1;
|
||||||
|
String uniqueName;
|
||||||
|
do {
|
||||||
|
uniqueName = baseName + "_" + counter + extension;
|
||||||
|
counter++;
|
||||||
|
} while (existingNames.contains(uniqueName));
|
||||||
|
|
||||||
|
return uniqueName;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static @NotNull PDRectangle calculateAnnotationRectangle(
|
||||||
|
PDPage page, float x, float y) {
|
||||||
|
PDRectangle cropBox = page.getCropBox();
|
||||||
|
|
||||||
|
// ISO 32000-1:2008 Section 8.3: PDF coordinate system transforms
|
||||||
|
int rotation = page.getRotation();
|
||||||
|
float pdfX = x;
|
||||||
|
float pdfY = cropBox.getHeight() - y;
|
||||||
|
|
||||||
|
switch (rotation) {
|
||||||
|
case 90 -> {
|
||||||
|
float temp = pdfX;
|
||||||
|
pdfX = pdfY;
|
||||||
|
pdfY = cropBox.getWidth() - temp;
|
||||||
|
}
|
||||||
|
case 180 -> {
|
||||||
|
pdfX = cropBox.getWidth() - pdfX;
|
||||||
|
pdfY = y;
|
||||||
|
}
|
||||||
|
case 270 -> {
|
||||||
|
float temp = pdfX;
|
||||||
|
pdfX = cropBox.getHeight() - pdfY;
|
||||||
|
pdfY = temp;
|
||||||
|
}
|
||||||
|
default -> {}
|
||||||
|
}
|
||||||
|
|
||||||
|
float iconHeight = ATTACHMENT_ICON_HEIGHT;
|
||||||
|
float paddingX = 2.0f;
|
||||||
|
float paddingY = 2.0f;
|
||||||
|
|
||||||
|
PDRectangle rect =
|
||||||
|
new PDRectangle(
|
||||||
|
pdfX + ANNOTATION_X_OFFSET + paddingX,
|
||||||
|
pdfY - iconHeight + ANNOTATION_Y_OFFSET + paddingY,
|
||||||
|
ATTACHMENT_ICON_WIDTH,
|
||||||
|
iconHeight);
|
||||||
|
|
||||||
|
PDRectangle mediaBox = page.getMediaBox();
|
||||||
|
if (rect.getLowerLeftX() < mediaBox.getLowerLeftX()
|
||||||
|
|| rect.getLowerLeftY() < mediaBox.getLowerLeftY()
|
||||||
|
|| rect.getUpperRightX() > mediaBox.getUpperRightX()
|
||||||
|
|| rect.getUpperRightY() > mediaBox.getUpperRightY()) {
|
||||||
|
|
||||||
|
float adjustedX =
|
||||||
|
Math.max(
|
||||||
|
mediaBox.getLowerLeftX(),
|
||||||
|
Math.min(
|
||||||
|
rect.getLowerLeftX(),
|
||||||
|
mediaBox.getUpperRightX() - rect.getWidth()));
|
||||||
|
float adjustedY =
|
||||||
|
Math.max(
|
||||||
|
mediaBox.getLowerLeftY(),
|
||||||
|
Math.min(
|
||||||
|
rect.getLowerLeftY(),
|
||||||
|
mediaBox.getUpperRightY() - rect.getHeight()));
|
||||||
|
rect = new PDRectangle(adjustedX, adjustedY, rect.getWidth(), rect.getHeight());
|
||||||
|
}
|
||||||
|
|
||||||
|
return rect;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String processInlineImages(
|
||||||
|
String htmlContent, EmlParser.EmailContent emailContent) {
|
||||||
|
if (htmlContent == null || emailContent == null) return htmlContent;
|
||||||
|
|
||||||
|
Map<String, EmlParser.EmailAttachment> contentIdMap = new HashMap<>();
|
||||||
|
for (EmlParser.EmailAttachment attachment : emailContent.getAttachments()) {
|
||||||
|
if (attachment.isEmbedded()
|
||||||
|
&& attachment.getContentId() != null
|
||||||
|
&& attachment.getData() != null) {
|
||||||
|
contentIdMap.put(attachment.getContentId(), attachment);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (contentIdMap.isEmpty()) return htmlContent;
|
||||||
|
|
||||||
|
Pattern cidPattern =
|
||||||
|
Pattern.compile(
|
||||||
|
"(?i)<img[^>]*\\ssrc\\s*=\\s*['\"]cid:([^'\"]+)['\"][^>]*>",
|
||||||
|
Pattern.CASE_INSENSITIVE);
|
||||||
|
Matcher matcher = cidPattern.matcher(htmlContent);
|
||||||
|
|
||||||
|
StringBuilder result = new StringBuilder();
|
||||||
|
while (matcher.find()) {
|
||||||
|
String contentId = matcher.group(1);
|
||||||
|
EmlParser.EmailAttachment attachment = contentIdMap.get(contentId);
|
||||||
|
|
||||||
|
if (attachment != null && attachment.getData() != null) {
|
||||||
|
String mimeType =
|
||||||
|
EmlProcessingUtils.detectMimeType(
|
||||||
|
attachment.getFilename(), attachment.getContentType());
|
||||||
|
|
||||||
|
String base64Data = Base64.getEncoder().encodeToString(attachment.getData());
|
||||||
|
String dataUri = "data:" + mimeType + ";base64," + base64Data;
|
||||||
|
|
||||||
|
String replacement =
|
||||||
|
matcher.group(0).replaceFirst("cid:" + Pattern.quote(contentId), dataUri);
|
||||||
|
matcher.appendReplacement(result, Matcher.quoteReplacement(replacement));
|
||||||
|
} else {
|
||||||
|
matcher.appendReplacement(result, Matcher.quoteReplacement(matcher.group(0)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
matcher.appendTail(result);
|
||||||
|
|
||||||
|
return result.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String formatEmailDate(Date date) {
|
||||||
|
if (date == null) return "";
|
||||||
|
|
||||||
|
SimpleDateFormat formatter =
|
||||||
|
new SimpleDateFormat("EEE, MMM d, yyyy 'at' h:mm a z", Locale.ENGLISH);
|
||||||
|
formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
|
||||||
|
return formatter.format(date);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Data
|
||||||
|
public static class MarkerPosition {
|
||||||
|
private int pageIndex;
|
||||||
|
private float x;
|
||||||
|
private float y;
|
||||||
|
private String character;
|
||||||
|
private String filename;
|
||||||
|
|
||||||
|
public MarkerPosition(int pageIndex, float x, float y, String character, String filename) {
|
||||||
|
this.pageIndex = pageIndex;
|
||||||
|
this.x = x;
|
||||||
|
this.y = y;
|
||||||
|
this.character = character;
|
||||||
|
this.filename = filename;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class AttachmentMarkerPositionFinder extends PDFTextStripper {
|
||||||
|
@Getter private final List<MarkerPosition> positions = new ArrayList<>();
|
||||||
|
private int currentPageIndex;
|
||||||
|
protected boolean sortByPosition;
|
||||||
|
private boolean isInAttachmentSection;
|
||||||
|
private boolean attachmentSectionFound;
|
||||||
|
private final StringBuilder currentText = new StringBuilder();
|
||||||
|
|
||||||
|
private static final Pattern ATTACHMENT_SECTION_PATTERN =
|
||||||
|
Pattern.compile("attachments\\s*\\(\\d+\\)", Pattern.CASE_INSENSITIVE);
|
||||||
|
|
||||||
|
private static final Pattern FILENAME_PATTERN =
|
||||||
|
Pattern.compile("@\\s*([^\\s\\(]+(?:\\.[a-zA-Z0-9]+)?)");
|
||||||
|
|
||||||
|
public AttachmentMarkerPositionFinder() {
|
||||||
|
super();
|
||||||
|
this.currentPageIndex = 0;
|
||||||
|
this.sortByPosition = false; // Disable sorting to preserve document order
|
||||||
|
this.isInAttachmentSection = false;
|
||||||
|
this.attachmentSectionFound = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getText(PDDocument document) throws IOException {
|
||||||
|
super.getText(document);
|
||||||
|
|
||||||
|
if (sortByPosition) {
|
||||||
|
positions.sort(
|
||||||
|
(a, b) -> {
|
||||||
|
int pageCompare = Integer.compare(a.getPageIndex(), b.getPageIndex());
|
||||||
|
if (pageCompare != 0) return pageCompare;
|
||||||
|
return Float.compare(
|
||||||
|
b.getY(), a.getY()); // Descending Y per PDF coordinate system
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return ""; // Return empty string as we only need positions
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void startPage(PDPage page) throws IOException {
|
||||||
|
super.startPage(page);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void endPage(PDPage page) throws IOException {
|
||||||
|
currentPageIndex++;
|
||||||
|
super.endPage(page);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void writeString(String string, List<TextPosition> textPositions)
|
||||||
|
throws IOException {
|
||||||
|
String lowerString = string.toLowerCase();
|
||||||
|
|
||||||
|
if (ATTACHMENT_SECTION_PATTERN.matcher(lowerString).find()) {
|
||||||
|
isInAttachmentSection = true;
|
||||||
|
attachmentSectionFound = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isInAttachmentSection
|
||||||
|
&& (lowerString.contains("</body>")
|
||||||
|
|| lowerString.contains("</html>")
|
||||||
|
|| (attachmentSectionFound
|
||||||
|
&& lowerString.trim().isEmpty()
|
||||||
|
&& string.length() > 50))) {
|
||||||
|
isInAttachmentSection = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isInAttachmentSection) {
|
||||||
|
currentText.append(string);
|
||||||
|
|
||||||
|
for (int i = 0; (i = string.indexOf(ATTACHMENT_MARKER, i)) != -1; i++) {
|
||||||
|
if (i < textPositions.size()) {
|
||||||
|
TextPosition textPosition = textPositions.get(i);
|
||||||
|
|
||||||
|
String filename = extractFilenameAfterMarker(string, i);
|
||||||
|
|
||||||
|
MarkerPosition position =
|
||||||
|
new MarkerPosition(
|
||||||
|
currentPageIndex,
|
||||||
|
textPosition.getXDirAdj(),
|
||||||
|
textPosition.getYDirAdj(),
|
||||||
|
ATTACHMENT_MARKER,
|
||||||
|
filename);
|
||||||
|
positions.add(position);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
super.writeString(string, textPositions);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setSortByPosition(boolean sortByPosition) {
|
||||||
|
this.sortByPosition = sortByPosition;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String extractFilenameAfterMarker(String text, int markerIndex) {
|
||||||
|
String afterMarker = text.substring(markerIndex + 1);
|
||||||
|
|
||||||
|
Matcher matcher = FILENAME_PATTERN.matcher("@" + afterMarker);
|
||||||
|
if (matcher.find()) {
|
||||||
|
return matcher.group(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
String[] parts = afterMarker.split("[\\s\\(\\)]+");
|
||||||
|
for (String part : parts) {
|
||||||
|
part = part.trim();
|
||||||
|
if (part.length() > 3 && part.contains(".")) {
|
||||||
|
return part;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Map<Integer, String> addAttachmentsToDocumentWithMapping(
|
||||||
|
PDDocument document,
|
||||||
|
List<MultipartFile> attachments,
|
||||||
|
List<EmlParser.EmailAttachment> originalAttachments)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
PDDocumentCatalog catalog = document.getDocumentCatalog();
|
||||||
|
|
||||||
|
if (catalog == null) {
|
||||||
|
throw new IOException("PDF document catalog is not accessible");
|
||||||
|
}
|
||||||
|
|
||||||
|
PDDocumentNameDictionary documentNames = catalog.getNames();
|
||||||
|
if (documentNames == null) {
|
||||||
|
documentNames = new PDDocumentNameDictionary(catalog);
|
||||||
|
catalog.setNames(documentNames);
|
||||||
|
}
|
||||||
|
|
||||||
|
PDEmbeddedFilesNameTreeNode embeddedFilesTree = documentNames.getEmbeddedFiles();
|
||||||
|
if (embeddedFilesTree == null) {
|
||||||
|
embeddedFilesTree = new PDEmbeddedFilesNameTreeNode();
|
||||||
|
documentNames.setEmbeddedFiles(embeddedFilesTree);
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String, PDComplexFileSpecification> existingNames = embeddedFilesTree.getNames();
|
||||||
|
if (existingNames == null) {
|
||||||
|
existingNames = new HashMap<>();
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<Integer, String> indexToFilenameMap = new HashMap<>();
|
||||||
|
|
||||||
|
for (int i = 0; i < attachments.size(); i++) {
|
||||||
|
MultipartFile attachment = attachments.get(i);
|
||||||
|
String filename = attachment.getOriginalFilename();
|
||||||
|
if (filename == null || filename.trim().isEmpty()) {
|
||||||
|
filename = "attachment_" + i;
|
||||||
|
}
|
||||||
|
|
||||||
|
String normalizedFilename =
|
||||||
|
java.text.Normalizer.normalize(filename, java.text.Normalizer.Form.NFC);
|
||||||
|
String uniqueFilename =
|
||||||
|
ensureUniqueFilename(normalizedFilename, existingNames.keySet());
|
||||||
|
|
||||||
|
indexToFilenameMap.put(i, uniqueFilename);
|
||||||
|
|
||||||
|
PDEmbeddedFile embeddedFile = new PDEmbeddedFile(document, attachment.getInputStream());
|
||||||
|
embeddedFile.setSize((int) attachment.getSize());
|
||||||
|
|
||||||
|
GregorianCalendar currentTime = new GregorianCalendar();
|
||||||
|
embeddedFile.setCreationDate(currentTime);
|
||||||
|
embeddedFile.setModDate(currentTime);
|
||||||
|
|
||||||
|
String contentType = attachment.getContentType();
|
||||||
|
if (contentType != null && !contentType.trim().isEmpty()) {
|
||||||
|
embeddedFile.setSubtype(contentType);
|
||||||
|
}
|
||||||
|
|
||||||
|
PDComplexFileSpecification fileSpecification = new PDComplexFileSpecification();
|
||||||
|
fileSpecification.setFile(uniqueFilename);
|
||||||
|
fileSpecification.setFileUnicode(uniqueFilename);
|
||||||
|
fileSpecification.setEmbeddedFile(embeddedFile);
|
||||||
|
fileSpecification.setEmbeddedFileUnicode(embeddedFile);
|
||||||
|
|
||||||
|
existingNames.put(uniqueFilename, fileSpecification);
|
||||||
|
}
|
||||||
|
|
||||||
|
embeddedFilesTree.setNames(existingNames);
|
||||||
|
documentNames.setEmbeddedFiles(embeddedFilesTree);
|
||||||
|
catalog.setNames(documentNames);
|
||||||
|
|
||||||
|
return indexToFilenameMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void addAttachmentAnnotationsToDocumentWithMapping(
|
||||||
|
PDDocument document,
|
||||||
|
List<EmlParser.EmailAttachment> attachments,
|
||||||
|
Map<Integer, String> indexToFilenameMap)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
if (document.getNumberOfPages() == 0 || attachments == null || attachments.isEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
AttachmentMarkerPositionFinder finder = new AttachmentMarkerPositionFinder();
|
||||||
|
finder.setSortByPosition(false); // Keep document order to maintain pairing
|
||||||
|
finder.getText(document);
|
||||||
|
List<MarkerPosition> markerPositions = finder.getPositions();
|
||||||
|
|
||||||
|
int annotationsToAdd = Math.min(markerPositions.size(), attachments.size());
|
||||||
|
|
||||||
|
for (int i = 0; i < annotationsToAdd; i++) {
|
||||||
|
MarkerPosition position = markerPositions.get(i);
|
||||||
|
|
||||||
|
String filenameNearMarker = position.getFilename();
|
||||||
|
|
||||||
|
EmlParser.EmailAttachment matchingAttachment =
|
||||||
|
findAttachmentByFilename(attachments, filenameNearMarker);
|
||||||
|
|
||||||
|
if (matchingAttachment != null) {
|
||||||
|
String embeddedFilename =
|
||||||
|
findEmbeddedFilenameForAttachment(matchingAttachment, indexToFilenameMap);
|
||||||
|
|
||||||
|
if (embeddedFilename != null) {
|
||||||
|
PDPage page = document.getPage(position.getPageIndex());
|
||||||
|
addAttachmentAnnotationToPageWithMapping(
|
||||||
|
document,
|
||||||
|
page,
|
||||||
|
matchingAttachment,
|
||||||
|
embeddedFilename,
|
||||||
|
position.getX(),
|
||||||
|
position.getY(),
|
||||||
|
i);
|
||||||
|
} else {
|
||||||
|
// No embedded filename found for attachment
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// No matching attachment found for filename near marker
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static EmlParser.EmailAttachment findAttachmentByFilename(
|
||||||
|
List<EmlParser.EmailAttachment> attachments, String targetFilename) {
|
||||||
|
if (targetFilename == null || targetFilename.trim().isEmpty()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
String normalizedTarget = normalizeFilename(targetFilename);
|
||||||
|
|
||||||
|
// First try exact match
|
||||||
|
for (EmlParser.EmailAttachment attachment : attachments) {
|
||||||
|
if (attachment.getFilename() != null) {
|
||||||
|
String normalizedAttachment = normalizeFilename(attachment.getFilename());
|
||||||
|
if (normalizedAttachment.equals(normalizedTarget)) {
|
||||||
|
return attachment;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Then try contains match
|
||||||
|
for (EmlParser.EmailAttachment attachment : attachments) {
|
||||||
|
if (attachment.getFilename() != null) {
|
||||||
|
String normalizedAttachment = normalizeFilename(attachment.getFilename());
|
||||||
|
if (normalizedAttachment.contains(normalizedTarget)
|
||||||
|
|| normalizedTarget.contains(normalizedAttachment)) {
|
||||||
|
return attachment;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String findEmbeddedFilenameForAttachment(
|
||||||
|
EmlParser.EmailAttachment attachment, Map<Integer, String> indexToFilenameMap) {
|
||||||
|
|
||||||
|
String attachmentFilename = attachment.getFilename();
|
||||||
|
if (attachmentFilename == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Map.Entry<Integer, String> entry : indexToFilenameMap.entrySet()) {
|
||||||
|
String embeddedFilename = entry.getValue();
|
||||||
|
if (embeddedFilename != null
|
||||||
|
&& (embeddedFilename.equals(attachmentFilename)
|
||||||
|
|| embeddedFilename.contains(attachmentFilename)
|
||||||
|
|| attachmentFilename.contains(embeddedFilename))) {
|
||||||
|
return embeddedFilename;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String normalizeFilename(String filename) {
|
||||||
|
if (filename == null) return "";
|
||||||
|
return filename.toLowerCase()
|
||||||
|
.trim()
|
||||||
|
.replaceAll("\\s+", " ")
|
||||||
|
.replaceAll("[^a-zA-Z0-9._-]", "");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void addAttachmentAnnotationToPageWithMapping(
|
||||||
|
PDDocument document,
|
||||||
|
PDPage page,
|
||||||
|
EmlParser.EmailAttachment attachment,
|
||||||
|
String embeddedFilename,
|
||||||
|
float x,
|
||||||
|
float y,
|
||||||
|
int attachmentIndex)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
PDAnnotationFileAttachment fileAnnotation = new PDAnnotationFileAttachment();
|
||||||
|
|
||||||
|
PDRectangle rect = calculateAnnotationRectangle(page, x, y);
|
||||||
|
fileAnnotation.setRectangle(rect);
|
||||||
|
|
||||||
|
fileAnnotation.setPrinted(false);
|
||||||
|
fileAnnotation.setHidden(false);
|
||||||
|
fileAnnotation.setNoView(false);
|
||||||
|
fileAnnotation.setNoZoom(true);
|
||||||
|
fileAnnotation.setNoRotate(true);
|
||||||
|
|
||||||
|
try {
|
||||||
|
PDAppearanceDictionary appearance = new PDAppearanceDictionary();
|
||||||
|
PDAppearanceStream normalAppearance = new PDAppearanceStream(document);
|
||||||
|
normalAppearance.setBBox(new PDRectangle(0, 0, rect.getWidth(), rect.getHeight()));
|
||||||
|
appearance.setNormalAppearance(normalAppearance);
|
||||||
|
fileAnnotation.setAppearance(appearance);
|
||||||
|
} catch (RuntimeException e) {
|
||||||
|
fileAnnotation.setAppearance(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
PDEmbeddedFilesNameTreeNode efTree =
|
||||||
|
document.getDocumentCatalog().getNames().getEmbeddedFiles();
|
||||||
|
if (efTree != null) {
|
||||||
|
Map<String, PDComplexFileSpecification> efMap = efTree.getNames();
|
||||||
|
if (efMap != null) {
|
||||||
|
PDComplexFileSpecification fileSpec = efMap.get(embeddedFilename);
|
||||||
|
if (fileSpec != null) {
|
||||||
|
fileAnnotation.setFile(fileSpec);
|
||||||
|
} else {
|
||||||
|
// Could not find embedded file
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fileAnnotation.setContents(
|
||||||
|
"Attachment " + (attachmentIndex + 1) + ": " + attachment.getFilename());
|
||||||
|
fileAnnotation.setAnnotationName(
|
||||||
|
"EmbeddedFile_" + attachmentIndex + "_" + embeddedFilename);
|
||||||
|
|
||||||
|
page.getAnnotations().add(fileAnnotation);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user