mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-08-11 13:48:37 +02:00
refactor logic in to multiple files
This commit is contained in:
parent
c5eee8cad9
commit
18cc10eab7
@ -0,0 +1,652 @@
|
||||
package stirling.software.common.util;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.Method;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
import stirling.software.common.model.api.converters.EmlToPdfRequest;
|
||||
|
||||
@UtilityClass
|
||||
public class EmlParser {
|
||||
|
||||
private static volatile Boolean jakartaMailAvailable = null;
|
||||
private static volatile Method mimeUtilityDecodeTextMethod = null;
|
||||
private static volatile boolean mimeUtilityChecked = false;
|
||||
|
||||
private static final Pattern MIME_ENCODED_PATTERN =
|
||||
Pattern.compile("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?=");
|
||||
|
||||
private static final String DISPOSITION_ATTACHMENT = "attachment";
|
||||
private static final String TEXT_PLAIN = "text/plain";
|
||||
private static final String TEXT_HTML = "text/html";
|
||||
private static final String MULTIPART_PREFIX = "multipart/";
|
||||
|
||||
private static final String HEADER_CONTENT_TYPE = "content-type:";
|
||||
private static final String HEADER_CONTENT_DISPOSITION = "content-disposition:";
|
||||
private static final String HEADER_CONTENT_TRANSFER_ENCODING = "content-transfer-encoding:";
|
||||
private static final String HEADER_CONTENT_ID = "Content-ID";
|
||||
private static final String HEADER_SUBJECT = "Subject:";
|
||||
private static final String HEADER_FROM = "From:";
|
||||
private static final String HEADER_TO = "To:";
|
||||
private static final String HEADER_CC = "Cc:";
|
||||
private static final String HEADER_BCC = "Bcc:";
|
||||
private static final String HEADER_DATE = "Date:";
|
||||
|
||||
private static synchronized boolean isJakartaMailAvailable() {
|
||||
if (jakartaMailAvailable == null) {
|
||||
try {
|
||||
Class.forName("jakarta.mail.internet.MimeMessage");
|
||||
Class.forName("jakarta.mail.Session");
|
||||
Class.forName("jakarta.mail.internet.MimeUtility");
|
||||
Class.forName("jakarta.mail.internet.MimePart");
|
||||
Class.forName("jakarta.mail.internet.MimeMultipart");
|
||||
Class.forName("jakarta.mail.Multipart");
|
||||
Class.forName("jakarta.mail.Part");
|
||||
jakartaMailAvailable = true;
|
||||
} catch (ClassNotFoundException e) {
|
||||
jakartaMailAvailable = false;
|
||||
}
|
||||
}
|
||||
return jakartaMailAvailable;
|
||||
}
|
||||
|
||||
public static EmailContent extractEmailContent(
|
||||
byte[] emlBytes, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer)
|
||||
throws IOException {
|
||||
EmlProcessingUtils.validateEmlInput(emlBytes);
|
||||
|
||||
if (isJakartaMailAvailable()) {
|
||||
return extractEmailContentAdvanced(emlBytes, request, customHtmlSanitizer);
|
||||
} else {
|
||||
return extractEmailContentBasic(emlBytes, request, customHtmlSanitizer);
|
||||
}
|
||||
}
|
||||
|
||||
private static EmailContent extractEmailContentBasic(
|
||||
byte[] emlBytes, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer) {
|
||||
String emlContent = new String(emlBytes, StandardCharsets.UTF_8);
|
||||
EmailContent content = new EmailContent();
|
||||
|
||||
content.setSubject(extractBasicHeader(emlContent, HEADER_SUBJECT));
|
||||
content.setFrom(extractBasicHeader(emlContent, HEADER_FROM));
|
||||
content.setTo(extractBasicHeader(emlContent, HEADER_TO));
|
||||
content.setCc(extractBasicHeader(emlContent, HEADER_CC));
|
||||
content.setBcc(extractBasicHeader(emlContent, HEADER_BCC));
|
||||
|
||||
String dateStr = extractBasicHeader(emlContent, HEADER_DATE);
|
||||
if (!dateStr.isEmpty()) {
|
||||
content.setDateString(dateStr);
|
||||
}
|
||||
|
||||
String htmlBody = extractHtmlBody(emlContent);
|
||||
if (htmlBody != null) {
|
||||
content.setHtmlBody(htmlBody);
|
||||
} else {
|
||||
String textBody = extractTextBody(emlContent);
|
||||
content.setTextBody(textBody != null ? textBody : "Email content could not be parsed");
|
||||
}
|
||||
|
||||
content.getAttachments().addAll(extractAttachmentsBasic(emlContent));
|
||||
|
||||
return content;
|
||||
}
|
||||
|
||||
private static EmailContent extractEmailContentAdvanced(
|
||||
byte[] emlBytes, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer) {
|
||||
try {
|
||||
Class<?> sessionClass = Class.forName("jakarta.mail.Session");
|
||||
Class<?> mimeMessageClass = Class.forName("jakarta.mail.internet.MimeMessage");
|
||||
|
||||
Method getDefaultInstance =
|
||||
sessionClass.getMethod("getDefaultInstance", Properties.class);
|
||||
Object session = getDefaultInstance.invoke(null, new Properties());
|
||||
|
||||
Class<?>[] constructorArgs = new Class<?>[] {sessionClass, InputStream.class};
|
||||
Constructor<?> mimeMessageConstructor =
|
||||
mimeMessageClass.getConstructor(constructorArgs);
|
||||
Object message =
|
||||
mimeMessageConstructor.newInstance(session, new ByteArrayInputStream(emlBytes));
|
||||
|
||||
return extractFromMimeMessage(message, request, customHtmlSanitizer);
|
||||
|
||||
} catch (ReflectiveOperationException e) {
|
||||
return extractEmailContentBasic(emlBytes, request, customHtmlSanitizer);
|
||||
}
|
||||
}
|
||||
|
||||
private static EmailContent extractFromMimeMessage(
|
||||
Object message, EmlToPdfRequest request, CustomHtmlSanitizer customHtmlSanitizer) {
|
||||
EmailContent content = new EmailContent();
|
||||
|
||||
try {
|
||||
Class<?> messageClass = message.getClass();
|
||||
|
||||
Method getSubject = messageClass.getMethod("getSubject");
|
||||
String subject = (String) getSubject.invoke(message);
|
||||
content.setSubject(subject != null ? safeMimeDecode(subject) : "No Subject");
|
||||
|
||||
Method getFrom = messageClass.getMethod("getFrom");
|
||||
Object[] fromAddresses = (Object[]) getFrom.invoke(message);
|
||||
content.setFrom(buildAddressString(fromAddresses));
|
||||
|
||||
extractRecipients(message, messageClass, content);
|
||||
|
||||
Method getSentDate = messageClass.getMethod("getSentDate");
|
||||
content.setDate((Date) getSentDate.invoke(message));
|
||||
|
||||
Method getContent = messageClass.getMethod("getContent");
|
||||
Object messageContent = getContent.invoke(message);
|
||||
|
||||
processMessageContent(message, messageContent, content, request, customHtmlSanitizer);
|
||||
|
||||
} catch (ReflectiveOperationException | RuntimeException e) {
|
||||
content.setSubject("Email Conversion");
|
||||
content.setFrom("Unknown");
|
||||
content.setTo("Unknown");
|
||||
content.setCc("");
|
||||
content.setBcc("");
|
||||
content.setTextBody("Email content could not be parsed with advanced processing");
|
||||
}
|
||||
|
||||
return content;
|
||||
}
|
||||
|
||||
private static void extractRecipients(
|
||||
Object message, Class<?> messageClass, EmailContent content) {
|
||||
try {
|
||||
Method getRecipients =
|
||||
messageClass.getMethod(
|
||||
"getRecipients", Class.forName("jakarta.mail.Message$RecipientType"));
|
||||
Class<?> recipientTypeClass = Class.forName("jakarta.mail.Message$RecipientType");
|
||||
|
||||
Object toType = recipientTypeClass.getField("TO").get(null);
|
||||
Object[] toRecipients = (Object[]) getRecipients.invoke(message, toType);
|
||||
content.setTo(buildAddressString(toRecipients));
|
||||
|
||||
Object ccType = recipientTypeClass.getField("CC").get(null);
|
||||
Object[] ccRecipients = (Object[]) getRecipients.invoke(message, ccType);
|
||||
content.setCc(buildAddressString(ccRecipients));
|
||||
|
||||
Object bccType = recipientTypeClass.getField("BCC").get(null);
|
||||
Object[] bccRecipients = (Object[]) getRecipients.invoke(message, bccType);
|
||||
content.setBcc(buildAddressString(bccRecipients));
|
||||
|
||||
} catch (ReflectiveOperationException e) {
|
||||
try {
|
||||
Method getAllRecipients = messageClass.getMethod("getAllRecipients");
|
||||
Object[] recipients = (Object[]) getAllRecipients.invoke(message);
|
||||
content.setTo(buildAddressString(recipients));
|
||||
content.setCc("");
|
||||
content.setBcc("");
|
||||
} catch (ReflectiveOperationException ex) {
|
||||
content.setTo("");
|
||||
content.setCc("");
|
||||
content.setBcc("");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static String buildAddressString(Object[] addresses) {
|
||||
if (addresses == null || addresses.length == 0) {
|
||||
return "";
|
||||
}
|
||||
|
||||
StringBuilder builder = new StringBuilder();
|
||||
for (int i = 0; i < addresses.length; i++) {
|
||||
if (i > 0) builder.append(", ");
|
||||
builder.append(safeMimeDecode(addresses[i].toString()));
|
||||
}
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
private static void processMessageContent(
|
||||
Object message,
|
||||
Object messageContent,
|
||||
EmailContent content,
|
||||
EmlToPdfRequest request,
|
||||
CustomHtmlSanitizer customHtmlSanitizer) {
|
||||
try {
|
||||
if (messageContent instanceof String stringContent) {
|
||||
Method getContentType = message.getClass().getMethod("getContentType");
|
||||
String contentType = (String) getContentType.invoke(message);
|
||||
|
||||
if (contentType != null && contentType.toLowerCase().contains(TEXT_HTML)) {
|
||||
content.setHtmlBody(stringContent);
|
||||
} else {
|
||||
content.setTextBody(stringContent);
|
||||
}
|
||||
} else {
|
||||
Class<?> multipartClass = Class.forName("jakarta.mail.Multipart");
|
||||
if (multipartClass.isInstance(messageContent)) {
|
||||
processMultipart(messageContent, content, request, customHtmlSanitizer, 0);
|
||||
}
|
||||
}
|
||||
} catch (ReflectiveOperationException | ClassCastException e) {
|
||||
content.setTextBody("Email content could not be parsed with advanced processing");
|
||||
}
|
||||
}
|
||||
|
||||
private static void processMultipart(
|
||||
Object multipart,
|
||||
EmailContent content,
|
||||
EmlToPdfRequest request,
|
||||
CustomHtmlSanitizer customHtmlSanitizer,
|
||||
int depth) {
|
||||
|
||||
final int MAX_MULTIPART_DEPTH = 10;
|
||||
if (depth > MAX_MULTIPART_DEPTH) {
|
||||
content.setHtmlBody("<div class=\"error\">Maximum multipart depth exceeded</div>");
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
Class<?> multipartClass = multipart.getClass();
|
||||
Method getCount = multipartClass.getMethod("getCount");
|
||||
int count = (Integer) getCount.invoke(multipart);
|
||||
|
||||
Method getBodyPart = multipartClass.getMethod("getBodyPart", int.class);
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
Object part = getBodyPart.invoke(multipart, i);
|
||||
processPart(part, content, request, customHtmlSanitizer, depth + 1);
|
||||
}
|
||||
|
||||
} catch (ReflectiveOperationException | ClassCastException e) {
|
||||
content.setHtmlBody("<div class=\"error\">Error processing multipart content</div>");
|
||||
}
|
||||
}
|
||||
|
||||
private static void processPart(
|
||||
Object part,
|
||||
EmailContent content,
|
||||
EmlToPdfRequest request,
|
||||
CustomHtmlSanitizer customHtmlSanitizer,
|
||||
int depth) {
|
||||
try {
|
||||
Class<?> partClass = part.getClass();
|
||||
|
||||
Method isMimeType = partClass.getMethod("isMimeType", String.class);
|
||||
Method getContent = partClass.getMethod("getContent");
|
||||
Method getDisposition = partClass.getMethod("getDisposition");
|
||||
Method getFileName = partClass.getMethod("getFileName");
|
||||
Method getContentType = partClass.getMethod("getContentType");
|
||||
Method getHeader = partClass.getMethod("getHeader", String.class);
|
||||
|
||||
Object disposition = getDisposition.invoke(part);
|
||||
String filename = (String) getFileName.invoke(part);
|
||||
String contentType = (String) getContentType.invoke(part);
|
||||
|
||||
String normalizedDisposition =
|
||||
disposition != null ? ((String) disposition).toLowerCase() : null;
|
||||
|
||||
if ((Boolean) isMimeType.invoke(part, TEXT_PLAIN) && normalizedDisposition == null) {
|
||||
Object partContent = getContent.invoke(part);
|
||||
if (partContent instanceof String stringContent) {
|
||||
content.setTextBody(stringContent);
|
||||
}
|
||||
} else if ((Boolean) isMimeType.invoke(part, TEXT_HTML)
|
||||
&& normalizedDisposition == null) {
|
||||
Object partContent = getContent.invoke(part);
|
||||
if (partContent instanceof String stringContent) {
|
||||
String htmlBody =
|
||||
customHtmlSanitizer != null
|
||||
? customHtmlSanitizer.sanitize(stringContent)
|
||||
: stringContent;
|
||||
content.setHtmlBody(htmlBody);
|
||||
}
|
||||
} else if ((normalizedDisposition != null
|
||||
&& normalizedDisposition.contains(DISPOSITION_ATTACHMENT))
|
||||
|| (filename != null && !filename.trim().isEmpty())) {
|
||||
|
||||
processAttachment(
|
||||
part, content, request, getHeader, getContent, filename, contentType);
|
||||
} else if ((Boolean) isMimeType.invoke(part, "multipart/*")) {
|
||||
Object multipartContent = getContent.invoke(part);
|
||||
if (multipartContent != null) {
|
||||
Class<?> multipartClass = Class.forName("jakarta.mail.Multipart");
|
||||
if (multipartClass.isInstance(multipartContent)) {
|
||||
processMultipart(
|
||||
multipartContent, content, request, customHtmlSanitizer, depth + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} catch (ReflectiveOperationException | RuntimeException e) {
|
||||
// Continue processing other parts if one fails
|
||||
}
|
||||
}
|
||||
|
||||
private static void processAttachment(
|
||||
Object part,
|
||||
EmailContent content,
|
||||
EmlToPdfRequest request,
|
||||
Method getHeader,
|
||||
Method getContent,
|
||||
String filename,
|
||||
String contentType) {
|
||||
|
||||
content.setAttachmentCount(content.getAttachmentCount() + 1);
|
||||
|
||||
if (filename != null && !filename.trim().isEmpty()) {
|
||||
EmailAttachment attachment = new EmailAttachment();
|
||||
attachment.setFilename(safeMimeDecode(filename));
|
||||
attachment.setContentType(contentType);
|
||||
|
||||
try {
|
||||
String[] contentIdHeaders = (String[]) getHeader.invoke(part, HEADER_CONTENT_ID);
|
||||
if (contentIdHeaders != null) {
|
||||
for (String contentIdHeader : contentIdHeaders) {
|
||||
if (contentIdHeader != null && !contentIdHeader.trim().isEmpty()) {
|
||||
attachment.setEmbedded(true);
|
||||
String contentId = contentIdHeader.trim().replaceAll("[<>]", "");
|
||||
attachment.setContentId(contentId);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (ReflectiveOperationException e) {
|
||||
}
|
||||
|
||||
if ((request != null && request.isIncludeAttachments()) || attachment.isEmbedded()) {
|
||||
extractAttachmentData(part, attachment, getContent, request);
|
||||
}
|
||||
|
||||
content.getAttachments().add(attachment);
|
||||
}
|
||||
}
|
||||
|
||||
private static void extractAttachmentData(
|
||||
Object part, EmailAttachment attachment, Method getContent, EmlToPdfRequest request) {
|
||||
try {
|
||||
Object attachmentContent = getContent.invoke(part);
|
||||
byte[] attachmentData = null;
|
||||
|
||||
if (attachmentContent instanceof InputStream inputStream) {
|
||||
try (InputStream stream = inputStream) {
|
||||
attachmentData = stream.readAllBytes();
|
||||
} catch (IOException e) {
|
||||
if (attachment.isEmbedded()) {
|
||||
attachmentData = new byte[0];
|
||||
} else {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
} else if (attachmentContent instanceof byte[] byteArray) {
|
||||
attachmentData = byteArray;
|
||||
} else if (attachmentContent instanceof String stringContent) {
|
||||
attachmentData = stringContent.getBytes(StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
if (attachmentData != null) {
|
||||
long maxSizeMB = request != null ? request.getMaxAttachmentSizeMB() : 10L;
|
||||
long maxSizeBytes = maxSizeMB * 1024 * 1024;
|
||||
|
||||
if (attachmentData.length <= maxSizeBytes || attachment.isEmbedded()) {
|
||||
attachment.setData(attachmentData);
|
||||
attachment.setSizeBytes(attachmentData.length);
|
||||
} else {
|
||||
attachment.setSizeBytes(attachmentData.length);
|
||||
}
|
||||
}
|
||||
} catch (ReflectiveOperationException | RuntimeException e) {
|
||||
// Continue without attachment data
|
||||
}
|
||||
}
|
||||
|
||||
private static String extractBasicHeader(String emlContent, String headerName) {
|
||||
try {
|
||||
String[] lines = emlContent.split("\r?\n");
|
||||
for (int i = 0; i < lines.length; i++) {
|
||||
String line = lines[i];
|
||||
if (line.toLowerCase().startsWith(headerName.toLowerCase())) {
|
||||
StringBuilder value =
|
||||
new StringBuilder(line.substring(headerName.length()).trim());
|
||||
for (int j = i + 1; j < lines.length; j++) {
|
||||
if (lines[j].startsWith(" ") || lines[j].startsWith("\t")) {
|
||||
value.append(" ").append(lines[j].trim());
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return safeMimeDecode(value.toString());
|
||||
}
|
||||
if (line.trim().isEmpty()) break;
|
||||
}
|
||||
} catch (RuntimeException e) {
|
||||
// Ignore errors in header extraction
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
private static String extractHtmlBody(String emlContent) {
|
||||
try {
|
||||
String lowerContent = emlContent.toLowerCase();
|
||||
int htmlStart = lowerContent.indexOf(HEADER_CONTENT_TYPE + " " + TEXT_HTML);
|
||||
if (htmlStart == -1) return null;
|
||||
|
||||
int bodyStart = emlContent.indexOf("\r\n\r\n", htmlStart);
|
||||
if (bodyStart == -1) bodyStart = emlContent.indexOf("\n\n", htmlStart);
|
||||
if (bodyStart == -1) return null;
|
||||
|
||||
bodyStart += (emlContent.charAt(bodyStart + 1) == '\r') ? 4 : 2;
|
||||
int bodyEnd = findPartEnd(emlContent, bodyStart);
|
||||
|
||||
return emlContent.substring(bodyStart, bodyEnd).trim();
|
||||
} catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static String extractTextBody(String emlContent) {
|
||||
try {
|
||||
String lowerContent = emlContent.toLowerCase();
|
||||
int textStart = lowerContent.indexOf(HEADER_CONTENT_TYPE + " " + TEXT_PLAIN);
|
||||
if (textStart == -1) {
|
||||
int bodyStart = emlContent.indexOf("\r\n\r\n");
|
||||
if (bodyStart == -1) bodyStart = emlContent.indexOf("\n\n");
|
||||
if (bodyStart != -1) {
|
||||
bodyStart += (emlContent.charAt(bodyStart + 1) == '\r') ? 4 : 2;
|
||||
int bodyEnd = findPartEnd(emlContent, bodyStart);
|
||||
return emlContent.substring(bodyStart, bodyEnd).trim();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
int bodyStart = emlContent.indexOf("\r\n\r\n", textStart);
|
||||
if (bodyStart == -1) bodyStart = emlContent.indexOf("\n\n", textStart);
|
||||
if (bodyStart == -1) return null;
|
||||
|
||||
bodyStart += (emlContent.charAt(bodyStart + 1) == '\r') ? 4 : 2;
|
||||
int bodyEnd = findPartEnd(emlContent, bodyStart);
|
||||
|
||||
return emlContent.substring(bodyStart, bodyEnd).trim();
|
||||
} catch (RuntimeException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static int findPartEnd(String content, int start) {
|
||||
String[] lines = content.substring(start).split("\r?\n");
|
||||
StringBuilder result = new StringBuilder();
|
||||
|
||||
for (String line : lines) {
|
||||
if (line.startsWith("--") && line.length() > 10) break;
|
||||
result.append(line).append("\n");
|
||||
}
|
||||
|
||||
return start + result.length();
|
||||
}
|
||||
|
||||
private static List<EmailAttachment> extractAttachmentsBasic(String emlContent) {
|
||||
List<EmailAttachment> attachments = new ArrayList<>();
|
||||
try {
|
||||
String[] lines = emlContent.split("\r?\n");
|
||||
boolean inHeaders = true;
|
||||
String currentContentType = "";
|
||||
String currentDisposition = "";
|
||||
String currentFilename = "";
|
||||
String currentEncoding = "";
|
||||
|
||||
for (String line : lines) {
|
||||
String lowerLine = line.toLowerCase().trim();
|
||||
|
||||
if (line.trim().isEmpty()) {
|
||||
inHeaders = false;
|
||||
if (isAttachment(currentDisposition, currentFilename, currentContentType)) {
|
||||
EmailAttachment attachment = new EmailAttachment();
|
||||
attachment.setFilename(currentFilename);
|
||||
attachment.setContentType(currentContentType);
|
||||
attachment.setTransferEncoding(currentEncoding);
|
||||
attachments.add(attachment);
|
||||
}
|
||||
currentContentType = "";
|
||||
currentDisposition = "";
|
||||
currentFilename = "";
|
||||
currentEncoding = "";
|
||||
inHeaders = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!inHeaders) continue;
|
||||
|
||||
if (lowerLine.startsWith(HEADER_CONTENT_TYPE)) {
|
||||
currentContentType = line.substring(HEADER_CONTENT_TYPE.length()).trim();
|
||||
} else if (lowerLine.startsWith(HEADER_CONTENT_DISPOSITION)) {
|
||||
currentDisposition = line.substring(HEADER_CONTENT_DISPOSITION.length()).trim();
|
||||
currentFilename = extractFilenameFromDisposition(currentDisposition);
|
||||
} else if (lowerLine.startsWith(HEADER_CONTENT_TRANSFER_ENCODING)) {
|
||||
currentEncoding =
|
||||
line.substring(HEADER_CONTENT_TRANSFER_ENCODING.length()).trim();
|
||||
}
|
||||
}
|
||||
} catch (RuntimeException e) {
|
||||
// Continue with empty list
|
||||
}
|
||||
return attachments;
|
||||
}
|
||||
|
||||
private static boolean isAttachment(String disposition, String filename, String contentType) {
|
||||
return (disposition.toLowerCase().contains(DISPOSITION_ATTACHMENT) && !filename.isEmpty())
|
||||
|| (!filename.isEmpty() && !contentType.toLowerCase().startsWith("text/"))
|
||||
|| (contentType.toLowerCase().contains("application/") && !filename.isEmpty());
|
||||
}
|
||||
|
||||
private static String extractFilenameFromDisposition(String disposition) {
|
||||
if (disposition == null || !disposition.contains("filename=")) {
|
||||
return "";
|
||||
}
|
||||
|
||||
// Handle filename*= (RFC 2231 encoded filename)
|
||||
if (disposition.toLowerCase().contains("filename*=")) {
|
||||
int filenameStarStart = disposition.toLowerCase().indexOf("filename*=") + 10;
|
||||
int filenameStarEnd = disposition.indexOf(";", filenameStarStart);
|
||||
if (filenameStarEnd == -1) filenameStarEnd = disposition.length();
|
||||
String extendedFilename =
|
||||
disposition.substring(filenameStarStart, filenameStarEnd).trim();
|
||||
extendedFilename = extendedFilename.replaceAll("^\"|\"$", "");
|
||||
|
||||
if (extendedFilename.contains("'")) {
|
||||
String[] parts = extendedFilename.split("'", 3);
|
||||
if (parts.length == 3) {
|
||||
return EmlProcessingUtils.decodeUrlEncoded(parts[2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle regular filename=
|
||||
int filenameStart = disposition.toLowerCase().indexOf("filename=") + 9;
|
||||
int filenameEnd = disposition.indexOf(";", filenameStart);
|
||||
if (filenameEnd == -1) filenameEnd = disposition.length();
|
||||
String filename = disposition.substring(filenameStart, filenameEnd).trim();
|
||||
filename = filename.replaceAll("^\"|\"$", "");
|
||||
return safeMimeDecode(filename);
|
||||
}
|
||||
|
||||
public static String safeMimeDecode(String headerValue) {
|
||||
if (headerValue == null || headerValue.trim().isEmpty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
if (!mimeUtilityChecked) {
|
||||
synchronized (EmlParser.class) {
|
||||
if (!mimeUtilityChecked) {
|
||||
initializeMimeUtilityDecoding();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (mimeUtilityDecodeTextMethod != null) {
|
||||
try {
|
||||
return (String) mimeUtilityDecodeTextMethod.invoke(null, headerValue.trim());
|
||||
} catch (ReflectiveOperationException | RuntimeException e) {
|
||||
// Fall through to custom implementation
|
||||
}
|
||||
}
|
||||
|
||||
return EmlProcessingUtils.decodeMimeHeader(headerValue.trim());
|
||||
}
|
||||
|
||||
private static void initializeMimeUtilityDecoding() {
|
||||
try {
|
||||
Class<?> mimeUtilityClass = Class.forName("jakarta.mail.internet.MimeUtility");
|
||||
mimeUtilityDecodeTextMethod = mimeUtilityClass.getMethod("decodeText", String.class);
|
||||
} catch (ClassNotFoundException | NoSuchMethodException e) {
|
||||
mimeUtilityDecodeTextMethod = null;
|
||||
}
|
||||
mimeUtilityChecked = true;
|
||||
}
|
||||
|
||||
@Data
|
||||
public static class EmailContent {
|
||||
private String subject;
|
||||
private String from;
|
||||
private String to;
|
||||
private String cc;
|
||||
private String bcc;
|
||||
private Date date;
|
||||
private String dateString; // For basic parsing fallback
|
||||
private String htmlBody;
|
||||
private String textBody;
|
||||
private int attachmentCount;
|
||||
private List<EmailAttachment> attachments = new ArrayList<>();
|
||||
|
||||
public void setHtmlBody(String htmlBody) {
|
||||
this.htmlBody = htmlBody != null ? htmlBody.replaceAll("\r", "") : null;
|
||||
}
|
||||
|
||||
public void setTextBody(String textBody) {
|
||||
this.textBody = textBody != null ? textBody.replaceAll("\r", "") : null;
|
||||
}
|
||||
}
|
||||
|
||||
@Data
|
||||
public static class EmailAttachment {
|
||||
private String filename;
|
||||
private String contentType;
|
||||
private byte[] data;
|
||||
private boolean embedded;
|
||||
private String embeddedFilename;
|
||||
private long sizeBytes;
|
||||
private String contentId;
|
||||
private String disposition;
|
||||
private String transferEncoding;
|
||||
|
||||
public void setData(byte[] data) {
|
||||
this.data = data;
|
||||
if (data != null) {
|
||||
this.sizeBytes = data.length;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,601 @@
|
||||
package stirling.software.common.util;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Base64;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
import stirling.software.common.model.api.converters.EmlToPdfRequest;
|
||||
import stirling.software.common.model.api.converters.HTMLToPdfRequest;
|
||||
|
||||
@UtilityClass
|
||||
public class EmlProcessingUtils {
|
||||
|
||||
// Style constants
|
||||
private static final int DEFAULT_FONT_SIZE = 12;
|
||||
private static final String DEFAULT_FONT_FAMILY = "Helvetica, sans-serif";
|
||||
private static final float DEFAULT_LINE_HEIGHT = 1.4f;
|
||||
private static final String DEFAULT_ZOOM = "1.0";
|
||||
private static final String DEFAULT_TEXT_COLOR = "#202124";
|
||||
private static final String DEFAULT_BACKGROUND_COLOR = "#ffffff";
|
||||
private static final String DEFAULT_BORDER_COLOR = "#e8eaed";
|
||||
private static final String ATTACHMENT_BACKGROUND_COLOR = "#f9f9f9";
|
||||
private static final String ATTACHMENT_BORDER_COLOR = "#eeeeee";
|
||||
|
||||
private static final int EML_CHECK_LENGTH = 8192;
|
||||
private static final int MIN_HEADER_COUNT_FOR_VALID_EML = 2;
|
||||
|
||||
// MIME type detection
|
||||
private static final Map<String, String> EXTENSION_TO_MIME_TYPE =
|
||||
Map.of(
|
||||
".png", "image/png",
|
||||
".jpg", "image/jpeg",
|
||||
".jpeg", "image/jpeg",
|
||||
".gif", "image/gif",
|
||||
".bmp", "image/bmp",
|
||||
".webp", "image/webp",
|
||||
".svg", "image/svg+xml",
|
||||
".ico", "image/x-icon",
|
||||
".tiff", "image/tiff",
|
||||
".tif", "image/tiff");
|
||||
|
||||
public static void validateEmlInput(byte[] emlBytes) {
|
||||
if (emlBytes == null || emlBytes.length == 0) {
|
||||
throw new IllegalArgumentException("EML file is empty or null");
|
||||
}
|
||||
|
||||
if (isInvalidEmlFormat(emlBytes)) {
|
||||
throw new IllegalArgumentException("Invalid EML file format");
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean isInvalidEmlFormat(byte[] emlBytes) {
|
||||
try {
|
||||
int checkLength = Math.min(emlBytes.length, EML_CHECK_LENGTH);
|
||||
String content;
|
||||
|
||||
try {
|
||||
content = new String(emlBytes, 0, checkLength, StandardCharsets.UTF_8);
|
||||
if (content.contains("\uFFFD")) {
|
||||
content = new String(emlBytes, 0, checkLength, StandardCharsets.ISO_8859_1);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
content = new String(emlBytes, 0, checkLength, StandardCharsets.ISO_8859_1);
|
||||
}
|
||||
|
||||
String lowerContent = content.toLowerCase(Locale.ROOT);
|
||||
|
||||
boolean hasFrom =
|
||||
lowerContent.contains("from:") || lowerContent.contains("return-path:");
|
||||
boolean hasSubject = lowerContent.contains("subject:");
|
||||
boolean hasMessageId = lowerContent.contains("message-id:");
|
||||
boolean hasDate = lowerContent.contains("date:");
|
||||
boolean hasTo =
|
||||
lowerContent.contains("to:")
|
||||
|| lowerContent.contains("cc:")
|
||||
|| lowerContent.contains("bcc:");
|
||||
boolean hasMimeStructure =
|
||||
lowerContent.contains("multipart/")
|
||||
|| lowerContent.contains("text/plain")
|
||||
|| lowerContent.contains("text/html")
|
||||
|| lowerContent.contains("boundary=");
|
||||
|
||||
int headerCount = 0;
|
||||
if (hasFrom) headerCount++;
|
||||
if (hasSubject) headerCount++;
|
||||
if (hasMessageId) headerCount++;
|
||||
if (hasDate) headerCount++;
|
||||
if (hasTo) headerCount++;
|
||||
|
||||
return headerCount < MIN_HEADER_COUNT_FOR_VALID_EML && !hasMimeStructure;
|
||||
|
||||
} catch (RuntimeException e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public static String generateEnhancedEmailHtml(
|
||||
EmlParser.EmailContent content,
|
||||
EmlToPdfRequest request,
|
||||
CustomHtmlSanitizer customHtmlSanitizer) {
|
||||
StringBuilder html = new StringBuilder();
|
||||
|
||||
html.append(
|
||||
String.format(
|
||||
"""
|
||||
<!DOCTYPE html>
|
||||
<html lang="en"><head><meta charset="UTF-8">
|
||||
<title>%s</title>
|
||||
<style>
|
||||
""",
|
||||
sanitizeText(content.getSubject(), customHtmlSanitizer)));
|
||||
|
||||
appendEnhancedStyles(html);
|
||||
|
||||
html.append(
|
||||
"""
|
||||
</style>
|
||||
</head><body>
|
||||
""");
|
||||
|
||||
html.append(
|
||||
String.format(
|
||||
"""
|
||||
<div class="email-container">
|
||||
<div class="email-header">
|
||||
<h1>%s</h1>
|
||||
<div class="email-meta">
|
||||
<div><strong>From:</strong> %s</div>
|
||||
<div><strong>To:</strong> %s</div>
|
||||
""",
|
||||
sanitizeText(content.getSubject(), customHtmlSanitizer),
|
||||
sanitizeText(content.getFrom(), customHtmlSanitizer),
|
||||
sanitizeText(content.getTo(), customHtmlSanitizer)));
|
||||
|
||||
if (content.getCc() != null && !content.getCc().trim().isEmpty()) {
|
||||
html.append(
|
||||
String.format(
|
||||
"<div><strong>CC:</strong> %s</div>\n",
|
||||
sanitizeText(content.getCc(), customHtmlSanitizer)));
|
||||
}
|
||||
|
||||
if (content.getBcc() != null && !content.getBcc().trim().isEmpty()) {
|
||||
html.append(
|
||||
String.format(
|
||||
"<div><strong>BCC:</strong> %s</div>\n",
|
||||
sanitizeText(content.getBcc(), customHtmlSanitizer)));
|
||||
}
|
||||
|
||||
if (content.getDate() != null) {
|
||||
html.append(
|
||||
String.format(
|
||||
"<div><strong>Date:</strong> %s</div>\n",
|
||||
PdfAttachmentHandler.formatEmailDate(content.getDate())));
|
||||
} else if (content.getDateString() != null && !content.getDateString().trim().isEmpty()) {
|
||||
html.append(
|
||||
String.format(
|
||||
"<div><strong>Date:</strong> %s</div>\n",
|
||||
sanitizeText(content.getDateString(), customHtmlSanitizer)));
|
||||
}
|
||||
|
||||
html.append("</div></div>\n");
|
||||
|
||||
html.append("<div class=\"email-body\">\n");
|
||||
if (content.getHtmlBody() != null && !content.getHtmlBody().trim().isEmpty()) {
|
||||
String processedHtml =
|
||||
processEmailHtmlBody(content.getHtmlBody(), content, customHtmlSanitizer);
|
||||
html.append(processedHtml);
|
||||
} else if (content.getTextBody() != null && !content.getTextBody().trim().isEmpty()) {
|
||||
html.append(
|
||||
String.format(
|
||||
"<div class=\"text-body\">%s</div>",
|
||||
convertTextToHtml(content.getTextBody(), customHtmlSanitizer)));
|
||||
} else {
|
||||
html.append("<div class=\"no-content\"><p><em>No content available</em></p></div>");
|
||||
}
|
||||
html.append("</div>\n");
|
||||
|
||||
if (content.getAttachmentCount() > 0 || !content.getAttachments().isEmpty()) {
|
||||
appendAttachmentsSection(html, content, request, customHtmlSanitizer);
|
||||
}
|
||||
|
||||
html.append("</div>\n</body></html>");
|
||||
return html.toString();
|
||||
}
|
||||
|
||||
public static String processEmailHtmlBody(
|
||||
String htmlBody,
|
||||
EmlParser.EmailContent emailContent,
|
||||
CustomHtmlSanitizer customHtmlSanitizer) {
|
||||
if (htmlBody == null) return "";
|
||||
|
||||
String processed =
|
||||
customHtmlSanitizer != null ? customHtmlSanitizer.sanitize(htmlBody) : htmlBody;
|
||||
|
||||
processed = processed.replaceAll("(?i)\\s*position\\s*:\\s*fixed[^;]*;?", "");
|
||||
processed = processed.replaceAll("(?i)\\s*position\\s*:\\s*absolute[^;]*;?", "");
|
||||
|
||||
if (emailContent != null && !emailContent.getAttachments().isEmpty()) {
|
||||
processed = PdfAttachmentHandler.processInlineImages(processed, emailContent);
|
||||
}
|
||||
|
||||
return processed;
|
||||
}
|
||||
|
||||
public static String convertTextToHtml(
|
||||
String textBody, CustomHtmlSanitizer customHtmlSanitizer) {
|
||||
if (textBody == null) return "";
|
||||
|
||||
String html =
|
||||
customHtmlSanitizer != null
|
||||
? customHtmlSanitizer.sanitize(textBody)
|
||||
: escapeHtml(textBody);
|
||||
|
||||
html = html.replace("\r\n", "\n").replace("\r", "\n");
|
||||
html = html.replace("\n", "<br>\n");
|
||||
|
||||
html =
|
||||
html.replaceAll(
|
||||
"(https?://[\\w\\-._~:/?#\\[\\]@!$&'()*+,;=%]+)",
|
||||
"<a href=\"$1\" style=\"color: #1a73e8; text-decoration: underline;\">$1</a>");
|
||||
|
||||
html =
|
||||
html.replaceAll(
|
||||
"([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,63})",
|
||||
"<a href=\"mailto:$1\" style=\"color: #1a73e8; text-decoration: underline;\">$1</a>");
|
||||
|
||||
return html;
|
||||
}
|
||||
|
||||
private static void appendEnhancedStyles(StringBuilder html) {
|
||||
String css =
|
||||
String.format(
|
||||
"""
|
||||
body {
|
||||
font-family: %s;
|
||||
font-size: %dpx;
|
||||
line-height: %s;
|
||||
color: %s;
|
||||
margin: 0;
|
||||
padding: 16px;
|
||||
background-color: %s;
|
||||
}
|
||||
|
||||
.email-container {
|
||||
width: 100%%;
|
||||
max-width: 100%%;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
.email-header {
|
||||
padding-bottom: 10px;
|
||||
border-bottom: 1px solid %s;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
|
||||
.email-header h1 {
|
||||
margin: 0 0 10px 0;
|
||||
font-size: %dpx;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.email-meta div {
|
||||
margin-bottom: 2px;
|
||||
font-size: %dpx;
|
||||
}
|
||||
|
||||
.email-body {
|
||||
word-wrap: break-word;
|
||||
}
|
||||
|
||||
.attachment-section {
|
||||
margin-top: 15px;
|
||||
padding: 10px;
|
||||
background-color: %s;
|
||||
border: 1px solid %s;
|
||||
border-radius: 3px;
|
||||
}
|
||||
|
||||
.attachment-section h3 {
|
||||
margin: 0 0 8px 0;
|
||||
font-size: %dpx;
|
||||
}
|
||||
|
||||
.attachment-item {
|
||||
padding: 5px 0;
|
||||
}
|
||||
|
||||
.attachment-icon {
|
||||
margin-right: 5px;
|
||||
}
|
||||
|
||||
.attachment-details, .attachment-type {
|
||||
font-size: %dpx;
|
||||
color: #555555;
|
||||
}
|
||||
|
||||
.attachment-inclusion-note, .attachment-info-note {
|
||||
margin-top: 8px;
|
||||
padding: 6px;
|
||||
font-size: %dpx;
|
||||
border-radius: 3px;
|
||||
}
|
||||
|
||||
.attachment-inclusion-note {
|
||||
background-color: #e6ffed;
|
||||
border: 1px solid #d4f7dc;
|
||||
color: #006420;
|
||||
}
|
||||
|
||||
.attachment-info-note {
|
||||
background-color: #fff9e6;
|
||||
border: 1px solid #fff0c2;
|
||||
color: #664d00;
|
||||
}
|
||||
|
||||
.attachment-link-container {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
padding: 8px;
|
||||
background-color: #f8f9fa;
|
||||
border: 1px solid #dee2e6;
|
||||
border-radius: 4px;
|
||||
margin: 4px 0;
|
||||
}
|
||||
|
||||
.attachment-link-container:hover {
|
||||
background-color: #e9ecef;
|
||||
}
|
||||
|
||||
.attachment-note {
|
||||
font-size: %dpx;
|
||||
color: #6c757d;
|
||||
font-style: italic;
|
||||
margin-left: 8px;
|
||||
}
|
||||
|
||||
.no-content {
|
||||
padding: 20px;
|
||||
text-align: center;
|
||||
color: #666;
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
.text-body {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
img {
|
||||
max-width: 100%%;
|
||||
height: auto;
|
||||
display: block;
|
||||
}
|
||||
""",
|
||||
DEFAULT_FONT_FAMILY,
|
||||
DEFAULT_FONT_SIZE,
|
||||
DEFAULT_LINE_HEIGHT,
|
||||
DEFAULT_TEXT_COLOR,
|
||||
DEFAULT_BACKGROUND_COLOR,
|
||||
DEFAULT_BORDER_COLOR,
|
||||
DEFAULT_FONT_SIZE + 4,
|
||||
DEFAULT_FONT_SIZE - 1,
|
||||
ATTACHMENT_BACKGROUND_COLOR,
|
||||
ATTACHMENT_BORDER_COLOR,
|
||||
DEFAULT_FONT_SIZE + 1,
|
||||
DEFAULT_FONT_SIZE - 2,
|
||||
DEFAULT_FONT_SIZE - 2,
|
||||
DEFAULT_FONT_SIZE - 3);
|
||||
|
||||
html.append(css);
|
||||
}
|
||||
|
||||
private static void appendAttachmentsSection(
|
||||
StringBuilder html,
|
||||
EmlParser.EmailContent content,
|
||||
EmlToPdfRequest request,
|
||||
CustomHtmlSanitizer customHtmlSanitizer) {
|
||||
html.append("<div class=\"attachment-section\">\n");
|
||||
int displayedAttachmentCount =
|
||||
content.getAttachmentCount() > 0
|
||||
? content.getAttachmentCount()
|
||||
: content.getAttachments().size();
|
||||
html.append("<h3>Attachments (").append(displayedAttachmentCount).append(")</h3>\n");
|
||||
|
||||
if (!content.getAttachments().isEmpty()) {
|
||||
for (int i = 0; i < content.getAttachments().size(); i++) {
|
||||
EmlParser.EmailAttachment attachment = content.getAttachments().get(i);
|
||||
|
||||
String embeddedFilename =
|
||||
attachment.getFilename() != null
|
||||
? attachment.getFilename()
|
||||
: ("attachment_" + i);
|
||||
attachment.setEmbeddedFilename(embeddedFilename);
|
||||
|
||||
String sizeStr = GeneralUtils.formatBytes(attachment.getSizeBytes());
|
||||
String contentType =
|
||||
attachment.getContentType() != null
|
||||
&& !attachment.getContentType().isEmpty()
|
||||
? ", " + escapeHtml(attachment.getContentType())
|
||||
: "";
|
||||
|
||||
String attachmentId = "attachment_" + i;
|
||||
html.append(
|
||||
String.format(
|
||||
"""
|
||||
<div class="attachment-item" id="%s">
|
||||
<span class="attachment-icon" data-filename="%s">@</span>
|
||||
<span class="attachment-name">%s</span>
|
||||
<span class="attachment-details">(%s%s)</span>
|
||||
</div>
|
||||
""",
|
||||
attachmentId,
|
||||
escapeHtml(embeddedFilename),
|
||||
escapeHtml(EmlParser.safeMimeDecode(attachment.getFilename())),
|
||||
sizeStr,
|
||||
contentType));
|
||||
}
|
||||
}
|
||||
|
||||
if (request != null && request.isIncludeAttachments()) {
|
||||
html.append(
|
||||
"""
|
||||
<div class="attachment-info-note">
|
||||
<p><em>Attachments are embedded in the file.</em></p>
|
||||
</div>
|
||||
""");
|
||||
} else {
|
||||
html.append(
|
||||
"""
|
||||
<div class="attachment-info-note">
|
||||
<p><em>Attachment information displayed - files not included in PDF.</em></p>
|
||||
</div>
|
||||
""");
|
||||
}
|
||||
html.append("</div>\n");
|
||||
}
|
||||
|
||||
public static HTMLToPdfRequest createHtmlRequest(EmlToPdfRequest request) {
|
||||
HTMLToPdfRequest htmlRequest = new HTMLToPdfRequest();
|
||||
|
||||
if (request != null) {
|
||||
htmlRequest.setFileInput(request.getFileInput());
|
||||
}
|
||||
|
||||
htmlRequest.setZoom(Float.parseFloat(DEFAULT_ZOOM));
|
||||
return htmlRequest;
|
||||
}
|
||||
|
||||
public static String detectMimeType(String filename, String existingMimeType) {
|
||||
if (existingMimeType != null && !existingMimeType.isEmpty()) {
|
||||
return existingMimeType;
|
||||
}
|
||||
|
||||
if (filename != null) {
|
||||
String lowerFilename = filename.toLowerCase();
|
||||
for (Map.Entry<String, String> entry : EXTENSION_TO_MIME_TYPE.entrySet()) {
|
||||
if (lowerFilename.endsWith(entry.getKey())) {
|
||||
return entry.getValue();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return "image/png";
|
||||
}
|
||||
|
||||
public static String decodeUrlEncoded(String encoded) {
|
||||
try {
|
||||
return java.net.URLDecoder.decode(encoded, StandardCharsets.UTF_8);
|
||||
} catch (Exception e) {
|
||||
return encoded; // Return original if decoding fails
|
||||
}
|
||||
}
|
||||
|
||||
public static String decodeMimeHeader(String encodedText) {
|
||||
if (encodedText == null || encodedText.trim().isEmpty()) {
|
||||
return encodedText;
|
||||
}
|
||||
|
||||
try {
|
||||
StringBuilder result = new StringBuilder();
|
||||
Pattern concatenatedPattern =
|
||||
Pattern.compile(
|
||||
"(=\\?[^?]+\\?[BbQq]\\?[^?]*\\?=)(\\s*=\\?[^?]+\\?[BbQq]\\?[^?]*\\?=)+");
|
||||
Matcher concatenatedMatcher = concatenatedPattern.matcher(encodedText);
|
||||
String processedText =
|
||||
concatenatedMatcher.replaceAll(
|
||||
match -> match.group().replaceAll("\\s+(?==\\?)", ""));
|
||||
|
||||
Pattern mimePattern = Pattern.compile("=\\?([^?]+)\\?([BbQq])\\?([^?]*)\\?=");
|
||||
Matcher matcher = mimePattern.matcher(processedText);
|
||||
int lastEnd = 0;
|
||||
|
||||
while (matcher.find()) {
|
||||
result.append(processedText, lastEnd, matcher.start());
|
||||
|
||||
String charset = matcher.group(1);
|
||||
String encoding = matcher.group(2).toUpperCase();
|
||||
String encodedValue = matcher.group(3);
|
||||
|
||||
try {
|
||||
String decodedValue =
|
||||
switch (encoding) {
|
||||
case "B" -> {
|
||||
String cleanBase64 = encodedValue.replaceAll("\\s", "");
|
||||
byte[] decodedBytes = Base64.getDecoder().decode(cleanBase64);
|
||||
Charset targetCharset;
|
||||
try {
|
||||
targetCharset = Charset.forName(charset);
|
||||
} catch (Exception e) {
|
||||
targetCharset = StandardCharsets.UTF_8;
|
||||
}
|
||||
yield new String(decodedBytes, targetCharset);
|
||||
}
|
||||
case "Q" -> decodeQuotedPrintable(encodedValue, charset);
|
||||
default -> matcher.group(0); // Return original if unknown encoding
|
||||
};
|
||||
result.append(decodedValue);
|
||||
} catch (RuntimeException e) {
|
||||
result.append(matcher.group(0)); // Keep original on decode error
|
||||
}
|
||||
|
||||
lastEnd = matcher.end();
|
||||
}
|
||||
|
||||
result.append(processedText.substring(lastEnd));
|
||||
return result.toString();
|
||||
} catch (Exception e) {
|
||||
return encodedText; // Return original on any parsing error
|
||||
}
|
||||
}
|
||||
|
||||
private static String decodeQuotedPrintable(String encodedText, String charset) {
|
||||
StringBuilder result = new StringBuilder();
|
||||
for (int i = 0; i < encodedText.length(); i++) {
|
||||
char c = encodedText.charAt(i);
|
||||
switch (c) {
|
||||
case '=' -> {
|
||||
if (i + 2 < encodedText.length()) {
|
||||
String hex = encodedText.substring(i + 1, i + 3);
|
||||
try {
|
||||
int value = Integer.parseInt(hex, 16);
|
||||
result.append((char) value);
|
||||
i += 2;
|
||||
} catch (NumberFormatException e) {
|
||||
result.append(c);
|
||||
}
|
||||
} else if (i + 1 == encodedText.length()
|
||||
|| (i + 2 == encodedText.length()
|
||||
&& encodedText.charAt(i + 1) == '\n')) {
|
||||
if (i + 1 < encodedText.length() && encodedText.charAt(i + 1) == '\n') {
|
||||
i++; // Skip the newline too
|
||||
}
|
||||
} else {
|
||||
result.append(c);
|
||||
}
|
||||
}
|
||||
case '_' -> result.append(' '); // Space encoding in Q encoding
|
||||
default -> result.append(c);
|
||||
}
|
||||
}
|
||||
|
||||
byte[] bytes = result.toString().getBytes(StandardCharsets.ISO_8859_1);
|
||||
try {
|
||||
Charset targetCharset = Charset.forName(charset);
|
||||
return new String(bytes, targetCharset);
|
||||
} catch (Exception e) {
|
||||
try {
|
||||
return new String(bytes, StandardCharsets.UTF_8);
|
||||
} catch (Exception fallbackException) {
|
||||
return new String(bytes, StandardCharsets.ISO_8859_1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static String escapeHtml(String text) {
|
||||
if (text == null) return "";
|
||||
return text.replace("&", "&")
|
||||
.replace("<", "<")
|
||||
.replace(">", ">")
|
||||
.replace("\"", """)
|
||||
.replace("'", "'");
|
||||
}
|
||||
|
||||
public static String sanitizeText(String text, CustomHtmlSanitizer customHtmlSanitizer) {
|
||||
if (customHtmlSanitizer != null) {
|
||||
return customHtmlSanitizer.sanitize(text);
|
||||
} else {
|
||||
return escapeHtml(text);
|
||||
}
|
||||
}
|
||||
|
||||
public static String simplifyHtmlContent(String htmlContent) {
|
||||
String simplified = htmlContent.replaceAll("(?i)<script[^>]*>.*?</script>", "");
|
||||
simplified = simplified.replaceAll("(?i)<style[^>]*>.*?</style>", "");
|
||||
return simplified;
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,667 @@
|
||||
package stirling.software.common.util;
|
||||
|
||||
import static stirling.software.common.util.AttachmentUtils.setCatalogViewerPreferences;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Base64;
|
||||
import java.util.Date;
|
||||
import java.util.GregorianCalendar;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TimeZone;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
|
||||
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
|
||||
import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PageMode;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
|
||||
import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationFileAttachment;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.Getter;
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
|
||||
@UtilityClass
|
||||
public class PdfAttachmentHandler {
|
||||
// Note: This class is designed for EML attachments, not general PDF attachments.
|
||||
|
||||
private static final String ATTACHMENT_MARKER = "@";
|
||||
private static final float ATTACHMENT_ICON_WIDTH = 12f;
|
||||
private static final float ATTACHMENT_ICON_HEIGHT = 14f;
|
||||
private static final float ANNOTATION_X_OFFSET = 2f;
|
||||
private static final float ANNOTATION_Y_OFFSET = 10f;
|
||||
|
||||
public static byte[] attachFilesToPdf(
|
||||
byte[] pdfBytes,
|
||||
List<EmlParser.EmailAttachment> attachments,
|
||||
CustomPDFDocumentFactory pdfDocumentFactory)
|
||||
throws IOException {
|
||||
|
||||
if (attachments == null || attachments.isEmpty()) {
|
||||
return pdfBytes;
|
||||
}
|
||||
|
||||
try (PDDocument document = pdfDocumentFactory.load(pdfBytes);
|
||||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
|
||||
|
||||
List<MultipartFile> multipartAttachments = new ArrayList<>();
|
||||
for (int i = 0; i < attachments.size(); i++) {
|
||||
EmlParser.EmailAttachment attachment = attachments.get(i);
|
||||
if (attachment.getData() != null && attachment.getData().length > 0) {
|
||||
String embeddedFilename =
|
||||
attachment.getFilename() != null
|
||||
? attachment.getFilename()
|
||||
: ("attachment_" + i);
|
||||
attachment.setEmbeddedFilename(embeddedFilename);
|
||||
multipartAttachments.add(createMultipartFile(attachment));
|
||||
}
|
||||
}
|
||||
|
||||
if (!multipartAttachments.isEmpty()) {
|
||||
Map<Integer, String> indexToFilenameMap =
|
||||
addAttachmentsToDocumentWithMapping(
|
||||
document, multipartAttachments, attachments);
|
||||
setCatalogViewerPreferences(document, PageMode.USE_ATTACHMENTS);
|
||||
addAttachmentAnnotationsToDocumentWithMapping(
|
||||
document, attachments, indexToFilenameMap);
|
||||
}
|
||||
|
||||
document.save(outputStream);
|
||||
return outputStream.toByteArray();
|
||||
} catch (RuntimeException e) {
|
||||
throw new IOException(
|
||||
"Invalid PDF structure or processing error: " + e.getMessage(), e);
|
||||
} catch (Exception e) {
|
||||
throw new IOException("Error attaching files to PDF: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
private static MultipartFile createMultipartFile(EmlParser.EmailAttachment attachment) {
|
||||
return new MultipartFile() {
|
||||
@Override
|
||||
public @NotNull String getName() {
|
||||
return "attachment";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getOriginalFilename() {
|
||||
return attachment.getFilename() != null
|
||||
? attachment.getFilename()
|
||||
: "attachment_" + System.currentTimeMillis();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getContentType() {
|
||||
return attachment.getContentType() != null
|
||||
? attachment.getContentType()
|
||||
: "application/octet-stream";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return attachment.getData() == null || attachment.getData().length == 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSize() {
|
||||
return attachment.getData() != null ? attachment.getData().length : 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte @NotNull [] getBytes() {
|
||||
return attachment.getData() != null ? attachment.getData() : new byte[0];
|
||||
}
|
||||
|
||||
@Override
|
||||
public @NotNull InputStream getInputStream() {
|
||||
byte[] data = attachment.getData();
|
||||
return new ByteArrayInputStream(data != null ? data : new byte[0]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transferTo(@NotNull File dest) throws IOException, IllegalStateException {
|
||||
try (FileOutputStream fos = new FileOutputStream(dest)) {
|
||||
byte[] data = attachment.getData();
|
||||
if (data != null) {
|
||||
fos.write(data);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private static String ensureUniqueFilename(String filename, Set<String> existingNames) {
|
||||
if (!existingNames.contains(filename)) {
|
||||
return filename;
|
||||
}
|
||||
|
||||
String baseName;
|
||||
String extension = "";
|
||||
int lastDot = filename.lastIndexOf('.');
|
||||
if (lastDot > 0) {
|
||||
baseName = filename.substring(0, lastDot);
|
||||
extension = filename.substring(lastDot);
|
||||
} else {
|
||||
baseName = filename;
|
||||
}
|
||||
|
||||
int counter = 1;
|
||||
String uniqueName;
|
||||
do {
|
||||
uniqueName = baseName + "_" + counter + extension;
|
||||
counter++;
|
||||
} while (existingNames.contains(uniqueName));
|
||||
|
||||
return uniqueName;
|
||||
}
|
||||
|
||||
private static @NotNull PDRectangle calculateAnnotationRectangle(
|
||||
PDPage page, float x, float y) {
|
||||
PDRectangle cropBox = page.getCropBox();
|
||||
|
||||
// ISO 32000-1:2008 Section 8.3: PDF coordinate system transforms
|
||||
int rotation = page.getRotation();
|
||||
float pdfX = x;
|
||||
float pdfY = cropBox.getHeight() - y;
|
||||
|
||||
switch (rotation) {
|
||||
case 90 -> {
|
||||
float temp = pdfX;
|
||||
pdfX = pdfY;
|
||||
pdfY = cropBox.getWidth() - temp;
|
||||
}
|
||||
case 180 -> {
|
||||
pdfX = cropBox.getWidth() - pdfX;
|
||||
pdfY = y;
|
||||
}
|
||||
case 270 -> {
|
||||
float temp = pdfX;
|
||||
pdfX = cropBox.getHeight() - pdfY;
|
||||
pdfY = temp;
|
||||
}
|
||||
default -> {}
|
||||
}
|
||||
|
||||
float iconHeight = ATTACHMENT_ICON_HEIGHT;
|
||||
float paddingX = 2.0f;
|
||||
float paddingY = 2.0f;
|
||||
|
||||
PDRectangle rect =
|
||||
new PDRectangle(
|
||||
pdfX + ANNOTATION_X_OFFSET + paddingX,
|
||||
pdfY - iconHeight + ANNOTATION_Y_OFFSET + paddingY,
|
||||
ATTACHMENT_ICON_WIDTH,
|
||||
iconHeight);
|
||||
|
||||
PDRectangle mediaBox = page.getMediaBox();
|
||||
if (rect.getLowerLeftX() < mediaBox.getLowerLeftX()
|
||||
|| rect.getLowerLeftY() < mediaBox.getLowerLeftY()
|
||||
|| rect.getUpperRightX() > mediaBox.getUpperRightX()
|
||||
|| rect.getUpperRightY() > mediaBox.getUpperRightY()) {
|
||||
|
||||
float adjustedX =
|
||||
Math.max(
|
||||
mediaBox.getLowerLeftX(),
|
||||
Math.min(
|
||||
rect.getLowerLeftX(),
|
||||
mediaBox.getUpperRightX() - rect.getWidth()));
|
||||
float adjustedY =
|
||||
Math.max(
|
||||
mediaBox.getLowerLeftY(),
|
||||
Math.min(
|
||||
rect.getLowerLeftY(),
|
||||
mediaBox.getUpperRightY() - rect.getHeight()));
|
||||
rect = new PDRectangle(adjustedX, adjustedY, rect.getWidth(), rect.getHeight());
|
||||
}
|
||||
|
||||
return rect;
|
||||
}
|
||||
|
||||
public static String processInlineImages(
|
||||
String htmlContent, EmlParser.EmailContent emailContent) {
|
||||
if (htmlContent == null || emailContent == null) return htmlContent;
|
||||
|
||||
Map<String, EmlParser.EmailAttachment> contentIdMap = new HashMap<>();
|
||||
for (EmlParser.EmailAttachment attachment : emailContent.getAttachments()) {
|
||||
if (attachment.isEmbedded()
|
||||
&& attachment.getContentId() != null
|
||||
&& attachment.getData() != null) {
|
||||
contentIdMap.put(attachment.getContentId(), attachment);
|
||||
}
|
||||
}
|
||||
|
||||
if (contentIdMap.isEmpty()) return htmlContent;
|
||||
|
||||
Pattern cidPattern =
|
||||
Pattern.compile(
|
||||
"(?i)<img[^>]*\\ssrc\\s*=\\s*['\"]cid:([^'\"]+)['\"][^>]*>",
|
||||
Pattern.CASE_INSENSITIVE);
|
||||
Matcher matcher = cidPattern.matcher(htmlContent);
|
||||
|
||||
StringBuilder result = new StringBuilder();
|
||||
while (matcher.find()) {
|
||||
String contentId = matcher.group(1);
|
||||
EmlParser.EmailAttachment attachment = contentIdMap.get(contentId);
|
||||
|
||||
if (attachment != null && attachment.getData() != null) {
|
||||
String mimeType =
|
||||
EmlProcessingUtils.detectMimeType(
|
||||
attachment.getFilename(), attachment.getContentType());
|
||||
|
||||
String base64Data = Base64.getEncoder().encodeToString(attachment.getData());
|
||||
String dataUri = "data:" + mimeType + ";base64," + base64Data;
|
||||
|
||||
String replacement =
|
||||
matcher.group(0).replaceFirst("cid:" + Pattern.quote(contentId), dataUri);
|
||||
matcher.appendReplacement(result, Matcher.quoteReplacement(replacement));
|
||||
} else {
|
||||
matcher.appendReplacement(result, Matcher.quoteReplacement(matcher.group(0)));
|
||||
}
|
||||
}
|
||||
matcher.appendTail(result);
|
||||
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
public static String formatEmailDate(Date date) {
|
||||
if (date == null) return "";
|
||||
|
||||
SimpleDateFormat formatter =
|
||||
new SimpleDateFormat("EEE, MMM d, yyyy 'at' h:mm a z", Locale.ENGLISH);
|
||||
formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
|
||||
return formatter.format(date);
|
||||
}
|
||||
|
||||
@Data
|
||||
public static class MarkerPosition {
|
||||
private int pageIndex;
|
||||
private float x;
|
||||
private float y;
|
||||
private String character;
|
||||
private String filename;
|
||||
|
||||
public MarkerPosition(int pageIndex, float x, float y, String character, String filename) {
|
||||
this.pageIndex = pageIndex;
|
||||
this.x = x;
|
||||
this.y = y;
|
||||
this.character = character;
|
||||
this.filename = filename;
|
||||
}
|
||||
}
|
||||
|
||||
public static class AttachmentMarkerPositionFinder extends PDFTextStripper {
|
||||
@Getter private final List<MarkerPosition> positions = new ArrayList<>();
|
||||
private int currentPageIndex;
|
||||
protected boolean sortByPosition;
|
||||
private boolean isInAttachmentSection;
|
||||
private boolean attachmentSectionFound;
|
||||
private final StringBuilder currentText = new StringBuilder();
|
||||
|
||||
private static final Pattern ATTACHMENT_SECTION_PATTERN =
|
||||
Pattern.compile("attachments\\s*\\(\\d+\\)", Pattern.CASE_INSENSITIVE);
|
||||
|
||||
private static final Pattern FILENAME_PATTERN =
|
||||
Pattern.compile("@\\s*([^\\s\\(]+(?:\\.[a-zA-Z0-9]+)?)");
|
||||
|
||||
public AttachmentMarkerPositionFinder() {
|
||||
super();
|
||||
this.currentPageIndex = 0;
|
||||
this.sortByPosition = false; // Disable sorting to preserve document order
|
||||
this.isInAttachmentSection = false;
|
||||
this.attachmentSectionFound = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getText(PDDocument document) throws IOException {
|
||||
super.getText(document);
|
||||
|
||||
if (sortByPosition) {
|
||||
positions.sort(
|
||||
(a, b) -> {
|
||||
int pageCompare = Integer.compare(a.getPageIndex(), b.getPageIndex());
|
||||
if (pageCompare != 0) return pageCompare;
|
||||
return Float.compare(
|
||||
b.getY(), a.getY()); // Descending Y per PDF coordinate system
|
||||
});
|
||||
}
|
||||
|
||||
return ""; // Return empty string as we only need positions
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void startPage(PDPage page) throws IOException {
|
||||
super.startPage(page);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void endPage(PDPage page) throws IOException {
|
||||
currentPageIndex++;
|
||||
super.endPage(page);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void writeString(String string, List<TextPosition> textPositions)
|
||||
throws IOException {
|
||||
String lowerString = string.toLowerCase();
|
||||
|
||||
if (ATTACHMENT_SECTION_PATTERN.matcher(lowerString).find()) {
|
||||
isInAttachmentSection = true;
|
||||
attachmentSectionFound = true;
|
||||
}
|
||||
|
||||
if (isInAttachmentSection
|
||||
&& (lowerString.contains("</body>")
|
||||
|| lowerString.contains("</html>")
|
||||
|| (attachmentSectionFound
|
||||
&& lowerString.trim().isEmpty()
|
||||
&& string.length() > 50))) {
|
||||
isInAttachmentSection = false;
|
||||
}
|
||||
|
||||
if (isInAttachmentSection) {
|
||||
currentText.append(string);
|
||||
|
||||
for (int i = 0; (i = string.indexOf(ATTACHMENT_MARKER, i)) != -1; i++) {
|
||||
if (i < textPositions.size()) {
|
||||
TextPosition textPosition = textPositions.get(i);
|
||||
|
||||
String filename = extractFilenameAfterMarker(string, i);
|
||||
|
||||
MarkerPosition position =
|
||||
new MarkerPosition(
|
||||
currentPageIndex,
|
||||
textPosition.getXDirAdj(),
|
||||
textPosition.getYDirAdj(),
|
||||
ATTACHMENT_MARKER,
|
||||
filename);
|
||||
positions.add(position);
|
||||
}
|
||||
}
|
||||
}
|
||||
super.writeString(string, textPositions);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setSortByPosition(boolean sortByPosition) {
|
||||
this.sortByPosition = sortByPosition;
|
||||
}
|
||||
|
||||
private String extractFilenameAfterMarker(String text, int markerIndex) {
|
||||
String afterMarker = text.substring(markerIndex + 1);
|
||||
|
||||
Matcher matcher = FILENAME_PATTERN.matcher("@" + afterMarker);
|
||||
if (matcher.find()) {
|
||||
return matcher.group(1);
|
||||
}
|
||||
|
||||
String[] parts = afterMarker.split("[\\s\\(\\)]+");
|
||||
for (String part : parts) {
|
||||
part = part.trim();
|
||||
if (part.length() > 3 && part.contains(".")) {
|
||||
return part;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static Map<Integer, String> addAttachmentsToDocumentWithMapping(
|
||||
PDDocument document,
|
||||
List<MultipartFile> attachments,
|
||||
List<EmlParser.EmailAttachment> originalAttachments)
|
||||
throws IOException {
|
||||
|
||||
PDDocumentCatalog catalog = document.getDocumentCatalog();
|
||||
|
||||
if (catalog == null) {
|
||||
throw new IOException("PDF document catalog is not accessible");
|
||||
}
|
||||
|
||||
PDDocumentNameDictionary documentNames = catalog.getNames();
|
||||
if (documentNames == null) {
|
||||
documentNames = new PDDocumentNameDictionary(catalog);
|
||||
catalog.setNames(documentNames);
|
||||
}
|
||||
|
||||
PDEmbeddedFilesNameTreeNode embeddedFilesTree = documentNames.getEmbeddedFiles();
|
||||
if (embeddedFilesTree == null) {
|
||||
embeddedFilesTree = new PDEmbeddedFilesNameTreeNode();
|
||||
documentNames.setEmbeddedFiles(embeddedFilesTree);
|
||||
}
|
||||
|
||||
Map<String, PDComplexFileSpecification> existingNames = embeddedFilesTree.getNames();
|
||||
if (existingNames == null) {
|
||||
existingNames = new HashMap<>();
|
||||
}
|
||||
|
||||
Map<Integer, String> indexToFilenameMap = new HashMap<>();
|
||||
|
||||
for (int i = 0; i < attachments.size(); i++) {
|
||||
MultipartFile attachment = attachments.get(i);
|
||||
String filename = attachment.getOriginalFilename();
|
||||
if (filename == null || filename.trim().isEmpty()) {
|
||||
filename = "attachment_" + i;
|
||||
}
|
||||
|
||||
String normalizedFilename =
|
||||
java.text.Normalizer.normalize(filename, java.text.Normalizer.Form.NFC);
|
||||
String uniqueFilename =
|
||||
ensureUniqueFilename(normalizedFilename, existingNames.keySet());
|
||||
|
||||
indexToFilenameMap.put(i, uniqueFilename);
|
||||
|
||||
PDEmbeddedFile embeddedFile = new PDEmbeddedFile(document, attachment.getInputStream());
|
||||
embeddedFile.setSize((int) attachment.getSize());
|
||||
|
||||
GregorianCalendar currentTime = new GregorianCalendar();
|
||||
embeddedFile.setCreationDate(currentTime);
|
||||
embeddedFile.setModDate(currentTime);
|
||||
|
||||
String contentType = attachment.getContentType();
|
||||
if (contentType != null && !contentType.trim().isEmpty()) {
|
||||
embeddedFile.setSubtype(contentType);
|
||||
}
|
||||
|
||||
PDComplexFileSpecification fileSpecification = new PDComplexFileSpecification();
|
||||
fileSpecification.setFile(uniqueFilename);
|
||||
fileSpecification.setFileUnicode(uniqueFilename);
|
||||
fileSpecification.setEmbeddedFile(embeddedFile);
|
||||
fileSpecification.setEmbeddedFileUnicode(embeddedFile);
|
||||
|
||||
existingNames.put(uniqueFilename, fileSpecification);
|
||||
}
|
||||
|
||||
embeddedFilesTree.setNames(existingNames);
|
||||
documentNames.setEmbeddedFiles(embeddedFilesTree);
|
||||
catalog.setNames(documentNames);
|
||||
|
||||
return indexToFilenameMap;
|
||||
}
|
||||
|
||||
private static void addAttachmentAnnotationsToDocumentWithMapping(
|
||||
PDDocument document,
|
||||
List<EmlParser.EmailAttachment> attachments,
|
||||
Map<Integer, String> indexToFilenameMap)
|
||||
throws IOException {
|
||||
|
||||
if (document.getNumberOfPages() == 0 || attachments == null || attachments.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
AttachmentMarkerPositionFinder finder = new AttachmentMarkerPositionFinder();
|
||||
finder.setSortByPosition(false); // Keep document order to maintain pairing
|
||||
finder.getText(document);
|
||||
List<MarkerPosition> markerPositions = finder.getPositions();
|
||||
|
||||
int annotationsToAdd = Math.min(markerPositions.size(), attachments.size());
|
||||
|
||||
for (int i = 0; i < annotationsToAdd; i++) {
|
||||
MarkerPosition position = markerPositions.get(i);
|
||||
|
||||
String filenameNearMarker = position.getFilename();
|
||||
|
||||
EmlParser.EmailAttachment matchingAttachment =
|
||||
findAttachmentByFilename(attachments, filenameNearMarker);
|
||||
|
||||
if (matchingAttachment != null) {
|
||||
String embeddedFilename =
|
||||
findEmbeddedFilenameForAttachment(matchingAttachment, indexToFilenameMap);
|
||||
|
||||
if (embeddedFilename != null) {
|
||||
PDPage page = document.getPage(position.getPageIndex());
|
||||
addAttachmentAnnotationToPageWithMapping(
|
||||
document,
|
||||
page,
|
||||
matchingAttachment,
|
||||
embeddedFilename,
|
||||
position.getX(),
|
||||
position.getY(),
|
||||
i);
|
||||
} else {
|
||||
// No embedded filename found for attachment
|
||||
}
|
||||
} else {
|
||||
// No matching attachment found for filename near marker
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static EmlParser.EmailAttachment findAttachmentByFilename(
|
||||
List<EmlParser.EmailAttachment> attachments, String targetFilename) {
|
||||
if (targetFilename == null || targetFilename.trim().isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
String normalizedTarget = normalizeFilename(targetFilename);
|
||||
|
||||
// First try exact match
|
||||
for (EmlParser.EmailAttachment attachment : attachments) {
|
||||
if (attachment.getFilename() != null) {
|
||||
String normalizedAttachment = normalizeFilename(attachment.getFilename());
|
||||
if (normalizedAttachment.equals(normalizedTarget)) {
|
||||
return attachment;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Then try contains match
|
||||
for (EmlParser.EmailAttachment attachment : attachments) {
|
||||
if (attachment.getFilename() != null) {
|
||||
String normalizedAttachment = normalizeFilename(attachment.getFilename());
|
||||
if (normalizedAttachment.contains(normalizedTarget)
|
||||
|| normalizedTarget.contains(normalizedAttachment)) {
|
||||
return attachment;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private static String findEmbeddedFilenameForAttachment(
|
||||
EmlParser.EmailAttachment attachment, Map<Integer, String> indexToFilenameMap) {
|
||||
|
||||
String attachmentFilename = attachment.getFilename();
|
||||
if (attachmentFilename == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
for (Map.Entry<Integer, String> entry : indexToFilenameMap.entrySet()) {
|
||||
String embeddedFilename = entry.getValue();
|
||||
if (embeddedFilename != null
|
||||
&& (embeddedFilename.equals(attachmentFilename)
|
||||
|| embeddedFilename.contains(attachmentFilename)
|
||||
|| attachmentFilename.contains(embeddedFilename))) {
|
||||
return embeddedFilename;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private static String normalizeFilename(String filename) {
|
||||
if (filename == null) return "";
|
||||
return filename.toLowerCase()
|
||||
.trim()
|
||||
.replaceAll("\\s+", " ")
|
||||
.replaceAll("[^a-zA-Z0-9._-]", "");
|
||||
}
|
||||
|
||||
private static void addAttachmentAnnotationToPageWithMapping(
|
||||
PDDocument document,
|
||||
PDPage page,
|
||||
EmlParser.EmailAttachment attachment,
|
||||
String embeddedFilename,
|
||||
float x,
|
||||
float y,
|
||||
int attachmentIndex)
|
||||
throws IOException {
|
||||
|
||||
PDAnnotationFileAttachment fileAnnotation = new PDAnnotationFileAttachment();
|
||||
|
||||
PDRectangle rect = calculateAnnotationRectangle(page, x, y);
|
||||
fileAnnotation.setRectangle(rect);
|
||||
|
||||
fileAnnotation.setPrinted(false);
|
||||
fileAnnotation.setHidden(false);
|
||||
fileAnnotation.setNoView(false);
|
||||
fileAnnotation.setNoZoom(true);
|
||||
fileAnnotation.setNoRotate(true);
|
||||
|
||||
try {
|
||||
PDAppearanceDictionary appearance = new PDAppearanceDictionary();
|
||||
PDAppearanceStream normalAppearance = new PDAppearanceStream(document);
|
||||
normalAppearance.setBBox(new PDRectangle(0, 0, rect.getWidth(), rect.getHeight()));
|
||||
appearance.setNormalAppearance(normalAppearance);
|
||||
fileAnnotation.setAppearance(appearance);
|
||||
} catch (RuntimeException e) {
|
||||
fileAnnotation.setAppearance(null);
|
||||
}
|
||||
|
||||
PDEmbeddedFilesNameTreeNode efTree =
|
||||
document.getDocumentCatalog().getNames().getEmbeddedFiles();
|
||||
if (efTree != null) {
|
||||
Map<String, PDComplexFileSpecification> efMap = efTree.getNames();
|
||||
if (efMap != null) {
|
||||
PDComplexFileSpecification fileSpec = efMap.get(embeddedFilename);
|
||||
if (fileSpec != null) {
|
||||
fileAnnotation.setFile(fileSpec);
|
||||
} else {
|
||||
// Could not find embedded file
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fileAnnotation.setContents(
|
||||
"Attachment " + (attachmentIndex + 1) + ": " + attachment.getFilename());
|
||||
fileAnnotation.setAnnotationName(
|
||||
"EmbeddedFile_" + attachmentIndex + "_" + embeddedFilename);
|
||||
|
||||
page.getAnnotations().add(fileAnnotation);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user