feat(conversion): refactor EML parser to use Simple Java Mail library and add MSG support (#5427)

# Description of Changes


Note on Simple Java Mail:
- SJM includes Angus/Jakarta Mail.
- SJM is a very thin layer on Angus Mail; see here:
https://github.com/bbottema/simple-java-mail
- SJM provides high-level methods to parse email more reliably via Angus
Mail, and also contains lots of other interesting features.
- SJM is licensed under the Apache License 2.0.

This pull request updates the email processing utilities to add support
for parsing and validating Outlook MSG files, refactors the
`EmlProcessingUtils` utility class to use instance methods and improved
resource management, and enhances the handling and styling of generated
email HTML. The changes also introduce external CSS resource loading
with a fallback mechanism, and update dependencies to support MSG file
parsing.

**MSG file support and validation:**
- Added `simple-java-mail` and `outlook-module` dependencies to enable
EML and MSG file parsing, and updated validation logic to recognize and
accept MSG files by checking their magic bytes.
(`app/common/build.gradle`, `EmlProcessingUtils.java`)

**Refactoring and modernization of `EmlProcessingUtils`:**
- Converted static methods and fields in `EmlProcessingUtils` to
instance methods/fields, improving testability and future extensibility.
(`EmlProcessingUtils.java`)

**Enhanced HTML/CSS styling for email rendering:**
- Updated HTML generation to use consistent formatting and improved
style variable usage, and refactored CSS injection to load from an
external resource (`email-pdf-styles.css`) with a synchronized cache and
a minimal fallback if the resource is missing.
(`EmlProcessingUtils.java`)

**Attachment and content rendering improvements:**
- Improved the formatting of meta-information (e.g., CC, BCC, Date) and
attachment sections in generated email HTML, and ensured more robust
handling of empty or missing content. (`EmlProcessingUtils.java`)

**General code cleanup and logging:**
- Added SLF4J logging for error handling when loading CSS resources, and
cleaned up imports and method signatures for clarity and
maintainability. (`EmlProcessingUtils.java`)


<img width="367" height="991" alt="image"
src="https://github.com/user-attachments/assets/0cfb959c-da92-4cff-9e52-ff4ab7fa806e"
/>


<!--
Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)
-->

---

## Checklist

### General

- [X] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [X] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [X] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [X] I have performed a self-review of my own code
- [X] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### Translations (if applicable)

- [ ] I ran
[`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [X] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.

---------

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
Balázs Szücs
2026-01-13 22:17:40 +01:00
committed by GitHub
parent daf27b6128
commit 84ed1d7ecb
12 changed files with 764 additions and 779 deletions

View File

@@ -338,8 +338,8 @@ const ConvertSettings = ({
</>
) : null}
{/* Email to PDF options */}
{parameters.fromExtension === 'eml' && parameters.toExtension === 'pdf' && (
{/* Email to PDF options (EML and MSG formats) */}
{(parameters.fromExtension === 'eml' || parameters.fromExtension === 'msg') && parameters.toExtension === 'pdf' && (
<>
<Divider />
<ConvertFromEmailSettings

View File

@@ -96,6 +96,7 @@ export const FROM_FORMAT_OPTIONS = [
{ value: 'txt', label: 'TXT', group: 'Text' },
{ value: 'rtf', label: 'RTF', group: 'Text' },
{ value: 'eml', label: 'EML', group: 'Email' },
{ value: 'msg', label: 'MSG (Outlook)', group: 'Email' },
{ value: 'epub', label: 'EPUB', group: 'eBook' },
{ value: 'mobi', label: 'MOBI', group: 'eBook' },
{ value: 'azw3', label: 'AZW3', group: 'eBook' },
@@ -140,6 +141,7 @@ export const CONVERSION_MATRIX: Record<string, string[]> = {
'md': ['pdf'],
'txt': ['pdf'], 'rtf': ['pdf'],
'eml': ['pdf'],
'msg': ['pdf'],
'cbr': ['pdf'],
'epub': ['pdf'], 'mobi': ['pdf'], 'azw3': ['pdf'], 'fb2': ['pdf']
};
@@ -171,6 +173,7 @@ export const EXTENSION_TO_ENDPOINT: Record<string, Record<string, string>> = {
'txt': { 'pdf': 'file-to-pdf' }, 'rtf': { 'pdf': 'file-to-pdf' },
'cbr': { 'pdf': 'cbr-to-pdf' },
'eml': { 'pdf': 'eml-to-pdf' },
'msg': { 'pdf': 'eml-to-pdf' }, // MSG uses same endpoint as EML
'epub': { 'pdf': 'ebook-to-pdf' }, 'mobi': { 'pdf': 'ebook-to-pdf' }, 'azw3': { 'pdf': 'ebook-to-pdf' }, 'fb2': { 'pdf': 'ebook-to-pdf' }
};

View File

@@ -11,7 +11,7 @@ export const CONVERT_SUPPORTED_FORMATS = [
// StarOffice
'sda', 'sdc', 'sdd', 'sdw', 'stc', 'std', 'sti', 'stw', 'sxd', 'sxg', 'sxi', 'sxw',
// Email formats
'eml',
'eml', 'msg',
// Ebook formats
'epub', 'mobi', 'azw3', 'fb2',
// Archive formats

View File

@@ -66,7 +66,7 @@ export const buildConvertFormData = (parameters: ConvertParameters, selectedFile
formData.append("autoRotate", imageOptions.autoRotate.toString());
} else if ((fromExtension === 'html' || fromExtension === 'zip') && toExtension === 'pdf') {
formData.append("zoom", htmlOptions.zoomLevel.toString());
} else if (fromExtension === 'eml' && toExtension === 'pdf') {
} else if ((fromExtension === 'eml' || fromExtension === 'msg') && toExtension === 'pdf') {
formData.append("includeAttachments", emailOptions.includeAttachments.toString());
formData.append("maxAttachmentSizeMB", emailOptions.maxAttachmentSizeMB.toString());
formData.append("downloadHtml", emailOptions.downloadHtml.toString());

View File

@@ -120,6 +120,13 @@ const ALL_CONVERSION_ENDPOINTS: ConversionEndpoint[] = [
toFormat: 'pdf',
description: 'Convert email (EML) to PDF',
apiPath: '/api/v1/convert/eml/pdf'
},
{
endpoint: 'eml-to-pdf', // MSG uses same endpoint as EML
fromFormat: 'msg',
toFormat: 'pdf',
description: 'Convert Outlook email (MSG) to PDF',
apiPath: '/api/v1/convert/eml/pdf'
}
];

View File

@@ -76,8 +76,9 @@ describe('convertUtils', () => {
expect(getEndpointName('txt', 'pdf')).toBe('file-to-pdf');
expect(getEndpointName('rtf', 'pdf')).toBe('file-to-pdf');
// Email to PDF
// Email to PDF (EML and MSG)
expect(getEndpointName('eml', 'pdf')).toBe('eml-to-pdf');
expect(getEndpointName('msg', 'pdf')).toBe('eml-to-pdf');
});
test('should return empty string for unsupported conversions', () => {
@@ -158,8 +159,9 @@ describe('convertUtils', () => {
expect(getEndpointUrl('txt', 'pdf')).toBe('/api/v1/convert/file/pdf');
expect(getEndpointUrl('rtf', 'pdf')).toBe('/api/v1/convert/file/pdf');
// Email to PDF
// Email to PDF (EML and MSG)
expect(getEndpointUrl('eml', 'pdf')).toBe('/api/v1/convert/eml/pdf');
expect(getEndpointUrl('msg', 'pdf')).toBe('/api/v1/convert/eml/pdf');
});
test('should return empty string for unsupported conversions', () => {
@@ -240,8 +242,9 @@ describe('convertUtils', () => {
expect(isConversionSupported('txt', 'pdf')).toBe(true);
expect(isConversionSupported('rtf', 'pdf')).toBe(true);
// Email to PDF
// Email to PDF (EML and MSG)
expect(isConversionSupported('eml', 'pdf')).toBe(true);
expect(isConversionSupported('msg', 'pdf')).toBe(true);
});
test('should return false for unsupported conversions', () => {