feat(pdf-EPUB): add PDF to EPUB/AZW3 conversion functionality via Calibre (#4947)

# Description of Changes

This PR introduces a new conversion tool allowing users to convert PDF
documents into EPUB or AZW3 format. This is particularly useful for reading
documents on e-readers (like Kindles or Kobos) where standard PDFs often
suffer from fixed formatting and unreadable text sizes.

The implementation leverages the existing **Calibre** integration
(`ebook-convert`) to produce reflowable e-books with specific
optimizations for layout and chapter structure.

**Backend Implementation**
* Added `ConvertPDFToEpubController` to handle the conversion workflow.
* Created `ConvertPdfToEpubRequest` to support new conversion parameters
(Device profile and Chapter detection).
* Integrated standard Stirling-PDF temporary file management and process
execution patterns.

**Frontend & UI**
* Added a new view `pdf-to-epub.html` containing the upload form and
configuration options.
* Updated `navElements.html` and `messages.properties` to expose the
tool in the navigation menu under the "Convert" group.
* Minor cleanup of HTML formatting in the existing `ebook-to-pdf`
template for consistency.

**Configuration & Testing**
* Registered the `pdf-to-epub` endpoint in `EndpointConfiguration`,
placing it under the **Calibre** dependency group.
* Added comprehensive unit tests covering command generation, parameter
handling, and temporary file cleanup.


The conversion process uses specific Calibre `ebook-convert` flags
to ensure high-quality output:

* **Heuristic Processing** (`--enable-heuristics`): Automatically
detects and fixes common PDF scanning issues, such as broken lines,
hyphens at line ends, and inconsistent paragraph spacing.
* **CSS Filtering** (`--filter-css`): Strips hardcoded styling (font
families, fixed margins, colors) from the PDF. This ensures the
resulting EPUB respects the user's e-reader settings (font size, dark
mode, etc.).
* **Smart Chapter Detection** (`--chapter`): Optionally uses an XPath
expression (`//h:*[re:test(., '\\s*Chapter\\s+', 'i')]`) to detect
headers and insert proper page breaks in the EPUB structure.
* **Device Optimization Profiles** (`--output-profile`):
  * **Tablet/Phone:** Uses Calibre's `tablet` profile to maintain image
    resolution and color.
  * **Kindle/E-Ink:** Uses Calibre's `kindle` profile to resize images and
    optimize contrast for grayscale screens.



<!--
Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)
-->

---

## Checklist

### General

- [X] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [X] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [X] I have performed a self-review of my own code
- [X] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### Translations (if applicable)

- [ ] I ran
[`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [X] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.

---------

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
Balázs Szücs 2025-11-25 11:02:50 +01:00 committed by GitHub
parent e68871bad3
commit 97f3b88222
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 761 additions and 36 deletions

View File

@ -258,6 +258,7 @@ public class EndpointConfiguration {
addEndpointToGroup("Convert", "url-to-pdf");
addEndpointToGroup("Convert", "markdown-to-pdf");
addEndpointToGroup("Convert", "ebook-to-pdf");
addEndpointToGroup("Convert", "pdf-to-epub");
addEndpointToGroup("Convert", "pdf-to-csv");
addEndpointToGroup("Convert", "pdf-to-markdown");
addEndpointToGroup("Convert", "eml-to-pdf");
@ -449,6 +450,7 @@ public class EndpointConfiguration {
// Calibre dependent endpoints
addEndpointToGroup("Calibre", "ebook-to-pdf");
addEndpointToGroup("Calibre", "pdf-to-epub");
// Pdftohtml dependent endpoints
addEndpointToGroup("Pdftohtml", "pdf-to-html");

View File

@ -0,0 +1,204 @@
package stirling.software.SPDF.controller.api.converters;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.FilenameUtils;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.config.EndpointConfiguration;
import stirling.software.SPDF.model.api.converters.ConvertPdfToEpubRequest;
import stirling.software.SPDF.model.api.converters.ConvertPdfToEpubRequest.OutputFormat;
import stirling.software.SPDF.model.api.converters.ConvertPdfToEpubRequest.TargetDevice;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.ProcessExecutor;
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
import stirling.software.common.util.TempFileManager;
import stirling.software.common.util.WebResponseUtils;
/**
 * REST endpoint that turns an uploaded PDF into an EPUB or AZW3 ebook by running Calibre's
 * {@code ebook-convert} tool inside a temporary working directory.
 */
@RestController
@RequestMapping("/api/v1/convert")
@Tag(name = "Convert", description = "Convert APIs")
@RequiredArgsConstructor
@Slf4j
public class ConvertPDFToEpubController {

    private static final String CALIBRE_GROUP = "Calibre";
    private static final String DEFAULT_EXTENSION = "pdf";
    private static final String FILTERED_CSS =
            "font-family,color,background-color,margin-left,margin-right";
    private static final String SMART_CHAPTER_EXPRESSION =
            "//h:*[re:test(., '\\s*Chapter\\s+', 'i')]";

    private final TempFileManager tempFileManager;
    private final EndpointConfiguration endpointConfiguration;

    /**
     * Assembles the {@code ebook-convert} argument list.
     *
     * @param inputPath PDF file to read
     * @param outputPath ebook file to write
     * @param detectChapters whether to append {@code --chapter} with the chapter XPath
     * @param targetDevice device whose Calibre profile is passed via {@code --output-profile};
     *     the option is omitted when {@code null}
     * @return mutable list of command-line arguments, executable name first
     */
    private static List<String> buildCalibreCommand(
            Path inputPath, Path outputPath, boolean detectChapters, TargetDevice targetDevice) {
        // Fixed prefix: executable, input, output and the always-on "golden default" flags.
        List<String> args =
                new ArrayList<>(
                        List.of(
                                "ebook-convert",
                                inputPath.toString(),
                                outputPath.toString(),
                                "--enable-heuristics",
                                "--insert-blank-line",
                                "--filter-css",
                                FILTERED_CSS));

        if (detectChapters) {
            args.add("--chapter");
            args.add(SMART_CHAPTER_EXPRESSION);
        }

        if (targetDevice != null) {
            args.add("--output-profile");
            args.add(targetDevice.getCalibreProfile());
        }

        return args;
    }

    /** Returns the sanitised upload name, or {@code document.pdf} when none is usable. */
    private static String resolveOriginalFilename(MultipartFile upload) {
        String name = Filenames.toSimpleFileName(upload.getOriginalFilename());
        if (name == null || name.isBlank()) {
            return "document." + DEFAULT_EXTENSION;
        }
        return name;
    }

    /** Rejects file names whose extension is missing or is not {@code pdf} (case-insensitive). */
    private static void requirePdfExtension(String filename) {
        String extension = FilenameUtils.getExtension(filename);
        if (extension.isBlank()) {
            throw new IllegalArgumentException("Unable to determine file type");
        }
        if (!DEFAULT_EXTENSION.equalsIgnoreCase(extension)) {
            throw new IllegalArgumentException("Input file must be a PDF");
        }
    }

    /** Fails when the process yielded no result or a non-zero return code. */
    private static void requireSuccessfulRun(ProcessExecutorResult result) {
        if (result == null) {
            throw new IllegalStateException("Calibre conversion returned no result");
        }
        if (result.getRc() == 0) {
            return;
        }
        String failure = result.getMessages();
        if (failure == null || failure.isBlank()) {
            failure = "Calibre conversion failed";
        }
        throw new IllegalStateException(failure);
    }

    /**
     * Converts the uploaded PDF and returns the generated ebook as the response body.
     *
     * @param request multipart form carrying the PDF and the optional conversion settings; unset
     *     settings fall back to chapter detection on, the tablet/phone profile and EPUB output
     * @return response containing the ebook bytes with a matching file name and content type
     * @throws IllegalStateException if the Calibre group is disabled, the conversion fails or no
     *     output file is produced
     * @throws IllegalArgumentException if no file was uploaded or it is not named as a PDF
     * @throws Exception if file I/O or the external process fails
     */
    @PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/pdf/epub")
    @Operation(
            summary = "Convert PDF to EPUB/AZW3",
            description =
                    "Convert a PDF file to a high-quality EPUB or AZW3 ebook using Calibre. Input:PDF"
                            + " Output:EPUB/AZW3 Type:SISO")
    public ResponseEntity<byte[]> convertPdfToEpub(@ModelAttribute ConvertPdfToEpubRequest request)
            throws Exception {
        boolean calibreEnabled = endpointConfiguration.isGroupEnabled(CALIBRE_GROUP);
        if (!calibreEnabled) {
            throw new IllegalStateException(
                    "Calibre support is disabled. Enable the Calibre group or install Calibre to use"
                            + " this feature.");
        }

        MultipartFile upload = request.getFileInput();
        if (upload == null || upload.isEmpty()) {
            throw new IllegalArgumentException("No input file provided");
        }

        // Only an explicit FALSE switches chapter detection off; null counts as "on".
        Boolean requestedDetection = request.getDetectChapters();
        boolean detectChapters = requestedDetection == null || requestedDetection;

        TargetDevice targetDevice = request.getTargetDevice();
        if (targetDevice == null) {
            targetDevice = TargetDevice.TABLET_PHONE_IMAGES;
        }

        OutputFormat outputFormat = request.getOutputFormat();
        if (outputFormat == null) {
            outputFormat = OutputFormat.EPUB;
        }

        String originalFilename = resolveOriginalFilename(upload);
        requirePdfExtension(originalFilename);

        String baseName = FilenameUtils.getBaseName(originalFilename);
        if (baseName == null || baseName.isBlank()) {
            baseName = "document";
        }

        // Declared outside the try so the finally block can clean up whatever was created.
        Path workDir = null;
        Path pdfPath = null;
        Path ebookPath = null;
        try {
            workDir = tempFileManager.createTempDirectory();
            pdfPath = workDir.resolve(baseName + "." + DEFAULT_EXTENSION);
            ebookPath = workDir.resolve(baseName + "." + outputFormat.getExtension());

            try (InputStream uploadStream = upload.getInputStream()) {
                Files.copy(uploadStream, pdfPath, StandardCopyOption.REPLACE_EXISTING);
            }

            List<String> command =
                    buildCalibreCommand(pdfPath, ebookPath, detectChapters, targetDevice);
            ProcessExecutor calibre =
                    ProcessExecutor.getInstance(ProcessExecutor.Processes.CALIBRE);
            requireSuccessfulRun(calibre.runCommandWithOutputHandling(command, workDir.toFile()));

            boolean outputMissing = !Files.exists(ebookPath) || Files.size(ebookPath) == 0L;
            if (outputMissing) {
                throw new IllegalStateException(
                        "Calibre did not produce a " + outputFormat.name() + " output");
            }

            String suffix =
                    "_convertedTo" + outputFormat.name() + "." + outputFormat.getExtension();
            String downloadName = GeneralUtils.generateFilename(originalFilename, suffix);
            byte[] ebookBytes = Files.readAllBytes(ebookPath);
            MediaType contentType = MediaType.valueOf(outputFormat.getMediaType());
            return WebResponseUtils.bytesToWebResponse(ebookBytes, downloadName, contentType);
        } finally {
            cleanupTempFiles(workDir, pdfPath, ebookPath);
        }
    }

    /**
     * Best-effort removal of the conversion artefacts; failures are logged, never rethrown.
     *
     * @param workingDirectory temporary directory, or {@code null} if it was never created
     * @param inputPath copied PDF, may be {@code null}
     * @param outputPath generated ebook, may be {@code null}
     */
    private void cleanupTempFiles(Path workingDirectory, Path inputPath, Path outputPath) {
        if (workingDirectory != null) {
            for (Path candidate : new Path[] {inputPath, outputPath}) {
                if (candidate == null) {
                    continue;
                }
                try {
                    Files.deleteIfExists(candidate);
                } catch (IOException e) {
                    log.warn("Failed to delete temporary file: {}", candidate, e);
                }
            }

            try {
                tempFileManager.deleteTempDirectory(workingDirectory);
            } catch (Exception e) {
                log.warn("Failed to delete temporary directory: {}", workingDirectory, e);
            }
        }
    }
}

View File

@ -54,6 +54,17 @@ public class ConverterWebController {
return "convert/ebook-to-pdf";
}
/**
 * Serves the PDF to EPUB/AZW3 conversion page.
 *
 * @param model view model; receives {@code currentPage = "pdf-to-epub"}
 * @return the {@code convert/pdf-to-epub} view name
 * @throws ResponseStatusException with 404 when the {@code pdf-to-epub} endpoint is disabled
 */
@GetMapping("/pdf-to-epub")
@Hidden
public String convertPdfToEpubForm(Model model) {
    EndpointConfiguration endpoints =
            ApplicationContextProvider.getBean(EndpointConfiguration.class);
    boolean toolEnabled = endpoints.isEndpointEnabled("pdf-to-epub");
    if (!toolEnabled) {
        throw new ResponseStatusException(HttpStatus.NOT_FOUND);
    }

    model.addAttribute("currentPage", "pdf-to-epub");
    return "convert/pdf-to-epub";
}
@GetMapping("/pdf-to-cbr")
@Hidden
public String convertPdfToCbrForm(Model model) {

View File

@ -0,0 +1,58 @@
package stirling.software.SPDF.model.api.converters;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import stirling.software.common.model.api.PDFFile;
/**
 * Form/API request for converting a PDF into an ebook. Extends {@link PDFFile} and adds the
 * conversion options; every option is pre-initialised, so an omitted parameter keeps the default
 * declared on its field.
 */
@Data
@EqualsAndHashCode(callSuper = true)
public class ConvertPdfToEpubRequest extends PDFFile {
/** Whether chapter-like headings should be detected; defaults to {@code true}. */
@Schema(
description = "Detect headings that look like chapters and insert EPUB page breaks.",
allowableValues = {"true", "false"},
defaultValue = "true")
private Boolean detectChapters = Boolean.TRUE;
/** Reader device the output is tuned for; defaults to {@link TargetDevice#TABLET_PHONE_IMAGES}. */
@Schema(
description = "Choose an output profile optimized for the reader device.",
allowableValues = {"TABLET_PHONE_IMAGES", "KINDLE_EINK_TEXT"},
defaultValue = "TABLET_PHONE_IMAGES")
private TargetDevice targetDevice = TargetDevice.TABLET_PHONE_IMAGES;
/** Ebook format to generate; defaults to {@link OutputFormat#EPUB}. */
@Schema(
description = "Choose the output format for the ebook.",
allowableValues = {"EPUB", "AZW3"},
defaultValue = "EPUB")
private OutputFormat outputFormat = OutputFormat.EPUB;
/** Supported reader devices, each paired with the Calibre profile name it stands for. */
@Getter
public enum TargetDevice {
TABLET_PHONE_IMAGES("tablet"),
KINDLE_EINK_TEXT("kindle");
// Profile name handed to ebook-convert after --output-profile by the converter controller.
private final String calibreProfile;
TargetDevice(String calibreProfile) {
this.calibreProfile = calibreProfile;
}
}
/** Supported output formats with the file extension and media type used for the result. */
@Getter
public enum OutputFormat {
EPUB("epub", "application/epub+zip"),
AZW3("azw3", "application/vnd.amazon.ebook");
// File extension without the leading dot.
private final String extension;
// Media type string; the converter controller parses it for the response content type.
private final String mediaType;
OutputFormat(String extension, String mediaType) {
this.extension = extension;
this.mediaType = mediaType;
}
}
}

View File

@ -706,6 +706,10 @@ home.ebookToPdf.title=eBook to PDF
home.ebookToPdf.desc=Convert eBook files (EPUB, MOBI, AZW3, FB2, TXT, DOCX) to PDF using Calibre.
ebookToPdf.tags=conversion,ebook,calibre,epub,mobi,azw3
home.pdfToEpub.title=PDF to EPUB/AZW3
home.pdfToEpub.desc=Convert PDF files into EPUB or AZW3 ebooks optimised for e-readers using Calibre.
pdfToEpub.tags=conversion,ebook,epub,azw3,calibre
home.pdfToCbz.title=PDF to CBZ
home.pdfToCbz.desc=Convert PDF files to CBZ comic book archives.
pdfToCbz.tags=conversion,comic,book,archive,cbz,pdf
@ -1592,6 +1596,20 @@ ebookToPDF.includePageNumbers=Add page numbers to the generated PDF
ebookToPDF.optimizeForEbook=Optimize PDF for ebook readers (uses Ghostscript)
ebookToPDF.calibreDisabled=Calibre support is disabled. Enable the Calibre tool group or install Calibre to use this feature.
#pdfToEpub
pdfToEpub.title=PDF to EPUB/AZW3
pdfToEpub.header=PDF to EPUB/AZW3
pdfToEpub.submit=Convert
pdfToEpub.selectText=Select PDF file
pdfToEpub.outputFormat=Output format
pdfToEpub.outputFormat.epub=EPUB
pdfToEpub.outputFormat.azw3=AZW3
pdfToEpub.detectChapters=Detect chapters and insert automatic breaks
pdfToEpub.targetDevice=Target device
pdfToEpub.targetDevice.tablet=Tablet / Phone (keeps images high quality)
pdfToEpub.targetDevice.kindle=Kindle / E-Ink (text-focused, smaller images)
pdfToEpub.calibreDisabled=Calibre support is disabled. Enable the Calibre tool group or install Calibre to use this feature.
#pdfToCBR
pdfToCBR.title=PDF to CBR
pdfToCBR.header=PDF to CBR

View File

@ -38,56 +38,31 @@
th:replace="~{fragments/common :: fileSelector(name='fileInput', multipleInputsForSingleRequest=false, accept='.epub,.mobi,.azw3,.fb2,.txt,.docx', inputText=#{ebookToPDF.selectText})}">
</div>
<div class="form-check mb-2">
<input class="form-check-input"
id="embedAllFonts"
name="embedAllFonts"
type="checkbox"
value="true">
<label for="embedAllFonts"
th:text="#{ebookToPDF.embedAllFonts}">
Embed all fonts in PDF
</label>
<div class="form-check mb-3">
<input id="embedAllFonts" name="embedAllFonts" type="checkbox" value="true">
<label for="embedAllFonts" th:text="#{ebookToPDF.embedAllFonts}"></label>
</div>
<div class="form-check mb-2">
<input class="form-check-input"
id="includeTableOfContents"
<div class="form-check mb-3">
<input id="includeTableOfContents"
name="includeTableOfContents"
type="checkbox"
value="true">
<label
for="includeTableOfContents"
th:text="#{ebookToPDF.includeTableOfContents}">
Add table of contents
</label>
<label for="includeTableOfContents" th:text="#{ebookToPDF.includeTableOfContents}"></label>
</div>
<div class="form-check mb-2">
<input class="form-check-input"
id="includePageNumbers"
name="includePageNumbers"
type="checkbox"
value="true">
<label
for="includePageNumbers"
th:text="#{ebookToPDF.includePageNumbers}">
Add page numbers
</label>
<div class="form-check mb-3">
<input id="includePageNumbers" name="includePageNumbers" type="checkbox" value="true">
<label for="includePageNumbers" th:text="#{ebookToPDF.includePageNumbers}"></label>
</div>
<div class="form-check mb-3"
th:if="${@endpointConfiguration.isGroupEnabled('Ghostscript')}">
<input class="form-check-input"
id="optimizeForEbook"
<input id="optimizeForEbook"
name="optimizeForEbook"
type="checkbox"
value="true">
<label
for="optimizeForEbook"
th:text="#{ebookToPDF.optimizeForEbook}">
Optimize PDF for ebook readers (uses Ghostscript)
</label>
<label for="optimizeForEbook" th:text="#{ebookToPDF.optimizeForEbook}"></label>
</div>
<button class="btn btn-primary"

View File

@ -0,0 +1,87 @@
<!DOCTYPE html>
<html th:data-language="${#locale.toString()}"
th:dir="#{language.direction}"
th:lang="${#locale.language}"
xmlns:th="https://www.thymeleaf.org">
<head>
<th:block th:insert="~{fragments/common :: head(title=#{pdfToEpub.title}, header=#{pdfToEpub.header})}"></th:block>
</head>
<body>
<th:block th:insert="~{fragments/common :: game}"></th:block>
<div id="page-container">
<div id="content-wrap">
<th:block th:insert="~{fragments/navbar.html :: navbar}"></th:block>
<br><br>
<div class="container">
<div class="row justify-content-center">
<div class="col-md-6 bg-card">
<div class="tool-header">
<span class="material-symbols-rounded tool-header-icon convert">menu_book</span>
<span class="tool-header-text"
th:text="#{pdfToEpub.header}"></span>
</div>
<p th:text="#{processTimeWarning}"></p>
<div class="alert alert-warning"
th:if="${!@endpointConfiguration.isGroupEnabled('Calibre')}">
<span th:text="#{pdfToEpub.calibreDisabled}">Calibre support is disabled.</span>
</div>
<form enctype="multipart/form-data"
id="pdfToEpubForm"
method="post"
th:action="@{'/api/v1/convert/pdf/epub'}"
th:if="${@endpointConfiguration.isGroupEnabled('Calibre')}">
<div
th:replace="~{fragments/common :: fileSelector(name='fileInput', multipleInputsForSingleRequest=false, accept='.pdf', inputText=#{pdfToEpub.selectText})}">
</div>
<div class="mb-3">
<label class="form-label" for="outputFormat"
th:text="#{pdfToEpub.outputFormat}"></label>
<select class="form-select"
id="outputFormat"
name="outputFormat">
<option selected
th:text="#{pdfToEpub.outputFormat.epub}"
value="EPUB"></option>
<option th:text="#{pdfToEpub.outputFormat.azw3}"
value="AZW3"></option>
</select>
</div>
<div class="form-check mb-3">
<input checked id="detectChapters" name="detectChapters" type="checkbox" value="true">
<label for="detectChapters" th:text="#{pdfToEpub.detectChapters}"></label>
<!-- Spring field marker: browsers omit unchecked checkboxes from the submission, and the
     request object defaults detectChapters to true, so without this marker unticking the
     box would have no effect. The "_" prefix tells the data binder to reset the field to
     false whenever the checkbox parameter itself is absent. -->
<input name="_detectChapters" type="hidden" value="on">
</div>
<div class="mb-3">
<label class="form-label" for="targetDevice"
th:text="#{pdfToEpub.targetDevice}"></label>
<select class="form-select"
id="targetDevice"
name="targetDevice">
<option selected
th:text="#{pdfToEpub.targetDevice.tablet}"
value="TABLET_PHONE_IMAGES"></option>
<option th:text="#{pdfToEpub.targetDevice.kindle}"
value="KINDLE_EINK_TEXT"></option>
</select>
</div>
<button class="btn btn-primary"
id="submitBtn"
th:text="#{pdfToEpub.submit}"
type="submit">Convert</button>
</form>
</div>
</div>
</div>
</div>
<th:block th:insert="~{fragments/footer.html :: footer}"></th:block>
</div>
</body>
</html>

View File

@ -89,6 +89,9 @@
<div
th:replace="~{fragments/navbarEntry :: navbarEntry('pdf-to-cbr', 'auto_stories', 'home.pdfToCbr.title', 'home.pdfToCbr.desc', 'pdfToCbr.tags', 'convert')}">
</div>
<div
th:replace="~{fragments/navbarEntry :: navbarEntry('pdf-to-epub', 'menu_book', 'home.pdfToEpub.title', 'home.pdfToEpub.desc', 'pdfToEpub.tags', 'convert')}">
</div>
<div
th:replace="~{fragments/navbarEntry :: navbarEntry('pdf-to-pdfa', 'picture_as_pdf', 'home.pdfToPDFA.title', 'home.pdfToPDFA.desc', 'pdfToPDFA.tags', 'convert')}">
</div>
@ -163,6 +166,9 @@
<div
th:replace="~{fragments/navbarEntry :: navbarEntry('pdf-to-pdfa', 'picture_as_pdf', 'home.pdfToPDFA.title', 'home.pdfToPDFA.desc', 'pdfToPDFA.tags', 'convert')}">
</div>
<div
th:replace="~{fragments/navbarEntry :: navbarEntry('pdf-to-epub', 'menu_book', 'home.pdfToEpub.title', 'home.pdfToEpub.desc', 'pdfToEpub.tags', 'convert')}">
</div>
<div
th:replace="~{fragments/navbarEntry :: navbarEntry('pdf-to-word', 'description', 'home.PDFToWord.title', 'home.PDFToWord.desc', 'PDFToWord.tags', 'convert')}">
</div>

View File

@ -0,0 +1,328 @@
package stirling.software.SPDF.controller.api.converters;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.ArgumentCaptor;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.MockedStatic;
import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.mock.web.MockMultipartFile;
import stirling.software.SPDF.config.EndpointConfiguration;
import stirling.software.SPDF.model.api.converters.ConvertPdfToEpubRequest;
import stirling.software.SPDF.model.api.converters.ConvertPdfToEpubRequest.OutputFormat;
import stirling.software.SPDF.model.api.converters.ConvertPdfToEpubRequest.TargetDevice;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.ProcessExecutor;
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
import stirling.software.common.util.ProcessExecutor.Processes;
import stirling.software.common.util.TempFileManager;
/**
 * Unit tests for {@link ConvertPDFToEpubController}. Calibre is never launched: the static {@link
 * ProcessExecutor} factory is mocked and the stubbed run writes a small marker file where the
 * converter output is expected.
 */
@ExtendWith(MockitoExtension.class)
class ConvertPDFToEpubControllerTest {

    private static final MediaType EPUB_MEDIA_TYPE = MediaType.valueOf("application/epub+zip");
    private static final String FILTERED_CSS =
            "font-family,color,background-color,margin-left,margin-right";

    @Mock private TempFileManager tempFileManager;
    @Mock private EndpointConfiguration endpointConfiguration;
    @InjectMocks private ConvertPDFToEpubController controller;

    @Test
    void convertPdfToEpub_buildsGoldenCommandAndCleansUp() throws Exception {
        when(endpointConfiguration.isGroupEnabled("Calibre")).thenReturn(true);
        ConvertPdfToEpubRequest request = requestFor("novel.pdf");

        Path workingDir = Files.createTempDirectory("pdf-epub-test-");
        when(tempFileManager.createTempDirectory()).thenReturn(workingDir);
        AtomicReference<Path> deletedDir = stubRecursiveTempDirectoryDeletion();

        try (MockedStatic<ProcessExecutor> pe = Mockito.mockStatic(ProcessExecutor.class);
                MockedStatic<GeneralUtils> gu = Mockito.mockStatic(GeneralUtils.class)) {
            Path expectedInput = workingDir.resolve("novel.pdf");
            Path expectedOutput = workingDir.resolve("novel.epub");
            ArgumentCaptor<List<String>> commandCaptor =
                    stubCalibreRun(pe, workingDir, expectedOutput, "epub");
            gu.when(() -> GeneralUtils.generateFilename("novel.pdf", "_convertedToEPUB.epub"))
                    .thenReturn("novel_convertedToEPUB.epub");

            ResponseEntity<byte[]> response = controller.convertPdfToEpub(request);

            List<String> command = commandCaptor.getValue();
            assertEquals(11, command.size());
            assertEquals("ebook-convert", command.get(0));
            assertEquals(expectedInput.toString(), command.get(1));
            assertEquals(expectedOutput.toString(), command.get(2));
            assertTrue(command.contains("--enable-heuristics"));
            assertTrue(command.contains("--insert-blank-line"));
            assertTrue(command.contains("--filter-css"));
            assertTrue(command.contains(FILTERED_CSS));
            assertTrue(command.contains("--chapter"));
            assertTrue(command.stream().anyMatch(arg -> arg.contains("Chapter\\s+")));
            assertTrue(command.contains("--output-profile"));
            assertTrue(command.contains(TargetDevice.TABLET_PHONE_IMAGES.getCalibreProfile()));
            assertEquals(EPUB_MEDIA_TYPE, response.getHeaders().getContentType());
            assertEquals(
                    "novel_convertedToEPUB.epub",
                    response.getHeaders().getContentDisposition().getFilename());
            assertEquals("epub", new String(response.getBody(), StandardCharsets.UTF_8));
            verify(tempFileManager).deleteTempDirectory(workingDir);
            assertEquals(workingDir, deletedDir.get());
        } finally {
            deleteIfExists(workingDir);
        }
    }

    @Test
    void convertPdfToEpub_respectsOptions() throws Exception {
        when(endpointConfiguration.isGroupEnabled("Calibre")).thenReturn(true);
        ConvertPdfToEpubRequest request = requestFor("story.pdf");
        request.setDetectChapters(false);
        request.setTargetDevice(TargetDevice.KINDLE_EINK_TEXT);

        Path workingDir = Files.createTempDirectory("pdf-epub-options-test-");
        when(tempFileManager.createTempDirectory()).thenReturn(workingDir);
        stubRecursiveTempDirectoryDeletion();

        try (MockedStatic<ProcessExecutor> pe = Mockito.mockStatic(ProcessExecutor.class);
                MockedStatic<GeneralUtils> gu = Mockito.mockStatic(GeneralUtils.class)) {
            Path expectedOutput = workingDir.resolve("story.epub");
            ArgumentCaptor<List<String>> commandCaptor =
                    stubCalibreRun(pe, workingDir, expectedOutput, "epub");
            gu.when(() -> GeneralUtils.generateFilename("story.pdf", "_convertedToEPUB.epub"))
                    .thenReturn("story_convertedToEPUB.epub");

            ResponseEntity<byte[]> response = controller.convertPdfToEpub(request);

            List<String> command = commandCaptor.getValue();
            assertTrue(command.stream().noneMatch(arg -> "--chapter".equals(arg)));
            assertTrue(command.contains("--output-profile"));
            assertTrue(command.contains(TargetDevice.KINDLE_EINK_TEXT.getCalibreProfile()));
            assertTrue(command.contains("--filter-css"));
            assertTrue(command.contains(FILTERED_CSS));
            assertTrue(command.size() >= 9);
            assertEquals(EPUB_MEDIA_TYPE, response.getHeaders().getContentType());
            assertEquals(
                    "story_convertedToEPUB.epub",
                    response.getHeaders().getContentDisposition().getFilename());
            assertEquals("epub", new String(response.getBody(), StandardCharsets.UTF_8));
        } finally {
            deleteIfExists(workingDir);
        }
    }

    @Test
    void convertPdfToAzw3_buildsCorrectCommandAndOutput() throws Exception {
        when(endpointConfiguration.isGroupEnabled("Calibre")).thenReturn(true);
        ConvertPdfToEpubRequest request = requestFor("book.pdf");
        request.setOutputFormat(OutputFormat.AZW3);
        request.setDetectChapters(false);
        request.setTargetDevice(TargetDevice.KINDLE_EINK_TEXT);

        Path workingDir = Files.createTempDirectory("pdf-azw3-test-");
        when(tempFileManager.createTempDirectory()).thenReturn(workingDir);
        stubRecursiveTempDirectoryDeletion();

        try (MockedStatic<ProcessExecutor> pe = Mockito.mockStatic(ProcessExecutor.class);
                MockedStatic<GeneralUtils> gu = Mockito.mockStatic(GeneralUtils.class)) {
            Path expectedInput = workingDir.resolve("book.pdf");
            Path expectedOutput = workingDir.resolve("book.azw3");
            ArgumentCaptor<List<String>> commandCaptor =
                    stubCalibreRun(pe, workingDir, expectedOutput, "azw3");
            gu.when(() -> GeneralUtils.generateFilename("book.pdf", "_convertedToAZW3.azw3"))
                    .thenReturn("book_convertedToAZW3.azw3");

            ResponseEntity<byte[]> response = controller.convertPdfToEpub(request);

            List<String> command = commandCaptor.getValue();
            assertEquals("ebook-convert", command.get(0));
            assertEquals(expectedInput.toString(), command.get(1));
            assertEquals(expectedOutput.toString(), command.get(2));
            assertTrue(command.contains("--enable-heuristics"));
            assertTrue(command.contains("--insert-blank-line"));
            assertTrue(command.contains("--filter-css"));
            assertTrue(command.stream().noneMatch(arg -> "--chapter".equals(arg)));
            assertTrue(command.contains("--output-profile"));
            assertTrue(command.contains(TargetDevice.KINDLE_EINK_TEXT.getCalibreProfile()));
            assertEquals(
                    MediaType.valueOf("application/vnd.amazon.ebook"),
                    response.getHeaders().getContentType());
            assertEquals(
                    "book_convertedToAZW3.azw3",
                    response.getHeaders().getContentDisposition().getFilename());
            assertEquals("azw3", new String(response.getBody(), StandardCharsets.UTF_8));
            verify(tempFileManager).deleteTempDirectory(workingDir);
        } finally {
            deleteIfExists(workingDir);
        }
    }

    /** Creates a request whose only populated field is a small fake PDF upload named as given. */
    private static ConvertPdfToEpubRequest requestFor(String filename) {
        MockMultipartFile pdfFile =
                new MockMultipartFile(
                        "fileInput",
                        filename,
                        "application/pdf",
                        // Explicit charset keeps the fixture independent of the platform default.
                        "content".getBytes(StandardCharsets.UTF_8));
        ConvertPdfToEpubRequest request = new ConvertPdfToEpubRequest();
        request.setFileInput(pdfFile);
        return request;
    }

    /**
     * Makes the mocked {@link TempFileManager} really remove the directory it is asked to delete.
     *
     * @return holder that receives the directory passed to {@code deleteTempDirectory}
     */
    private AtomicReference<Path> stubRecursiveTempDirectoryDeletion() throws Exception {
        AtomicReference<Path> deletedDir = new AtomicReference<>();
        doAnswer(
                        invocation -> {
                            Path dir = invocation.getArgument(0);
                            deletedDir.set(dir);
                            deleteIfExists(dir);
                            return null;
                        })
                .when(tempFileManager)
                .deleteTempDirectory(any(Path.class));
        return deletedDir;
    }

    /**
     * Routes {@code ProcessExecutor.getInstance(CALIBRE)} to a mock executor whose run succeeds
     * (return code 0) and writes {@code outputContent} to {@code expectedOutput}.
     *
     * @return captor holding the command list handed to the executor
     */
    private static ArgumentCaptor<List<String>> stubCalibreRun(
            MockedStatic<ProcessExecutor> pe,
            Path workingDir,
            Path expectedOutput,
            String outputContent)
            throws Exception {
        ProcessExecutor executor = mock(ProcessExecutor.class);
        pe.when(() -> ProcessExecutor.getInstance(Processes.CALIBRE)).thenReturn(executor);
        ProcessExecutorResult execResult = mock(ProcessExecutorResult.class);
        when(execResult.getRc()).thenReturn(0);

        @SuppressWarnings("unchecked")
        ArgumentCaptor<List<String>> commandCaptor = ArgumentCaptor.forClass(List.class);
        when(executor.runCommandWithOutputHandling(
                        commandCaptor.capture(), eq(workingDir.toFile())))
                .thenAnswer(
                        invocation -> {
                            Files.writeString(expectedOutput, outputContent);
                            return execResult;
                        });
        return commandCaptor;
    }

    /** Recursively deletes {@code directory} if it exists; children are removed before parents. */
    private static void deleteIfExists(Path directory) throws IOException {
        if (directory == null || !Files.exists(directory)) {
            return;
        }
        try (Stream<Path> paths = Files.walk(directory)) {
            paths.sorted(Comparator.reverseOrder())
                    .forEach(
                            path -> {
                                try {
                                    Files.deleteIfExists(path);
                                } catch (IOException ignored) {
                                    // Best-effort test cleanup: a leftover temp file must not
                                    // fail the test.
                                }
                            });
        }
    }
}

View File

@ -110,6 +110,42 @@ class ConverterWebControllerTest {
}
}
@Nested
@DisplayName("PDF to EPUB endpoint tests")
class PdfToEpubTests {

    /** Creates an EndpointConfiguration mock reporting the given state for "pdf-to-epub". */
    private EndpointConfiguration endpointConfigWithPdfToEpub(boolean enabled) {
        EndpointConfiguration config = mock(EndpointConfiguration.class);
        when(config.isEndpointEnabled(eq("pdf-to-epub"))).thenReturn(enabled);
        return config;
    }

    @Test
    @DisplayName("Should return 404 when endpoint disabled")
    void shouldReturn404WhenDisabled() throws Exception {
        try (MockedStatic<ApplicationContextProvider> contextProvider =
                org.mockito.Mockito.mockStatic(ApplicationContextProvider.class)) {
            EndpointConfiguration disabledConfig = endpointConfigWithPdfToEpub(false);
            contextProvider
                    .when(() -> ApplicationContextProvider.getBean(EndpointConfiguration.class))
                    .thenReturn(disabledConfig);

            mockMvc.perform(get("/pdf-to-epub")).andExpect(status().isNotFound());
        }
    }

    @Test
    @DisplayName("Should return OK when endpoint enabled")
    void shouldReturnOkWhenEnabled() throws Exception {
        try (MockedStatic<ApplicationContextProvider> contextProvider =
                org.mockito.Mockito.mockStatic(ApplicationContextProvider.class)) {
            EndpointConfiguration enabledConfig = endpointConfigWithPdfToEpub(true);
            contextProvider
                    .when(() -> ApplicationContextProvider.getBean(EndpointConfiguration.class))
                    .thenReturn(enabledConfig);

            mockMvc.perform(get("/pdf-to-epub"))
                    .andExpect(status().isOk())
                    .andExpect(view().name("convert/pdf-to-epub"))
                    .andExpect(model().attribute("currentPage", "pdf-to-epub"));
        }
    }
}
@Test
@DisplayName("Should handle pdf-to-img with default maxDPI=500")
void shouldHandlePdfToImgWithDefaultMaxDpi() throws Exception {