feat(conversion): switch PDF input engine to pdftohtml for improved performance and reduced dependencies (#5820)

This commit is contained in:
Balázs Szücs
2026-03-02 14:55:42 +01:00
committed by GitHub
parent cfe040485b
commit 48dd4154e9
4 changed files with 173 additions and 30 deletions

View File

@@ -54,6 +54,12 @@ public class ConvertPDFToEpubController {
command.add(inputPath.toString());
command.add(outputPath.toString());
+ // Use pdftohtml engine (poppler) for PDF input instead of calibre's Qt-based engine.
+ // This avoids the Qt WebEngine dependency for PDF parsing and uses the lighter
+ // poppler-utils pdftohtml binary, which is already available in the container.
+ command.add("--pdf-engine");
+ command.add("pdftohtml");
// Golden defaults
command.add("--enable-heuristics");
command.add("--insert-blank-line");

View File

@@ -113,10 +113,12 @@ class ConvertPDFToEpubControllerTest {
ResponseEntity<byte[]> response = controller.convertPdfToEpub(request);
List<String> command = commandCaptor.getValue();
- assertEquals(11, command.size());
+ assertEquals(13, command.size());
assertEquals("ebook-convert", command.get(0));
assertEquals(expectedInput.toString(), command.get(1));
assertEquals(expectedOutput.toString(), command.get(2));
+ assertTrue(command.contains("--pdf-engine"));
+ assertTrue(command.contains("pdftohtml"));
assertTrue(command.contains("--enable-heuristics"));
assertTrue(command.contains("--insert-blank-line"));
assertTrue(command.contains("--filter-css"));
@@ -206,11 +208,13 @@ class ConvertPDFToEpubControllerTest {
assertTrue(command.stream().noneMatch(arg -> "--chapter".equals(arg)));
assertTrue(command.contains("--output-profile"));
assertTrue(command.contains(TargetDevice.KINDLE_EINK_TEXT.getCalibreProfile()));
+ assertTrue(command.contains("--pdf-engine"));
+ assertTrue(command.contains("pdftohtml"));
assertTrue(command.contains("--filter-css"));
assertTrue(
command.contains(
"font-family,color,background-color,margin-left,margin-right"));
- assertTrue(command.size() >= 9);
+ assertTrue(command.size() >= 11);
assertEquals(EPUB_MEDIA_TYPE, response.getHeaders().getContentType());
assertEquals(
@@ -289,6 +293,8 @@ class ConvertPDFToEpubControllerTest {
assertEquals("ebook-convert", command.get(0));
assertEquals(expectedInput.toString(), command.get(1));
assertEquals(expectedOutput.toString(), command.get(2));
+ assertTrue(command.contains("--pdf-engine"));
+ assertTrue(command.contains("pdftohtml"));
assertTrue(command.contains("--enable-heuristics"));
assertTrue(command.contains("--insert-blank-line"));
assertTrue(command.contains("--filter-css"));