mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2026-02-17 13:52:14 +01:00
feat(docker-runtime): unified Debian-based images, dynamic path resolution & enhanced UNO/LibreOffice handling (#4880)
# Description of Changes ### What was changed This PR introduces a major refinement to the Docker runtime, system path resolution, conversion tooling, and integration logic across the codebase. Key improvements include: - Migration of **Dockerfile**, **Dockerfile.fat** to a unified Debian-based environment. - Introduction of **RuntimePathConfig** enhancements to dynamically resolve: - `weasyprint`, `unoconvert`, `calibre`, `ocrmypdf`, `soffice` - Tesseract `tessdata` paths with Docker-aware defaults. - Support for **UNO server (unoserver/unoconvert)** as primary document converter with automatic fallback to `soffice`. - Isolation of Python environments for WeasyPrint and UNO tooling. - Updated controllers and services to correctly inject `RuntimePathConfig`. - Improved process execution logic in converters and OCR handling. - Major updates to `init.sh` and `init-without-ocr.sh`: - Unified environment initialization - Proper UID/GID remapping - Safer permissions handling - Automatic Tesseract path detection - Reliable startup of headless LibreOffice + Xvfb + UNO server - Full test suite updates: - Adaptation to new conversion paths - Mocking of UNO and LibreOffice commands - More robust Docker test logic - Updated example docker-compose files referencing GHCR test images. - Expanded configuration schema for new operations paths. ### Why the change was made These changes address long-standing issues around: - Inconsistent or missing binary paths between image variants. - Reduced reliability of document conversions (UNO vs. soffice). - Lack of uniform runtime initialization across Docker images. - Repetitive environment setup logic split across multiple scripts. - Fragile test scenarios tied to Alpine-based images. Switching to a unified Debian-based runtime significantly improves: - Compatibility with LibreOffice, Calibre, WebEngine and graphics stack. - UNO stability for document conversions. - Tesseract deterministic behavior. - Debuggability and reliability of CI/CD Docker-based tests. The improvements to `RuntimePathConfig` ensure all system binaries are fully configurable and correctly detected at runtime. --- ## Checklist ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [x] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### Translations (if applicable) - [ ] I ran [`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [x] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details.
This commit is contained in:
@@ -41,6 +41,8 @@ public class ExternalAppDepConfig {
|
||||
private final String weasyprintPath;
|
||||
private final String unoconvPath;
|
||||
private final String calibrePath;
|
||||
private final String ocrMyPdfPath;
|
||||
private final String sOfficePath;
|
||||
|
||||
/**
|
||||
* Map of command(binary) -> affected groups (e.g. "gs" -> ["Ghostscript"]). Immutable to avoid
|
||||
@@ -58,11 +60,13 @@ public class ExternalAppDepConfig {
|
||||
this.weasyprintPath = runtimePathConfig.getWeasyPrintPath();
|
||||
this.unoconvPath = runtimePathConfig.getUnoConvertPath();
|
||||
this.calibrePath = runtimePathConfig.getCalibrePath();
|
||||
this.ocrMyPdfPath = runtimePathConfig.getOcrMyPdfPath();
|
||||
this.sOfficePath = runtimePathConfig.getSOfficePath();
|
||||
|
||||
Map<String, List<String>> tmp = new HashMap<>();
|
||||
tmp.put("gs", List.of("Ghostscript"));
|
||||
tmp.put("ocrmypdf", List.of("OCRmyPDF"));
|
||||
tmp.put("soffice", List.of("LibreOffice"));
|
||||
tmp.put(ocrMyPdfPath, List.of("OCRmyPDF"));
|
||||
tmp.put(sOfficePath, List.of("LibreOffice"));
|
||||
tmp.put(weasyprintPath, List.of("Weasyprint"));
|
||||
tmp.put("pdftohtml", List.of("Pdftohtml"));
|
||||
tmp.put(unoconvPath, List.of("Unoconvert"));
|
||||
|
||||
@@ -93,6 +93,7 @@ public class ConvertOfficeController {
|
||||
Files.copy(inputFile.getInputStream(), inputPath, StandardCopyOption.REPLACE_EXISTING);
|
||||
}
|
||||
|
||||
Path libreOfficeProfile = null;
|
||||
try {
|
||||
ProcessExecutorResult result;
|
||||
// Run Unoconvert command
|
||||
@@ -112,8 +113,10 @@ public class ConvertOfficeController {
|
||||
.runCommandWithOutputHandling(command);
|
||||
} // Run soffice command
|
||||
else {
|
||||
libreOfficeProfile = Files.createTempDirectory("libreoffice_profile_");
|
||||
List<String> command = new ArrayList<>();
|
||||
command.add("soffice");
|
||||
command.add(runtimePathConfig.getSOfficePath());
|
||||
command.add("-env:UserInstallation=" + libreOfficeProfile.toUri().toString());
|
||||
command.add("--headless");
|
||||
command.add("--nologo");
|
||||
command.add("--convert-to");
|
||||
@@ -169,6 +172,9 @@ public class ConvertOfficeController {
|
||||
} catch (IOException e) {
|
||||
log.warn("Failed to delete temp input file: {}", inputPath, e);
|
||||
}
|
||||
if (libreOfficeProfile != null) {
|
||||
FileUtils.deleteQuietly(libreOfficeProfile.toFile());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
import stirling.software.common.configuration.RuntimePathConfig;
|
||||
import stirling.software.common.model.api.PDFFile;
|
||||
import stirling.software.common.util.PDFToFile;
|
||||
import stirling.software.common.util.TempFileManager;
|
||||
@@ -24,6 +25,7 @@ import stirling.software.common.util.TempFileManager;
|
||||
public class ConvertPDFToHtml {
|
||||
|
||||
private final TempFileManager tempFileManager;
|
||||
private final RuntimePathConfig runtimePathConfig;
|
||||
|
||||
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/pdf/html")
|
||||
@Operation(
|
||||
@@ -32,7 +34,7 @@ public class ConvertPDFToHtml {
|
||||
"This endpoint converts a PDF file to HTML format. Input:PDF Output:HTML Type:SISO")
|
||||
public ResponseEntity<byte[]> processPdfToHTML(@ModelAttribute PDFFile file) throws Exception {
|
||||
MultipartFile inputFile = file.getFileInput();
|
||||
PDFToFile pdfToFile = new PDFToFile(tempFileManager);
|
||||
PDFToFile pdfToFile = new PDFToFile(tempFileManager, runtimePathConfig);
|
||||
return pdfToFile.processPdfToHtml(inputFile);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ import lombok.RequiredArgsConstructor;
|
||||
import stirling.software.SPDF.model.api.converters.PdfToPresentationRequest;
|
||||
import stirling.software.SPDF.model.api.converters.PdfToTextOrRTFRequest;
|
||||
import stirling.software.SPDF.model.api.converters.PdfToWordRequest;
|
||||
import stirling.software.common.configuration.RuntimePathConfig;
|
||||
import stirling.software.common.model.api.PDFFile;
|
||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
import stirling.software.common.util.GeneralUtils;
|
||||
@@ -35,6 +36,7 @@ public class ConvertPDFToOffice {
|
||||
|
||||
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||
private final TempFileManager tempFileManager;
|
||||
private final RuntimePathConfig runtimePathConfig;
|
||||
|
||||
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/pdf/presentation")
|
||||
@Operation(
|
||||
@@ -47,7 +49,7 @@ public class ConvertPDFToOffice {
|
||||
throws IOException, InterruptedException {
|
||||
MultipartFile inputFile = request.getFileInput();
|
||||
String outputFormat = request.getOutputFormat();
|
||||
PDFToFile pdfToFile = new PDFToFile(tempFileManager);
|
||||
PDFToFile pdfToFile = new PDFToFile(tempFileManager, runtimePathConfig);
|
||||
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "impress_pdf_import");
|
||||
}
|
||||
|
||||
@@ -72,7 +74,7 @@ public class ConvertPDFToOffice {
|
||||
MediaType.TEXT_PLAIN);
|
||||
}
|
||||
} else {
|
||||
PDFToFile pdfToFile = new PDFToFile(tempFileManager);
|
||||
PDFToFile pdfToFile = new PDFToFile(tempFileManager, runtimePathConfig);
|
||||
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
|
||||
}
|
||||
}
|
||||
@@ -87,7 +89,7 @@ public class ConvertPDFToOffice {
|
||||
throws IOException, InterruptedException {
|
||||
MultipartFile inputFile = request.getFileInput();
|
||||
String outputFormat = request.getOutputFormat();
|
||||
PDFToFile pdfToFile = new PDFToFile(tempFileManager);
|
||||
PDFToFile pdfToFile = new PDFToFile(tempFileManager, runtimePathConfig);
|
||||
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
|
||||
}
|
||||
|
||||
@@ -100,7 +102,7 @@ public class ConvertPDFToOffice {
|
||||
public ResponseEntity<byte[]> processPdfToXML(@ModelAttribute PDFFile file) throws Exception {
|
||||
MultipartFile inputFile = file.getFileInput();
|
||||
|
||||
PDFToFile pdfToFile = new PDFToFile(tempFileManager);
|
||||
PDFToFile pdfToFile = new PDFToFile(tempFileManager, runtimePathConfig);
|
||||
return pdfToFile.processPdfToOfficeFormat(inputFile, "xml", "writer_pdf_import");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -71,9 +71,11 @@ import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import stirling.software.SPDF.model.api.converters.PdfToPdfARequest;
|
||||
import stirling.software.common.configuration.RuntimePathConfig;
|
||||
import stirling.software.common.util.ExceptionUtils;
|
||||
import stirling.software.common.util.ProcessExecutor;
|
||||
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
|
||||
@@ -83,8 +85,11 @@ import stirling.software.common.util.WebResponseUtils;
|
||||
@RequestMapping("/api/v1/convert")
|
||||
@Slf4j
|
||||
@Tag(name = "Convert", description = "Convert APIs")
|
||||
@RequiredArgsConstructor
|
||||
public class ConvertPDFToPDFA {
|
||||
|
||||
private final RuntimePathConfig runtimePathConfig;
|
||||
|
||||
private static final String ICC_RESOURCE_PATH = "/icc/sRGB2014.icc";
|
||||
private static final int PDFA_COMPATIBILITY_POLICY = 1;
|
||||
|
||||
@@ -1043,26 +1048,33 @@ public class ConvertPDFToPDFA {
|
||||
? "pdf:writer_pdf_Export:{\"SelectPdfVersion\":{\"type\":\"long\",\"value\":\"2\"}}"
|
||||
: "pdf:writer_pdf_Export:{\"SelectPdfVersion\":{\"type\":\"long\",\"value\":\"1\"}}";
|
||||
|
||||
// Prepare LibreOffice command
|
||||
List<String> command =
|
||||
new ArrayList<>(
|
||||
Arrays.asList(
|
||||
"soffice",
|
||||
"--headless",
|
||||
"--nologo",
|
||||
"--convert-to",
|
||||
pdfFilter,
|
||||
"--outdir",
|
||||
tempOutputDir.toString(),
|
||||
tempInputFile.toString()));
|
||||
Path libreOfficeProfile = Files.createTempDirectory("libreoffice_profile_");
|
||||
try {
|
||||
// Prepare LibreOffice command
|
||||
List<String> command =
|
||||
new ArrayList<>(
|
||||
Arrays.asList(
|
||||
runtimePathConfig.getSOfficePath(),
|
||||
"-env:UserInstallation="
|
||||
+ libreOfficeProfile.toUri().toString(),
|
||||
"--headless",
|
||||
"--nologo",
|
||||
"--convert-to",
|
||||
pdfFilter,
|
||||
"--outdir",
|
||||
tempOutputDir.toString(),
|
||||
tempInputFile.toString()));
|
||||
|
||||
ProcessExecutorResult returnCode =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
|
||||
.runCommandWithOutputHandling(command);
|
||||
ProcessExecutorResult returnCode =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
|
||||
.runCommandWithOutputHandling(command);
|
||||
|
||||
if (returnCode.getRc() != 0) {
|
||||
log.error("PDF/A conversion failed with return code: {}", returnCode.getRc());
|
||||
throw ExceptionUtils.createPdfaConversionFailedException();
|
||||
if (returnCode.getRc() != 0) {
|
||||
log.error("PDF/A conversion failed with return code: {}", returnCode.getRc());
|
||||
throw ExceptionUtils.createPdfaConversionFailedException();
|
||||
}
|
||||
} finally {
|
||||
FileUtils.deleteQuietly(libreOfficeProfile.toFile());
|
||||
}
|
||||
|
||||
// Get the output file
|
||||
|
||||
@@ -37,10 +37,17 @@ import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import stirling.software.SPDF.config.EndpointConfiguration;
|
||||
import stirling.software.SPDF.model.api.misc.ProcessPdfWithOcrRequest;
|
||||
import stirling.software.common.configuration.RuntimePathConfig;
|
||||
import stirling.software.common.model.ApplicationProperties;
|
||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
import stirling.software.common.util.*;
|
||||
import stirling.software.common.util.ExceptionUtils;
|
||||
import stirling.software.common.util.GeneralUtils;
|
||||
import stirling.software.common.util.ProcessExecutor;
|
||||
import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
|
||||
import stirling.software.common.util.TempDirectory;
|
||||
import stirling.software.common.util.TempFile;
|
||||
import stirling.software.common.util.TempFileManager;
|
||||
import stirling.software.common.util.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/misc")
|
||||
@@ -53,6 +60,7 @@ public class OCRController {
|
||||
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||
private final TempFileManager tempFileManager;
|
||||
private final EndpointConfiguration endpointConfiguration;
|
||||
private final RuntimePathConfig runtimePathConfig;
|
||||
|
||||
private boolean isOcrMyPdfEnabled() {
|
||||
return endpointConfiguration.isGroupEnabled("OCRmyPDF");
|
||||
@@ -64,7 +72,7 @@ public class OCRController {
|
||||
|
||||
/** Gets the list of available Tesseract languages from the tessdata directory */
|
||||
public List<String> getAvailableTesseractLanguages() {
|
||||
String tessdataDir = applicationProperties.getSystem().getTessdataDir();
|
||||
String tessdataDir = runtimePathConfig.getTessDataPath();
|
||||
File[] files = new File(tessdataDir).listFiles();
|
||||
if (files == null) {
|
||||
return Collections.emptyList();
|
||||
@@ -80,9 +88,10 @@ public class OCRController {
|
||||
@Operation(
|
||||
summary = "Process a PDF file with OCR",
|
||||
description =
|
||||
"This endpoint processes a PDF file using OCR (Optical Character Recognition). "
|
||||
+ "Users can specify languages, sidecar, deskew, clean, cleanFinal, ocrType, ocrRenderType, and removeImagesAfter options. "
|
||||
+ "Uses OCRmyPDF if available, falls back to Tesseract. Input:PDF Output:PDF Type:SI-Conditional")
|
||||
"This endpoint processes a PDF file using OCR (Optical Character Recognition). Users can"
|
||||
+ " specify languages, sidecar, deskew, clean, cleanFinal, ocrType, ocrRenderType,"
|
||||
+ " and removeImagesAfter options. Uses OCRmyPDF if available, falls back to"
|
||||
+ " Tesseract. Input:PDF Output:PDF Type:SI-Conditional")
|
||||
public ResponseEntity<byte[]> processPdfWithOCR(
|
||||
@ModelAttribute ProcessPdfWithOcrRequest request)
|
||||
throws IOException, InterruptedException {
|
||||
@@ -217,7 +226,7 @@ public class OCRController {
|
||||
List<String> command =
|
||||
new ArrayList<>(
|
||||
Arrays.asList(
|
||||
"ocrmypdf",
|
||||
runtimePathConfig.getOcrMyPdfPath(),
|
||||
"--verbose",
|
||||
"2",
|
||||
"--output-type",
|
||||
|
||||
@@ -14,16 +14,20 @@ import io.swagger.v3.oas.annotations.Hidden;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import stirling.software.common.configuration.RuntimePathConfig;
|
||||
import stirling.software.common.model.ApplicationProperties;
|
||||
import stirling.software.common.util.CheckProgramInstall;
|
||||
|
||||
@Controller
|
||||
@Tag(name = "Misc", description = "Miscellaneous APIs")
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class OtherWebController {
|
||||
|
||||
private final ApplicationProperties applicationProperties;
|
||||
private final RuntimePathConfig runtimePathConfig;
|
||||
|
||||
@GetMapping("/compress-pdf")
|
||||
@Hidden
|
||||
@@ -120,7 +124,7 @@ public class OtherWebController {
|
||||
}
|
||||
|
||||
public List<String> getAvailableTesseractLanguages() {
|
||||
String tessdataDir = applicationProperties.getSystem().getTessdataDir();
|
||||
String tessdataDir = runtimePathConfig.getTessDataPath();
|
||||
File[] files = new File(tessdataDir).listFiles();
|
||||
if (files == null) {
|
||||
return Collections.emptyList();
|
||||
|
||||
@@ -115,7 +115,7 @@ system:
|
||||
showUpdate: false # see when a new update is available
|
||||
showUpdateOnlyAdmin: false # only admins can see when a new update is available, depending on showUpdate it must be set to 'true'
|
||||
customHTMLFiles: false # enable to have files placed in /customFiles/templates override the existing template HTML files
|
||||
tessdataDir: /usr/share/tessdata # path to the directory containing the Tessdata files. This setting is relevant for Windows systems. For Windows users, this path should be adjusted to point to the appropriate directory where the Tessdata files are stored.
|
||||
tessdataDir: "" # path to the directory containing the Tessdata files. This setting is relevant for Windows systems. For Windows users, this path should be adjusted to point to the appropriate directory where the Tessdata files are stored.
|
||||
enableAnalytics: null # Master toggle for analytics: set to 'true' to enable all analytics, 'false' to disable all analytics, or leave as 'null' to prompt admin on first launch
|
||||
enablePosthog: null # Enable PostHog analytics (open-source product analytics): set to 'true' to enable, 'false' to disable, or 'null' to enable by default when analytics is enabled
|
||||
enableScarf: null # Enable Scarf pixel: set to 'true' to enable, 'false' to disable, or 'null' to enable by default when analytics is enabled
|
||||
@@ -150,6 +150,8 @@ system:
|
||||
weasyprint: '' # Defaults to /opt/venv/bin/weasyprint
|
||||
unoconvert: '' # Defaults to /opt/venv/bin/unoconvert
|
||||
calibre: '' # Defaults to /usr/bin/ebook-convert
|
||||
ocrmypdf: '' # Defaults to /usr/bin/ocrmypdf
|
||||
soffice: '' # Defaults to /usr/bin/soffice
|
||||
fileUploadLimit: '' # Defaults to "". No limit when string is empty. Set a number, between 0 and 999, followed by one of the following strings to set a limit. "KB", "MB", "GB".
|
||||
tempFileManagement:
|
||||
baseTmpDir: '' # Defaults to java.io.tmpdir/stirling-pdf
|
||||
|
||||
@@ -32,6 +32,8 @@ class ExternalAppDepConfigTest {
|
||||
void setUp() {
|
||||
when(runtimePathConfig.getWeasyPrintPath()).thenReturn("/custom/weasyprint");
|
||||
when(runtimePathConfig.getUnoConvertPath()).thenReturn("/custom/unoconvert");
|
||||
when(runtimePathConfig.getCalibrePath()).thenReturn("/custom/calibre");
|
||||
when(runtimePathConfig.getOcrMyPdfPath()).thenReturn("/custom/ocrmypdf");
|
||||
lenient()
|
||||
.when(endpointConfiguration.getEndpointsForGroup(anyString()))
|
||||
.thenReturn(Set.of());
|
||||
@@ -45,6 +47,8 @@ class ExternalAppDepConfigTest {
|
||||
|
||||
assertEquals(List.of("Weasyprint"), mapping.get("/custom/weasyprint"));
|
||||
assertEquals(List.of("Unoconvert"), mapping.get("/custom/unoconvert"));
|
||||
assertEquals(List.of("Calibre"), mapping.get("/custom/calibre"));
|
||||
assertEquals(List.of("OCRmyPDF"), mapping.get("/custom/ocrmypdf"));
|
||||
assertEquals(List.of("Ghostscript"), mapping.get("gs"));
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user