refactor(core): parallel, timeout-safe external dependency probes with version gating + tests (#4640)

# Description of Changes

**What was changed**
- Rewrote `ExternalAppDepConfig` to:
  - Run dependency probes in parallel with per-call timeouts to avoid
    startup hangs on broken PATHs.
  - Support both Unix (`which`) and Windows (`where`) lookups in a
    single codepath with a fallback `--version` probe.
  - Centralize version extraction via a regex (`(\d+(?:\.\d+){0,2})`) and
    add a small `Version` comparator (major.minor.patch).
  - Enforce a minimum WeasyPrint version (`>= 58.0`), disabling affected
    group(s) if the requirement is not met.
  - Improve Python/OpenCV handling:
    - Resolve interpreter (`python3` → `python`) and check `import cv2`;
      disable OpenCV group if unavailable.
    - Disable both Python and OpenCV groups when no interpreter is present.
  - Keep the command→group mapping immutable and include
    runtime-configured paths for WeasyPrint/Unoconvert.
  - Improve feature name formatting derived from endpoints (e.g.,
    `pdf-to-html` → `PDF To Html`, `img-extract` → `Image Extract`).
  - Ensure thread pool shutdown and emit a consolidated disabled-endpoints
    summary at the end of checks.
- Added `ExternalAppDepConfigTest` (JUnit + Mockito) to cover:
  - Mapping includes runtime paths and core commands.
  - Endpoint-to-feature formatting and capitalization rules (`pdf` →
    `PDF`, mixed case normalization).
  - WeasyPrint command detection (`/custom/weasyprint`, name contains).
  - Version comparison edge cases (e.g., `58`, `57.9.2`, `58.beta`).

**Why the change was made**
- Prevents startup stalls caused by long-running or broken shell
lookups.
- Unifies platform-specific logic and de-duplicates probing/formatting
across the codebase.
- Introduces explicit version gating for WeasyPrint to ensure feature
reliability and predictable behavior.
- Makes dependency handling more observable (structured logs) and
maintainable (immutable mappings, focused helpers).
- Improves resilience of Python/OpenCV-dependent features across diverse
environments.

---

## Checklist

### General

- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Ludy 2025-10-30 00:30:10 +01:00 committed by GitHub
parent fdc8fab545
commit e4cf8d800b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 463 additions and 125 deletions

View File

@ -1,9 +1,15 @@
package stirling.software.SPDF.config;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.*;
import java.util.concurrent.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.springframework.context.annotation.Configuration;
@ -15,151 +21,302 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.common.configuration.RuntimePathConfig;
import stirling.software.common.util.RegexPatternUtils;
/**
* Dependency checker that:
*
* <ul>
*   <li>runs probes in parallel with timeouts (prevents hanging on broken PATHs)
*   <li>supports Windows and Unix in a single place
*   <li>de-duplicates the logic for version extraction and command availability
*   <li>keeps the command-to-group mapping and the feature formatting tidy and immutable
* </ul>
*/
@Configuration
@Slf4j
public class ExternalAppDepConfig {
private static final Duration DEFAULT_TIMEOUT = Duration.ofSeconds(5);
private static final Pattern VERSION_PATTERN = Pattern.compile("(\\d+(?:\\.\\d+){0,2})");
private final EndpointConfiguration endpointConfiguration;
private final boolean isWindows =
System.getProperty("os.name").toLowerCase(Locale.ROOT).contains("windows");
private final String weasyprintPath;
private final String unoconvPath;
/**
* Map of command(binary) -> affected groups (e.g. "gs" -> ["Ghostscript"]). Immutable to avoid
* accidental mutations.
*/
private final Map<String, List<String>> commandToGroupMapping;
private final ExecutorService pool =
Executors.newFixedThreadPool(
Math.max(2, Runtime.getRuntime().availableProcessors() / 2));
public ExternalAppDepConfig(
EndpointConfiguration endpointConfiguration, RuntimePathConfig runtimePathConfig) {
this.endpointConfiguration = endpointConfiguration;
weasyprintPath = runtimePathConfig.getWeasyPrintPath();
unoconvPath = runtimePathConfig.getUnoConvertPath();
this.weasyprintPath = runtimePathConfig.getWeasyPrintPath();
this.unoconvPath = runtimePathConfig.getUnoConvertPath();
commandToGroupMapping =
new HashMap<>() {
{
put("gs", List.of("Ghostscript"));
put("ocrmypdf", List.of("OCRmyPDF"));
put("soffice", List.of("LibreOffice"));
put(weasyprintPath, List.of("Weasyprint"));
put("pdftohtml", List.of("Pdftohtml"));
put(unoconvPath, List.of("Unoconvert"));
put("qpdf", List.of("qpdf"));
put("tesseract", List.of("tesseract"));
put("rar", List.of("rar")); // Required for real CBR output
}
};
}
private boolean isCommandAvailable(String command) {
try {
ProcessBuilder processBuilder = new ProcessBuilder();
if (System.getProperty("os.name").toLowerCase().contains("windows")) {
processBuilder.command("where", command);
} else {
processBuilder.command("which", command);
}
Process process = processBuilder.start();
int exitCode = process.waitFor();
return exitCode == 0;
} catch (Exception e) {
log.debug("Error checking for command {}: {}", command, e.getMessage());
return false;
}
}
private List<String> getAffectedFeatures(String group) {
return endpointConfiguration.getEndpointsForGroup(group).stream()
.map(endpoint -> formatEndpointAsFeature(endpoint))
.toList();
}
private String formatEndpointAsFeature(String endpoint) {
// First replace common terms
String feature = endpoint.replace("-", " ").replace("pdf", "PDF").replace("img", "image");
// Split into words and capitalize each word
return Arrays.stream(RegexPatternUtils.getInstance().getWordSplitPattern().split(feature))
.map(word -> capitalizeWord(word))
.collect(Collectors.joining(" "));
}
private String capitalizeWord(String word) {
if (word.isEmpty()) {
return word;
}
if ("pdf".equalsIgnoreCase(word)) {
return "PDF";
}
return word.substring(0, 1).toUpperCase() + word.substring(1).toLowerCase();
}
private void checkDependencyAndDisableGroup(String command) {
boolean isAvailable = isCommandAvailable(command);
if (!isAvailable) {
List<String> affectedGroups = commandToGroupMapping.get(command);
if (affectedGroups != null) {
for (String group : affectedGroups) {
List<String> affectedFeatures = getAffectedFeatures(group);
endpointConfiguration.disableGroup(group);
log.warn(
"Missing dependency: {} - Disabling group: {} (Affected features: {})",
command,
group,
!affectedFeatures.isEmpty()
? String.join(", ", affectedFeatures)
: "unknown");
}
}
}
Map<String, List<String>> tmp = new HashMap<>();
tmp.put("gs", List.of("Ghostscript"));
tmp.put("ocrmypdf", List.of("OCRmyPDF"));
tmp.put("soffice", List.of("LibreOffice"));
tmp.put(weasyprintPath, List.of("Weasyprint"));
tmp.put("pdftohtml", List.of("Pdftohtml"));
tmp.put(unoconvPath, List.of("Unoconvert"));
tmp.put("qpdf", List.of("qpdf"));
tmp.put("tesseract", List.of("tesseract"));
tmp.put("rar", List.of("rar"));
this.commandToGroupMapping = Collections.unmodifiableMap(tmp);
}
@PostConstruct
public void checkDependencies() {
// Check core dependencies
checkDependencyAndDisableGroup("gs");
checkDependencyAndDisableGroup("ocrmypdf");
checkDependencyAndDisableGroup("tesseract");
checkDependencyAndDisableGroup("soffice");
checkDependencyAndDisableGroup("qpdf");
checkDependencyAndDisableGroup(weasyprintPath);
checkDependencyAndDisableGroup("pdftohtml");
checkDependencyAndDisableGroup(unoconvPath);
checkDependencyAndDisableGroup("rar");
// Special handling for Python/OpenCV dependencies
boolean pythonAvailable = isCommandAvailable("python3") || isCommandAvailable("python");
if (!pythonAvailable) {
List<String> pythonFeatures = getAffectedFeatures("Python");
try {
// core checks in parallel
List<Callable<Void>> tasks =
commandToGroupMapping.keySet().stream()
.<Callable<Void>>map(
cmd ->
() -> {
checkDependencyAndDisableGroup(cmd);
return null;
})
.collect(Collectors.toList());
invokeAllWithTimeout(tasks, DEFAULT_TIMEOUT.plusSeconds(3));
// Python / OpenCV special handling
checkPythonAndOpenCV();
} finally {
endpointConfiguration.logDisabledEndpointsSummary();
pool.shutdown();
}
}
/**
 * Probes a single command and disables every endpoint group mapped to it when the command is
 * missing. For a WeasyPrint command that is present, additionally enforces the minimum
 * supported version.
 *
 * @param command binary name or path, as used as a key of {@code commandToGroupMapping}
 */
private void checkDependencyAndDisableGroup(String command) {
    if (!isCommandAvailable(command)) {
        List<String> mappedGroups = commandToGroupMapping.get(command);
        if (mappedGroups != null) {
            for (String group : mappedGroups) {
                // Collect the feature names before disabling so the warning can list them.
                List<String> features = getAffectedFeatures(group);
                endpointConfiguration.disableGroup(group);
                String featureText =
                        features.isEmpty() ? "unknown" : String.join(", ", features);
                log.warn(
                        "Missing dependency: {} - Disabling group: {} (Affected features: {})",
                        command,
                        group,
                        featureText);
            }
        }
        return;
    }

    // Only WeasyPrint has a version gate; every other available command is done here.
    if (!isWeasyprint(command)) {
        return;
    }

    Optional<String> detected = getVersionSafe(command, "--version");
    if (detected.isEmpty()) {
        // An undeterminable version only warns; the group stays enabled.
        log.warn("WeasyPrint version could not be determined ({} --version)", command);
        return;
    }

    Version installed = new Version(detected.get());
    // https://www.courtbouillon.org/blog/00040-weasyprint-58/
    Version required = new Version("58.0");
    if (installed.compareTo(required) >= 0) {
        log.info("WeasyPrint {} meets minimum {}", installed, required);
        return;
    }

    List<String> outdatedGroups =
            commandToGroupMapping.getOrDefault(command, List.of("Weasyprint"));
    for (String group : outdatedGroups) {
        endpointConfiguration.disableGroup(group);
    }
    log.warn(
            "WeasyPrint version {} is below required {} - disabling" + " group(s): {}",
            installed,
            required,
            String.join(", ", outdatedGroups));
}
/**
 * Tells whether {@code command} refers to WeasyPrint: either it equals the configured
 * WeasyPrint path, or its lower-cased form contains {@code "weasyprint"}.
 */
private boolean isWeasyprint(String command) {
    if (Objects.equals(command, weasyprintPath)) {
        return true;
    }
    String lowered = command.toLowerCase(Locale.ROOT);
    return lowered.contains("weasyprint");
}
/**
 * Returns the human-readable feature names for all endpoints of {@code group}.
 *
 * @param group endpoint group name passed to {@code getEndpointsForGroup}
 * @return unmodifiable list of formatted feature names, one per endpoint
 */
private List<String> getAffectedFeatures(String group) {
    // Work on a private copy of the endpoint collection, as the original did.
    List<String> snapshot = new ArrayList<>(endpointConfiguration.getEndpointsForGroup(group));
    List<String> features = new ArrayList<>(snapshot.size());
    for (String endpoint : snapshot) {
        features.add(formatEndpointAsFeature(endpoint));
    }
    return Collections.unmodifiableList(features);
}
/**
 * Turns an endpoint id into a display name: dashes become spaces, {@code pdf} becomes {@code
 * PDF}, {@code img} becomes {@code image}, and each resulting word is capitalized.
 */
private String formatEndpointAsFeature(String endpoint) {
    String spaced = endpoint.replace("-", " ");
    String normalized = spaced.replace("pdf", "PDF").replace("img", "image");
    String[] words = RegexPatternUtils.getInstance().getWordSplitPattern().split(normalized);

    StringJoiner feature = new StringJoiner(" ");
    for (String word : words) {
        feature.add(capitalizeWord(word));
    }
    return feature.toString();
}
/**
 * Capitalizes a single word: first character upper-case, the rest lower-case (locale
 * independent). {@code null} and empty input are returned unchanged, and any casing of {@code
 * pdf} becomes {@code PDF}.
 */
private String capitalizeWord(String word) {
    if (word == null || word.isEmpty()) {
        return word;
    }
    if (word.equalsIgnoreCase("pdf")) {
        return "PDF";
    }
    String head = word.substring(0, 1).toUpperCase(Locale.ROOT);
    String tail = word.substring(1).toLowerCase(Locale.ROOT);
    return head + tail;
}
private void checkPythonAndOpenCV() {
String python = findFirstAvailable(List.of("python3", "python")).orElse(null);
if (python == null) {
disablePythonAndOpenCV("Python interpreter not found on PATH");
return;
}
// Check OpenCV import
int ec = runAndWait(List.of(python, "-c", "import cv2"), DEFAULT_TIMEOUT).exitCode();
if (ec != 0) {
List<String> openCVFeatures = getAffectedFeatures("OpenCV");
endpointConfiguration.disableGroup("Python");
endpointConfiguration.disableGroup("OpenCV");
log.warn(
"Missing dependency: Python - Disabling Python features: {} and OpenCV features: {}",
String.join(", ", pythonFeatures),
"OpenCV not available in Python - Disabling OpenCV features: {}",
String.join(", ", openCVFeatures));
} else {
// If Python is available, check for OpenCV
try {
ProcessBuilder processBuilder = new ProcessBuilder();
if (System.getProperty("os.name").toLowerCase().contains("windows")) {
processBuilder.command("python", "-c", "import cv2");
} else {
processBuilder.command("python3", "-c", "import cv2");
}
}
/**
 * Disables both the {@code Python} and the {@code OpenCV} endpoint groups and logs which
 * features are lost.
 *
 * @param reason short explanation included in the warning
 */
private void disablePythonAndOpenCV(String reason) {
    // Resolve the feature names first, before the groups are disabled.
    String pythonFeatureList = String.join(", ", getAffectedFeatures("Python"));
    String openCvFeatureList = String.join(", ", getAffectedFeatures("OpenCV"));

    endpointConfiguration.disableGroup("Python");
    endpointConfiguration.disableGroup("OpenCV");

    log.warn(
            "Missing dependency: Python (reason: {}) - Disabling Python features: {} and OpenCV"
                    + " features: {}",
            reason,
            pythonFeatureList,
            openCvFeatureList);
}
/**
 * Returns the first command of {@code commands}, in list order, that {@code
 * isCommandAvailable} accepts; later candidates are not probed once one matches.
 */
private Optional<String> findFirstAvailable(List<String> commands) {
    return commands.stream().filter(this::isCommandAvailable).findFirst();
}
/**
 * Checks whether {@code command} can be found or executed on this machine.
 *
 * <p>First asks the OS lookup tool ({@code where} on Windows, {@code which} elsewhere); if that
 * does not succeed, falls back to running {@code command --version}. Each probe is bounded by
 * {@code DEFAULT_TIMEOUT}.
 *
 * @param command binary name or path; {@code null} or blank is reported as unavailable
 * @return {@code true} if either probe exits with code 0
 */
private boolean isCommandAvailable(String command) {
    // List.of rejects null elements, so a missing (null) configured path would throw here
    // instead of being reported as "not available". Treat null/blank as unavailable.
    if (command == null || command.isBlank()) {
        return false;
    }

    // First try OS-native lookup
    List<String> lookup = isWindows ? List.of("where", command) : List.of("which", command);
    if (runAndWait(lookup, DEFAULT_TIMEOUT).exitCode() == 0) {
        return true;
    }

    // Fallback: try `--version` when helpful (covers py-launcher shims on Windows etc.)
    ProbeResult ver = runAndWait(List.of(command, "--version"), DEFAULT_TIMEOUT);
    return ver.exitCode() == 0;
}
/**
 * Runs {@code command arg} and extracts the first version-like token (matched by {@code
 * VERSION_PATTERN}) from its combined stdout/stderr.
 *
 * @return the matched version text, or empty if the command exits non-zero, prints no version,
 *     or any exception occurs
 */
private Optional<String> getVersionSafe(String command, String arg) {
    try {
        ProbeResult probe = runAndWait(List.of(command, arg), DEFAULT_TIMEOUT);
        if (probe.exitCode() == 0) {
            Matcher versionMatcher = VERSION_PATTERN.matcher(probe.combined());
            if (versionMatcher.find()) {
                return Optional.of(versionMatcher.group(1));
            }
        }
        return Optional.empty();
    } catch (Exception e) {
        return Optional.empty();
    }
}
private void invokeAllWithTimeout(List<Callable<Void>> tasks, Duration timeout) {
try {
List<Future<Void>> futures =
pool.invokeAll(tasks, timeout.toMillis(), TimeUnit.MILLISECONDS);
for (Future<Void> f : futures) {
try {
f.get();
} catch (Exception ignored) {
}
Process process = processBuilder.start();
int exitCode = process.waitFor();
if (exitCode != 0) {
List<String> openCVFeatures = getAffectedFeatures("OpenCV");
endpointConfiguration.disableGroup("OpenCV");
log.warn(
"OpenCV not available in Python - Disabling OpenCV features: {}",
String.join(", ", openCVFeatures));
}
} catch (Exception e) {
List<String> openCVFeatures = getAffectedFeatures("OpenCV");
endpointConfiguration.disableGroup("OpenCV");
log.warn(
"Error checking OpenCV: {} - Disabling OpenCV features: {}",
e.getMessage(),
String.join(", ", openCVFeatures));
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
}
/**
 * Starts {@code cmd}, waits at most {@code timeout} for it to exit and returns its exit code and
 * captured output.
 *
 * <p>stdout and stderr are drained on background threads while the process runs. Reading them
 * only after the process has exited can block a child that fills the pipe buffer, which would
 * then be misreported as a timeout.
 *
 * @param cmd command line to execute (program followed by its arguments)
 * @param timeout maximum time to wait for the process, and afterwards for each output reader
 * @return the real exit code with trimmed output; exit code 124 if the process had to be killed
 *     after the timeout; exit code 127 if it could not be started or the wait was interrupted
 */
private ProbeResult runAndWait(List<String> cmd, Duration timeout) {
    ProcessBuilder pb = new ProcessBuilder(cmd);
    Process p = null;
    try {
        p = pb.start();
        CompletableFuture<String> out = drainAsync(p.getInputStream());
        CompletableFuture<String> err = drainAsync(p.getErrorStream());

        boolean finished = p.waitFor(timeout.toMillis(), TimeUnit.MILLISECONDS);
        if (!finished) {
            p.destroyForcibly();
            return new ProbeResult(124, "", "timeout");
        }
        int ec = p.exitValue();
        return new ProbeResult(ec, awaitDrained(out, timeout), awaitDrained(err, timeout));
    } catch (IOException e) {
        return new ProbeResult(127, "", String.valueOf(e.getMessage()));
    } catch (InterruptedException e) {
        // Do not leave the child running when the probe itself is cancelled.
        if (p != null) {
            p.destroyForcibly();
        }
        Thread.currentThread().interrupt();
        return new ProbeResult(127, "", String.valueOf(e.getMessage()));
    }
}

/** Reads {@code in} to the end on a dedicated daemon thread so the caller never blocks on it. */
private static CompletableFuture<String> drainAsync(InputStream in) {
    // One short-lived daemon thread per stream: the reads block, so they must not occupy a
    // shared pool, and daemon threads cannot keep the JVM alive.
    Executor readerThread =
            task -> {
                Thread t = new Thread(task, "dependency-probe-reader");
                t.setDaemon(true);
                t.start();
            };
    return CompletableFuture.supplyAsync(
            () -> {
                try {
                    return readStream(in);
                } catch (IOException e) {
                    throw new CompletionException(e);
                }
            },
            readerThread);
}

/** Waits up to {@code timeout} for a drained stream; yields "" if reading failed or stalled. */
private static String awaitDrained(CompletableFuture<String> pending, Duration timeout)
        throws InterruptedException {
    try {
        return pending.get(timeout.toMillis(), TimeUnit.MILLISECONDS);
    } catch (ExecutionException | TimeoutException e) {
        // The exit code is already known; missing output must not turn it into a failure.
        return "";
    }
}
/**
 * Reads {@code in} to the end as UTF-8 text.
 *
 * <p>Lines are re-joined with a single {@code '\n'} and the result is trimmed. The stream is
 * closed when reading finishes, because the wrapping reader is opened in try-with-resources.
 *
 * @param in stream to consume
 * @return the trimmed text, possibly empty
 * @throws IOException if reading from the stream fails
 */
private static String readStream(InputStream in) throws IOException {
StringBuilder sb = new StringBuilder();
try (BufferedReader br =
new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
String line;
while ((line = br.readLine()) != null) {
// Separator goes before every line except the first.
if (sb.length() > 0) sb.append('\n');
sb.append(line);
}
}
// NOTE(review): the next line looks like a removed line of the old checkDependencies() that
// the diff view interleaved here, not part of readStream - confirm against the real file.
endpointConfiguration.logDisabledEndpointsSummary();
return sb.toString().trim();
}
/**
 * Outcome of one external process probe.
 *
 * @param exitCode exit code reported for the probe
 * @param stdout captured standard output, may be {@code null}
 * @param stderr captured standard error, may be {@code null}
 */
private record ProbeResult(int exitCode, String stdout, String stderr) {
    /** Returns stdout and stderr separated by a newline; a {@code null} part counts as "". */
    String combined() {
        String out = Objects.requireNonNullElse(stdout, "");
        String err = Objects.requireNonNullElse(stderr, "");
        return out + "\n" + err;
    }
}
/**
 * Simple numeric version comparator (major.minor.patch).
 *
 * <p>Only the first three dot-separated components are considered. A component that is missing
 * or is not a plain integer counts as {@code 0}, so {@code "58"}, {@code "58.0"} and {@code
 * "58.beta"} are all equal to {@code 58.0.0}. {@link #equals(Object)} and {@link #hashCode()}
 * are consistent with {@link #compareTo(Version)}.
 */
static class Version implements Comparable<Version> {
    private static final int COMPONENTS = 3;

    private final int[] parts = new int[COMPONENTS];

    /**
     * @param ver version text such as {@code "58.0"}; surrounding whitespace is ignored
     * @throws NullPointerException if {@code ver} is {@code null}
     */
    Version(String ver) {
        Objects.requireNonNull(ver, "ver");
        String[] tokens = ver.trim().split("\\.");
        // Components beyond tokens.length keep the array's default value 0.
        for (int i = 0; i < COMPONENTS && i < tokens.length; i++) {
            parts[i] = parseComponent(tokens[i]);
        }
    }

    /** Parses one component, mapping anything that is not an integer to 0. */
    private static int parseComponent(String token) {
        try {
            return Integer.parseInt(token.trim());
        } catch (NumberFormatException e) {
            return 0;
        }
    }

    @Override
    public int compareTo(Version o) {
        // Lexicographic comparison: major first, then minor, then patch.
        return Arrays.compare(parts, o.parts);
    }

    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        return other instanceof Version && Arrays.equals(parts, ((Version) other).parts);
    }

    @Override
    public int hashCode() {
        return Arrays.hashCode(parts);
    }

    @Override
    public String toString() {
        return parts[0] + "." + parts[1] + "." + parts[2];
    }
}
}

View File

@ -0,0 +1,181 @@
package stirling.software.SPDF.config;
import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.lenient;
import static org.mockito.Mockito.when;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import stirling.software.common.configuration.RuntimePathConfig;
/**
 * Unit tests for the helpers of {@link ExternalAppDepConfig} that do not spawn processes: the
 * command-to-group mapping, feature-name formatting, WeasyPrint command detection and the
 * {@code Version} comparator. Private members are reached through reflection.
 */
@ExtendWith(MockitoExtension.class)
class ExternalAppDepConfigTest {

    private static final String WEASYPRINT_PATH = "/custom/weasyprint";
    private static final String UNOCONVERT_PATH = "/custom/unoconvert";

    /** Primitive type to wrapper class, used when matching reflective call arguments. */
    private static final Map<Class<?>, Class<?>> WRAPPER_TYPES =
            Map.of(
                    boolean.class, Boolean.class,
                    byte.class, Byte.class,
                    short.class, Short.class,
                    int.class, Integer.class,
                    long.class, Long.class,
                    float.class, Float.class,
                    double.class, Double.class,
                    char.class, Character.class);

    @Mock private EndpointConfiguration endpointConfiguration;
    @Mock private RuntimePathConfig runtimePathConfig;

    private ExternalAppDepConfig config;

    @BeforeEach
    void setUp() {
        when(runtimePathConfig.getWeasyPrintPath()).thenReturn(WEASYPRINT_PATH);
        when(runtimePathConfig.getUnoConvertPath()).thenReturn(UNOCONVERT_PATH);
        // Lenient: not every test asks for endpoints.
        lenient()
                .when(endpointConfiguration.getEndpointsForGroup(anyString()))
                .thenReturn(Set.of());

        config = new ExternalAppDepConfig(endpointConfiguration, runtimePathConfig);
    }

    @Test
    void commandToGroupMappingIncludesRuntimePaths() throws Exception {
        Map<String, List<String>> commandGroups = getCommandToGroupMapping();

        assertEquals(List.of("Weasyprint"), commandGroups.get(WEASYPRINT_PATH));
        assertEquals(List.of("Unoconvert"), commandGroups.get(UNOCONVERT_PATH));
        assertEquals(List.of("Ghostscript"), commandGroups.get("gs"));
    }

    @Test
    void getAffectedFeaturesFormatsEndpoints() throws Exception {
        // LinkedHashSet keeps the endpoint order so the expected list is deterministic.
        Set<String> orderedEndpoints = new LinkedHashSet<>(List.of("pdf-to-html", "img-extract"));
        when(endpointConfiguration.getEndpointsForGroup("Ghostscript"))
                .thenReturn(orderedEndpoints);

        @SuppressWarnings("unchecked")
        List<String> actual =
                (List<String>) invokePrivateMethod(config, "getAffectedFeatures", "Ghostscript");

        assertEquals(List.of("PDF To Html", "Image Extract"), actual);
    }

    @Test
    void formatEndpointAsFeatureConvertsNames() throws Exception {
        Object result = invokePrivateMethod(config, "formatEndpointAsFeature", "pdf-img-extract");

        assertEquals("PDF Image Extract", (String) result);
    }

    @Test
    void capitalizeWordHandlesSpecialCases() throws Exception {
        String fromPdf = (String) invokePrivateMethod(config, "capitalizeWord", "pdf");
        String fromMixedCase = (String) invokePrivateMethod(config, "capitalizeWord", "tEsT");
        String fromEmpty = (String) invokePrivateMethod(config, "capitalizeWord", "");

        assertEquals("PDF", fromPdf);
        assertEquals("Test", fromMixedCase);
        assertEquals("", fromEmpty);
    }

    @Test
    void isWeasyprintMatchesConfiguredCommands() throws Exception {
        boolean configuredPath =
                (boolean) invokePrivateMethod(config, "isWeasyprint", WEASYPRINT_PATH);
        boolean nameOnly =
                (boolean) invokePrivateMethod(config, "isWeasyprint", "/usr/bin/weasyprint-cli");
        boolean unrelated = (boolean) invokePrivateMethod(config, "isWeasyprint", "qpdf");

        assertTrue(configuredPath);
        assertTrue(nameOnly);
        assertFalse(unrelated);
    }

    @Test
    void versionComparisonHandlesDifferentFormats() {
        ExternalAppDepConfig.Version minimum = new ExternalAppDepConfig.Version("58");
        ExternalAppDepConfig.Version older = new ExternalAppDepConfig.Version("57.9.2");
        ExternalAppDepConfig.Version nonNumericMinor = new ExternalAppDepConfig.Version("58.beta");

        assertTrue(older.compareTo(minimum) < 0);
        assertEquals(0, nonNumericMinor.compareTo(minimum));
        assertEquals("58.0.0", nonNumericMinor.toString());
    }

    /** Reads the private {@code commandToGroupMapping} field of the config under test. */
    @SuppressWarnings("unchecked")
    private Map<String, List<String>> getCommandToGroupMapping() throws Exception {
        Field mappingField = ExternalAppDepConfig.class.getDeclaredField("commandToGroupMapping");
        mappingField.setAccessible(true);
        return (Map<String, List<String>>) mappingField.get(config);
    }

    /** Invokes the declared method of that name whose parameters accept {@code args}. */
    private Object invokePrivateMethod(Object target, String methodName, Object... args)
            throws Exception {
        Method resolved = findMatchingMethod(methodName, args);
        resolved.setAccessible(true);
        return resolved.invoke(target, args);
    }

    /** Returns the first declared method with a matching name, arity and parameter types. */
    private Method findMatchingMethod(String methodName, Object[] args)
            throws NoSuchMethodException {
        for (Method candidate : ExternalAppDepConfig.class.getDeclaredMethods()) {
            boolean sameName = candidate.getName().equals(methodName);
            boolean sameArity = candidate.getParameterCount() == args.length;
            if (sameName && sameArity && acceptsArguments(candidate, args)) {
                return candidate;
            }
        }
        throw new NoSuchMethodException(
                "No matching method found for " + methodName + " with provided arguments");
    }

    /** Checks every argument against the corresponding declared parameter type. */
    private boolean acceptsArguments(Method candidate, Object[] args) {
        Class<?>[] declared = candidate.getParameterTypes();
        for (int i = 0; i < declared.length; i++) {
            if (!isParameterCompatible(declared[i], args[i])) {
                return false;
            }
        }
        return true;
    }

    private boolean isParameterCompatible(Class<?> parameterType, Object arg) {
        if (arg == null) {
            // null fits any reference type but never a primitive.
            return !parameterType.isPrimitive();
        }
        Class<?> expected =
                parameterType.isPrimitive() ? getWrapperType(parameterType) : parameterType;
        return expected.isInstance(arg);
    }

    private Class<?> getWrapperType(Class<?> primitiveType) {
        Class<?> wrapper = WRAPPER_TYPES.get(primitiveType);
        if (wrapper == null) {
            throw new IllegalArgumentException("Type is not primitive: " + primitiveType);
        }
        return wrapper;
    }
}