feat(admin): add tessdata language management for OCR and download support (#5519)

# Description of Changes

### What was changed
- Added new admin-only API endpoints to:
  - List installed tessdata OCR languages
  - Fetch available tessdata languages from the official Tesseract
    repository
  - Download selected tessdata language files directly into the configured
    tessdata directory
- Implemented server-side validation, safe language name handling, and
directory writability checks.
- Extended the Admin Advanced Settings UI to:
  - Display installed tessdata languages
  - Show available remote languages not yet installed
  - Allow selecting and downloading additional languages via a
    multi-select UI
  - Gracefully fall back to manual download links when the tessdata
    directory is not writable
- Added new i18n strings for all related UI states (loading, success,
error, permission warnings).

### Why the change was made
- Managing OCR languages previously required manual filesystem
interaction.
- This change improves usability for administrators by enabling in-app
management of tessdata languages while maintaining security constraints.
- The writable directory check and manual fallback ensure compatibility
with restricted or containerized environments.


<img width="1282" height="832" alt="image"
src="https://github.com/user-attachments/assets/aa958730-0ffb-4fd6-9af8-87c527a476e4"
/>


---

## Checklist

### General

- [ ] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### Translations (if applicable)

- [ ] I ran
[`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Ludy
2026-01-21 23:10:47 +01:00
committed by GitHub
parent 23f872823d
commit 1436821a3a
4 changed files with 922 additions and 2 deletions

View File

@@ -0,0 +1,302 @@
package stirling.software.proprietary.security.controller.api;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.*;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.swagger.v3.oas.annotations.Operation;
import lombok.Data;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import stirling.software.common.configuration.RuntimePathConfig;
/**
 * Admin-only REST endpoints for inspecting and extending the Tesseract OCR language data
 * ("tessdata") used by the application.
 *
 * <p>Installed languages are read from the directory returned by {@code
 * RuntimePathConfig#getTessDataPath()}. The list of downloadable languages is fetched from the
 * GitHub contents API of {@code tesseract-ocr/tessdata} and cached in static fields for {@code
 * REMOTE_TESSDATA_TTL_MS} milliseconds.
 */
@Slf4j
@RestController
@RequestMapping("/api/v1/ui-data")
@RequiredArgsConstructor
public class UIDataTessdataController {

    private static final long REMOTE_TESSDATA_TTL_MS = 10 * 60 * 1000; // 10 minutes
    private static final String TRAINEDDATA_SUFFIX = ".traineddata";
    private static final String REMOTE_LISTING_URL =
            "https://api.github.com/repos/tesseract-ocr/tessdata/contents";
    private static final String REMOTE_DOWNLOAD_BASE_URL =
            "https://raw.githubusercontent.com/tesseract-ocr/tessdata/main/";

    /** Shared JSON mapper; building a new one for every listing request is wasted work. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    // Remote listing cache. Both fields are written together under the class monitor.
    private static volatile List<String> cachedRemoteTessdata = null;
    private static volatile long cachedRemoteTessdataExpiry = 0L;

    private final RuntimePathConfig runtimePathConfig;

    /**
     * Reports the installed languages, the remotely available languages and whether the tessdata
     * directory can be written to.
     *
     * @return 200 with a {@link TessdataLanguagesResponse}
     */
    @GetMapping("/tessdata-languages")
    @PreAuthorize("hasRole('ROLE_ADMIN')")
    @Operation(summary = "List installed and remotely available tessdata languages")
    public ResponseEntity<TessdataLanguagesResponse> getTessdataLanguages() {
        TessdataLanguagesResponse response = new TessdataLanguagesResponse();
        response.setInstalled(getAvailableTesseractLanguages());
        response.setAvailable(getRemoteTessdataLanguages());
        response.setWritable(isWritableDirectory(Paths.get(runtimePathConfig.getTessDataPath())));
        return ResponseEntity.ok(response);
    }

    /**
     * Downloads the requested languages into the tessdata directory.
     *
     * <p>Responses:
     *
     * <ul>
     *   <li>400 with a {@code message} when no languages are supplied
     *   <li>500 with a {@code message} when the directory cannot be created or resolved
     *   <li>403 with the directory path as {@code message} when the directory is not writable
     *   <li>200 when every language was downloaded
     *   <li>207 when some languages were downloaded and some failed
     *   <li>502 when nothing was downloaded
     * </ul>
     *
     * The 200/207/502 bodies carry {@code downloaded}, {@code failed} and {@code tessdataDir}.
     *
     * @param request body holding the language codes to download
     * @return the response described above
     */
    @PostMapping("/tessdata/download")
    @PreAuthorize("hasRole('ROLE_ADMIN')")
    @Operation(summary = "Download selected tessdata languages from the official repository")
    public ResponseEntity<Map<String, Object>> downloadTessdataLanguages(
            @RequestBody TessdataDownloadRequest request) {
        if (request.getLanguages() == null || request.getLanguages().isEmpty()) {
            return ResponseEntity.badRequest()
                    .body(Map.of("message", "No languages provided for download"));
        }

        Path tessdataDir = Paths.get(runtimePathConfig.getTessDataPath());
        try {
            Files.createDirectories(tessdataDir);
        } catch (IOException e) {
            log.error("Failed to create tessdata directory {}", tessdataDir, e);
            return ResponseEntity.internalServerError()
                    .body(Map.of("message", "Failed to prepare tessdata directory"));
        }
        if (!isWritableDirectory(tessdataDir)) {
            // The UI shows this message as the offending location, so it is the bare path.
            return ResponseEntity.status(HttpStatus.FORBIDDEN)
                    .body(Map.of("message", tessdataDir.toString()));
        }

        // The real path does not depend on the language, so resolve it once up front instead of
        // once per loop iteration.
        Path baseRealPath;
        try {
            baseRealPath = tessdataDir.toRealPath();
        } catch (IOException e) {
            log.warn("Failed to resolve tessdata directory {}", tessdataDir, e);
            return ResponseEntity.internalServerError()
                    .body(Map.of("message", "Failed to prepare tessdata directory"));
        }

        List<String> downloaded = new ArrayList<>();
        List<String> failed = new ArrayList<>();
        List<String> remoteLanguages = getRemoteTessdataLanguages();
        Set<String> remoteSet =
                remoteLanguages == null ? Collections.emptySet() : new HashSet<>(remoteLanguages);

        for (String language : request.getLanguages()) {
            if (language == null || language.isBlank()) {
                failed.add(language);
                continue;
            }
            // Reject (rather than silently rewrite) anything containing a disallowed character.
            String safeLang = language.replaceAll("[^A-Za-z0-9_+\\-]", "");
            if (!safeLang.equals(language)) {
                failed.add(language);
                continue;
            }
            // An empty set means the upstream listing is unavailable; the check is then skipped.
            if (!remoteSet.isEmpty() && !remoteSet.contains(safeLang)) {
                log.warn("Requested tessdata language {} not available in upstream list", safeLang);
                failed.add(language);
                continue;
            }
            // Defence in depth: the resolved target must stay inside the tessdata directory.
            Path targetFile = baseRealPath.resolve(safeLang + TRAINEDDATA_SUFFIX).normalize();
            if (!targetFile.startsWith(baseRealPath)) {
                log.warn("Blocked tessdata download path traversal attempt for {}", language);
                failed.add(language);
                continue;
            }
            String downloadUrl = REMOTE_DOWNLOAD_BASE_URL + safeLang + TRAINEDDATA_SUFFIX;
            if (downloadLanguageFile(safeLang, targetFile, downloadUrl)) {
                downloaded.add(safeLang);
            } else {
                failed.add(language);
            }
        }

        Map<String, Object> response =
                Map.of(
                        "downloaded", downloaded,
                        "failed", failed,
                        "tessdataDir", tessdataDir.toString());
        if (downloaded.isEmpty()) {
            return ResponseEntity.status(HttpStatus.BAD_GATEWAY).body(response);
        }
        if (failed.isEmpty()) {
            return ResponseEntity.ok(response);
        }
        return ResponseEntity.status(HttpStatus.MULTI_STATUS).body(response); // partial success
    }

    /**
     * Downloads one language file, returning {@code true} on success. Extracted for testability.
     *
     * <p>The payload is first written to a sibling {@code *.part} file and only renamed onto
     * {@code targetFile} after the transfer completed. An interrupted or failed download therefore
     * never leaves a truncated {@code .traineddata} file behind and never destroys a previously
     * installed one.
     *
     * @param safeLang validated language code, used for logging
     * @param targetFile final location of the {@code .traineddata} file
     * @param downloadUrl URL to fetch the file from
     * @return {@code true} if the file was downloaded and moved into place, {@code false} otherwise
     */
    protected boolean downloadLanguageFile(String safeLang, Path targetFile, String downloadUrl) {
        Path partFile = targetFile.resolveSibling(targetFile.getFileName() + ".part");
        HttpURLConnection connection = null;
        try {
            URL url = new URL(downloadUrl);
            connection = (HttpURLConnection) url.openConnection();
            connection.setRequestMethod("GET");
            connection.setRequestProperty("User-Agent", "Stirling-PDF-App");
            connection.setRequestProperty("Accept", "application/octet-stream");
            connection.setConnectTimeout(5000);
            connection.setReadTimeout(30000);

            int status = connection.getResponseCode();
            if (status != HttpURLConnection.HTTP_OK) {
                log.warn(
                        "Tessdata language {} not downloadable. HTTP {} from {}",
                        safeLang,
                        status,
                        downloadUrl);
                return false;
            }

            try (InputStream is = connection.getInputStream()) {
                Files.copy(is, partFile, StandardCopyOption.REPLACE_EXISTING);
            }
            // Same-directory move: the target only ever sees a complete file.
            Files.move(partFile, targetFile, StandardCopyOption.REPLACE_EXISTING);
            log.info("Downloaded tessdata language {} to {}", safeLang, targetFile);
            return true;
        } catch (IOException e) {
            log.warn("Failed to download tessdata language {} from {}", safeLang, downloadUrl, e);
            return false;
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
            try {
                // No-op after a successful move; removes the partial file after a failure.
                Files.deleteIfExists(partFile);
            } catch (IOException cleanupError) {
                log.warn("Failed to remove partial tessdata download {}", partFile, cleanupError);
            }
        }
    }

    /**
     * Fetches the list of available remote tessdata languages (with simple caching).
     *
     * <p>A successful listing is cached for {@code REMOTE_TESSDATA_TTL_MS}. When the request fails
     * or returns a non-200 status, the last cached list (even if expired) is returned, or an empty
     * list if nothing was ever cached.
     *
     * @return sorted language codes without the {@code .traineddata} suffix, excluding {@code osd}
     */
    protected List<String> getRemoteTessdataLanguages() {
        long now = System.currentTimeMillis();
        List<String> localCache;
        long localExpiry;
        // Read both fields under the monitor so list and expiry belong to the same update.
        synchronized (UIDataTessdataController.class) {
            localCache = cachedRemoteTessdata;
            localExpiry = cachedRemoteTessdataExpiry;
        }
        if (localCache != null && now < localExpiry) {
            return localCache;
        }

        HttpURLConnection connection = null;
        try {
            URL url = new URL(REMOTE_LISTING_URL);
            connection = (HttpURLConnection) url.openConnection();
            connection.setRequestMethod("GET");
            connection.setRequestProperty("User-Agent", "Stirling-PDF-App");
            connection.setRequestProperty("Accept", "application/vnd.github+json");
            connection.setConnectTimeout(5000);
            connection.setReadTimeout(30000);

            int status = connection.getResponseCode();
            if (status != HttpURLConnection.HTTP_OK) {
                String remaining = connection.getHeaderField("X-RateLimit-Remaining");
                String reset = connection.getHeaderField("X-RateLimit-Reset");
                if (status == HttpURLConnection.HTTP_FORBIDDEN && remaining != null) {
                    log.warn(
                            "GitHub tessdata listing rate limited. Remaining={}, resetEpochSeconds={}",
                            remaining,
                            reset);
                } else {
                    log.warn("GitHub tessdata listing returned HTTP {}", status);
                }
                return staleCacheOrEmpty();
            }

            try (InputStream is = connection.getInputStream()) {
                List<Map<String, Object>> items =
                        OBJECT_MAPPER.readValue(
                                is, new TypeReference<List<Map<String, Object>>>() {});
                if (items == null) {
                    // A literal JSON "null" body deserializes to null.
                    log.warn("GitHub tessdata listing returned an empty body");
                    return staleCacheOrEmpty();
                }
                List<String> languages =
                        items.stream()
                                .filter(Objects::nonNull)
                                .map(item -> item.get("name"))
                                // Skip missing or non-string names instead of failing on a cast.
                                .filter(String.class::isInstance)
                                .map(String.class::cast)
                                .filter(name -> name.endsWith(TRAINEDDATA_SUFFIX))
                                .map(UIDataTessdataController::stripTraineddataSuffix)
                                .filter(lang -> !"osd".equalsIgnoreCase(lang))
                                .sorted()
                                .toList();
                synchronized (UIDataTessdataController.class) {
                    cachedRemoteTessdata = languages;
                    cachedRemoteTessdataExpiry =
                            System.currentTimeMillis() + REMOTE_TESSDATA_TTL_MS;
                }
                return languages;
            }
        } catch (IOException e) {
            log.warn("Failed to fetch tessdata languages from GitHub", e);
            return staleCacheOrEmpty();
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /** Returns the last cached remote listing regardless of its age, or an empty list. */
    private static List<String> staleCacheOrEmpty() {
        List<String> stale = cachedRemoteTessdata; // single volatile read
        return stale != null ? stale : Collections.emptyList();
    }

    /**
     * Removes the trailing {@code .traineddata} from a file name that is known to end with it.
     * Only the suffix is removed; other occurrences of the text inside the name are kept.
     */
    private static String stripTraineddataSuffix(String fileName) {
        return fileName.substring(0, fileName.length() - TRAINEDDATA_SUFFIX.length());
    }

    /** Request body of the download endpoint. */
    @Data
    private static class TessdataDownloadRequest {
        private List<String> languages;
    }

    /** Response body of the listing endpoint. */
    @Data
    private static class TessdataLanguagesResponse {
        private List<String> installed;
        private List<String> available;
        private boolean writable;
    }

    /**
     * Lists the languages installed in the configured tessdata directory.
     *
     * @return sorted language codes of all {@code *.traineddata} entries except {@code osd}; empty
     *     when the directory cannot be listed
     */
    private List<String> getAvailableTesseractLanguages() {
        String tessdataDir = runtimePathConfig.getTessDataPath();
        java.io.File[] files = new java.io.File(tessdataDir).listFiles();
        if (files == null) {
            return Collections.emptyList();
        }
        return Arrays.stream(files)
                .map(java.io.File::getName)
                .filter(name -> name.endsWith(TRAINEDDATA_SUFFIX))
                .map(UIDataTessdataController::stripTraineddataSuffix)
                .filter(lang -> !"osd".equalsIgnoreCase(lang))
                .sorted()
                .toList();
    }

    /**
     * Checks whether files can be created in {@code dir}, creating the directory if it is missing.
     *
     * <p>{@link Files#isWritable(Path)} is checked first; the result is then confirmed by creating
     * and deleting a temporary probe file.
     *
     * @param dir directory to test
     * @return {@code true} if the probe file could be created and deleted
     */
    protected boolean isWritableDirectory(Path dir) {
        try {
            Files.createDirectories(dir);
        } catch (IOException e) {
            log.warn("Tessdata directory cannot be created: {}", dir, e);
            return false;
        }
        if (!Files.isWritable(dir)) {
            log.warn("Tessdata directory not writable (ACL check failed): {}", dir);
            return false;
        }
        try {
            Path probe = Files.createTempFile(dir, "tessdata-write-test", ".tmp");
            Files.deleteIfExists(probe);
            return true;
        } catch (IOException e) {
            log.warn("Tessdata directory not writable (temp file creation failed): {}", dir);
            return false;
        }
    }
}

View File

@@ -0,0 +1,373 @@
package stirling.software.proprietary.security.controller.api;
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get;
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.mockito.Mockito;
import org.springframework.http.MediaType;
import org.springframework.test.web.servlet.MockMvc;
import org.springframework.test.web.servlet.setup.MockMvcBuilders;
import stirling.software.common.configuration.RuntimePathConfig;
class UIDataTessdataControllerTest {
@Test
void downloadTessdataLanguages_withEmptyList_returnsBadRequest() throws Exception {
RuntimePathConfig runtimePathConfig = Mockito.mock(RuntimePathConfig.class);
Mockito.when(runtimePathConfig.getTessDataPath()).thenReturn("ignored/path");
UIDataTessdataController controller =
new UIDataTessdataController(runtimePathConfig) {
@Override
protected List<String> getRemoteTessdataLanguages() {
return List.of("eng");
}
};
MockMvc mvc = MockMvcBuilders.standaloneSetup(controller).build();
mvc.perform(
post("/api/v1/ui-data/tessdata/download")
.contentType(MediaType.APPLICATION_JSON)
.content("{\"languages\":[]}"))
.andExpect(status().isBadRequest())
.andExpect(jsonPath("$.message").value("No languages provided for download"));
}
@Test
void downloadTessdataLanguages_blocksPathTraversal(@TempDir Path tempDir) throws Exception {
RuntimePathConfig runtimePathConfig = Mockito.mock(RuntimePathConfig.class);
Mockito.when(runtimePathConfig.getTessDataPath()).thenReturn(tempDir.toString());
UIDataTessdataController controller =
new UIDataTessdataController(runtimePathConfig) {
@Override
protected List<String> getRemoteTessdataLanguages() {
return List.of("eng");
}
};
MockMvc mvc = MockMvcBuilders.standaloneSetup(controller).build();
mvc.perform(
post("/api/v1/ui-data/tessdata/download")
.contentType(MediaType.APPLICATION_JSON)
.content("{\"languages\":[\"../evil\"]}"))
.andExpect(status().isBadGateway())
.andExpect(jsonPath("$.downloaded").isArray())
.andExpect(jsonPath("$.downloaded").isEmpty())
.andExpect(jsonPath("$.failed[0]").value("../evil"));
// Ensure no file was written outside the tessdata directory
Path escapedPath = tempDir.resolve("../evil.traineddata").normalize();
assert Files.notExists(escapedPath) : "Traversal path should not be written";
}
@Test
void downloadTessdataLanguages_rejectsUnknownLanguage(@TempDir Path tempDir) throws Exception {
RuntimePathConfig runtimePathConfig = Mockito.mock(RuntimePathConfig.class);
Mockito.when(runtimePathConfig.getTessDataPath()).thenReturn(tempDir.toString());
UIDataTessdataController controller =
new UIDataTessdataController(runtimePathConfig) {
@Override
protected List<String> getRemoteTessdataLanguages() {
return List.of("eng");
}
};
MockMvc mvc = MockMvcBuilders.standaloneSetup(controller).build();
mvc.perform(
post("/api/v1/ui-data/tessdata/download")
.contentType(MediaType.APPLICATION_JSON)
.content("{\"languages\":[\"fra\"]}"))
.andExpect(status().isBadGateway())
.andExpect(jsonPath("$.downloaded").isEmpty())
.andExpect(jsonPath("$.failed[0]").value("fra"));
}
@Test
void downloadTessdataLanguages_successAndFailureMixed(@TempDir Path tempDir) throws Exception {
RuntimePathConfig runtimePathConfig = Mockito.mock(RuntimePathConfig.class);
Mockito.when(runtimePathConfig.getTessDataPath()).thenReturn(tempDir.toString());
UIDataTessdataController controller =
new UIDataTessdataController(runtimePathConfig) {
@Override
protected List<String> getRemoteTessdataLanguages() {
return List.of("eng", "fra");
}
@Override
protected boolean downloadLanguageFile(
String safeLang, Path targetFile, String downloadUrl) {
if ("eng".equals(safeLang)) {
try {
Files.writeString(targetFile, "dummy");
return true;
} catch (Exception e) {
return false;
}
}
return false;
}
};
MockMvc mvc = MockMvcBuilders.standaloneSetup(controller).build();
mvc.perform(
post("/api/v1/ui-data/tessdata/download")
.contentType(MediaType.APPLICATION_JSON)
.content("{\"languages\":[\"eng\",\"fra\"]}"))
.andExpect(status().isMultiStatus())
.andExpect(jsonPath("$.downloaded[0]").value("eng"))
.andExpect(jsonPath("$.failed[0]").value("fra"));
}
@Test
void downloadTessdataLanguages_handlesInvalidSanitizedLanguage(@TempDir Path tempDir)
throws Exception {
RuntimePathConfig runtimePathConfig = Mockito.mock(RuntimePathConfig.class);
Mockito.when(runtimePathConfig.getTessDataPath()).thenReturn(tempDir.toString());
UIDataTessdataController controller =
new UIDataTessdataController(runtimePathConfig) {
@Override
protected List<String> getRemoteTessdataLanguages() {
return List.of("eng");
}
};
MockMvc mvc = MockMvcBuilders.standaloneSetup(controller).build();
mvc.perform(
post("/api/v1/ui-data/tessdata/download")
.contentType(MediaType.APPLICATION_JSON)
.content("{\"languages\":[\"eng/\"]}"))
.andExpect(status().isBadGateway())
.andExpect(jsonPath("$.downloaded").isEmpty())
.andExpect(jsonPath("$.failed[0]").value("eng/"));
}
@Test
void downloadTessdataLanguages_returnsForbiddenWhenNotWritable(@TempDir Path tempDir)
throws Exception {
RuntimePathConfig runtimePathConfig = Mockito.mock(RuntimePathConfig.class);
Mockito.when(runtimePathConfig.getTessDataPath()).thenReturn(tempDir.toString());
UIDataTessdataController controller =
new UIDataTessdataController(runtimePathConfig) {
@Override
protected boolean isWritableDirectory(Path dir) {
return false;
}
};
MockMvc mvc = MockMvcBuilders.standaloneSetup(controller).build();
mvc.perform(
post("/api/v1/ui-data/tessdata/download")
.contentType(MediaType.APPLICATION_JSON)
.content("{\"languages\":[\"eng\"]}"))
.andExpect(status().isForbidden());
}
@Test
void downloadTessdataLanguages_handlesNetworkFailure(@TempDir Path tempDir) throws Exception {
RuntimePathConfig runtimePathConfig = Mockito.mock(RuntimePathConfig.class);
Mockito.when(runtimePathConfig.getTessDataPath()).thenReturn(tempDir.toString());
UIDataTessdataController controller =
new UIDataTessdataController(runtimePathConfig) {
@Override
protected List<String> getRemoteTessdataLanguages() {
return List.of("eng");
}
@Override
protected boolean downloadLanguageFile(
String safeLang, Path targetFile, String downloadUrl) {
return false; // simulate network failure
}
};
MockMvc mvc = MockMvcBuilders.standaloneSetup(controller).build();
mvc.perform(
post("/api/v1/ui-data/tessdata/download")
.contentType(MediaType.APPLICATION_JSON)
.content("{\"languages\":[\"eng\"]}"))
.andExpect(status().isBadGateway())
.andExpect(jsonPath("$.downloaded").isArray())
.andExpect(jsonPath("$.downloaded").isEmpty())
.andExpect(jsonPath("$.failed[0]").value("eng"));
}
@Test
void downloadTessdataLanguages_allSuccess(@TempDir Path tempDir) throws Exception {
RuntimePathConfig runtimePathConfig = Mockito.mock(RuntimePathConfig.class);
Mockito.when(runtimePathConfig.getTessDataPath()).thenReturn(tempDir.toString());
UIDataTessdataController controller =
new UIDataTessdataController(runtimePathConfig) {
@Override
protected List<String> getRemoteTessdataLanguages() {
return List.of("eng");
}
@Override
protected boolean downloadLanguageFile(
String safeLang, Path targetFile, String downloadUrl) {
try {
Files.writeString(targetFile, "dummy");
return true;
} catch (IOException e) {
return false;
}
}
};
MockMvc mvc = MockMvcBuilders.standaloneSetup(controller).build();
mvc.perform(
post("/api/v1/ui-data/tessdata/download")
.contentType(MediaType.APPLICATION_JSON)
.content("{\"languages\":[\"eng\"]}"))
.andExpect(status().isOk())
.andExpect(jsonPath("$.downloaded[0]").value("eng"))
.andExpect(jsonPath("$.failed").isArray())
.andExpect(jsonPath("$.failed").isEmpty());
}
@Test
void tessdataLanguages_returnsInstalledAvailableAndWritable(@TempDir Path tempDir)
throws Exception {
Files.createFile(tempDir.resolve("eng.traineddata"));
Files.createFile(tempDir.resolve("deu.traineddata"));
Files.createFile(tempDir.resolve("osd.traineddata")); // should be filtered
RuntimePathConfig runtimePathConfig = Mockito.mock(RuntimePathConfig.class);
Mockito.when(runtimePathConfig.getTessDataPath()).thenReturn(tempDir.toString());
UIDataTessdataController controller =
new UIDataTessdataController(runtimePathConfig) {
@Override
protected List<String> getRemoteTessdataLanguages() {
return List.of("eng", "fra");
}
};
MockMvc mvc = MockMvcBuilders.standaloneSetup(controller).build();
mvc.perform(get("/api/v1/ui-data/tessdata-languages"))
.andExpect(status().isOk())
.andExpect(jsonPath("$.installed[0]").value("deu"))
.andExpect(jsonPath("$.installed[1]").value("eng"))
.andExpect(jsonPath("$.available[0]").value("eng"))
.andExpect(jsonPath("$.available[1]").value("fra"))
.andExpect(jsonPath("$.writable").value(true));
}
@Test
void tessdataLanguages_emptyDirectory(@TempDir Path tempDir) throws Exception {
RuntimePathConfig runtimePathConfig = Mockito.mock(RuntimePathConfig.class);
Mockito.when(runtimePathConfig.getTessDataPath()).thenReturn(tempDir.toString());
UIDataTessdataController controller =
new UIDataTessdataController(runtimePathConfig) {
@Override
protected List<String> getRemoteTessdataLanguages() {
return List.of("eng");
}
};
MockMvc mvc = MockMvcBuilders.standaloneSetup(controller).build();
mvc.perform(get("/api/v1/ui-data/tessdata-languages"))
.andExpect(status().isOk())
.andExpect(jsonPath("$.installed").isArray())
.andExpect(jsonPath("$.installed").isEmpty())
.andExpect(jsonPath("$.available[0]").value("eng"))
.andExpect(jsonPath("$.writable").value(true));
}
@Test
void tessdataLanguages_nonTraineddataFilesAreIgnored(@TempDir Path tempDir) throws Exception {
Files.createFile(tempDir.resolve("notes.txt"));
RuntimePathConfig runtimePathConfig = Mockito.mock(RuntimePathConfig.class);
Mockito.when(runtimePathConfig.getTessDataPath()).thenReturn(tempDir.toString());
UIDataTessdataController controller =
new UIDataTessdataController(runtimePathConfig) {
@Override
protected List<String> getRemoteTessdataLanguages() {
return List.of("eng");
}
};
MockMvc mvc = MockMvcBuilders.standaloneSetup(controller).build();
mvc.perform(get("/api/v1/ui-data/tessdata-languages"))
.andExpect(status().isOk())
.andExpect(jsonPath("$.installed").isArray())
.andExpect(jsonPath("$.installed").isEmpty())
.andExpect(jsonPath("$.writable").value(true));
}
@Test
void tessdataLanguages_handlesNonExistentDirectory(@TempDir Path tempDir) throws Exception {
Path missingDir = tempDir.resolve("missing");
RuntimePathConfig runtimePathConfig = Mockito.mock(RuntimePathConfig.class);
Mockito.when(runtimePathConfig.getTessDataPath()).thenReturn(missingDir.toString());
UIDataTessdataController controller =
new UIDataTessdataController(runtimePathConfig) {
@Override
protected List<String> getRemoteTessdataLanguages() {
return List.of("eng");
}
};
MockMvc mvc = MockMvcBuilders.standaloneSetup(controller).build();
mvc.perform(get("/api/v1/ui-data/tessdata-languages"))
.andExpect(status().isOk())
.andExpect(jsonPath("$.installed").isArray())
.andExpect(jsonPath("$.installed").isEmpty())
.andExpect(jsonPath("$.writable").value(true));
}
@Test
void tessdataLanguages_marksNotWritable(@TempDir Path tempDir) throws Exception {
RuntimePathConfig runtimePathConfig = Mockito.mock(RuntimePathConfig.class);
Mockito.when(runtimePathConfig.getTessDataPath()).thenReturn(tempDir.toString());
UIDataTessdataController controller =
new UIDataTessdataController(runtimePathConfig) {
@Override
protected boolean isWritableDirectory(Path dir) {
return false;
}
@Override
protected List<String> getRemoteTessdataLanguages() {
return List.of("eng");
}
};
MockMvc mvc = MockMvcBuilders.standaloneSetup(controller).build();
mvc.perform(get("/api/v1/ui-data/tessdata-languages"))
.andExpect(status().isOk())
.andExpect(jsonPath("$.writable").value(false));
}
}

View File

@@ -4836,6 +4836,26 @@ description = "Maximum DPI for image processing (0 = unlimited)"
[admin.settings.advanced.tessdataDir]
label = "Tessdata Directory"
description = "Path to the tessdata directory for OCR language files"
downloadMissingTitle = "No language selected"
downloadMissingBody = "Please select at least one language to download."
downloadSuccessTitle = "Languages downloaded"
downloadSuccessBody = "The selected tessdata languages have been saved."
downloadErrorTitle = "Download Failed"
loadingLanguages = "Loading installed tessdata languages..."
installedLanguages = "Installed tessdata languages"
noLanguages = "No tessdata languages found in the configured directory."
downloadLabel = "Download additional tessdata languages"
downloadPlaceholder = "Select languages"
downloadNothingFound = "No additional languages found"
permissionNotice = "The tessdata path is not writable. Downloads will be opened in the browser; please save the .traineddata files manually into the tessdata folder."
manualLinks = "Manual downloads: click the links and place the files into the tessdata folder."
downloadButton = "Download selected languages"
downloadInvalidTitle = "Invalid selection"
downloadInvalidBody = "Some selected languages are not available to download. Please refresh and choose from the list."
downloadErrorNetwork = "Download failed due to a network error. Please check your connection and try again."
downloadErrorServer = "The server encountered an error while downloading tessdata languages. Please try again later."
downloadErrorPermission = "Tessdata directory is not writable: {{message}}. Please choose a writable directory (e.g. under the application data folder) or adjust permissions."
downloadErrorGeneric = "Download failed: {{message}}. Please try again later."
[admin.settings.advanced.disableSanitize]
label = "Disable HTML Sanitization"

View File

@@ -1,6 +1,6 @@
import { useEffect } from 'react';
import { useEffect, useMemo, useState } from 'react';
import { useTranslation } from 'react-i18next';
import { NumberInput, Switch, Button, Stack, Paper, Text, Loader, Group, Accordion, TextInput } from '@mantine/core';
import { NumberInput, Switch, Button, Stack, Paper, Text, Loader, Group, Accordion, TextInput, MultiSelect } from '@mantine/core';
import { alert } from '@app/components/toast';
import RestartConfirmationModal from '@app/components/shared/config/RestartConfirmationModal';
import { useRestartServer } from '@app/components/shared/config/useRestartServer';
@@ -9,6 +9,7 @@ import PendingBadge from '@app/components/shared/config/PendingBadge';
import apiClient from '@app/services/apiClient';
import { useLoginRequired } from '@app/hooks/useLoginRequired';
import LoginRequiredBanner from '@app/components/shared/config/LoginRequiredBanner';
import { Z_INDEX_OVER_CONFIG_MODAL } from '@app/styles/zIndex';
interface AdvancedSettingsData {
enableAlphaFunctionality?: boolean;
@@ -173,6 +174,166 @@ export default function AdminAdvancedSection() {
}
}, [loginEnabled]);
// Tessdata (OCR language) management state.
// Languages found in the configured tessdata directory.
const [tessdataLanguages, setTessdataLanguages] = useState<string[]>([]);
// Languages offered upstream that are not installed yet.
const [remoteTessdataLanguages, setRemoteTessdataLanguages] = useState<string[]>([]);
// Whether the server reported the tessdata directory as writable.
const [tessdataDirWritable, setTessdataDirWritable] = useState<boolean>(true);
// Direct download URLs shown when the server cannot write the files itself.
const [manualDownloadLinks, setManualDownloadLinks] = useState<string[]>([]);
const [tessdataLanguagesLoading, setTessdataLanguagesLoading] = useState(false);
const [downloadLanguagesLoading, setDownloadLanguagesLoading] = useState(false);
const [selectedDownloadLanguages, setSelectedDownloadLanguages] = useState<string[]>([]);

// Load the installed/available language lists whenever login becomes enabled.
useEffect(() => {
  if (!loginEnabled) {
    return;
  }

  const loadLanguages = async () => {
    setTessdataLanguagesLoading(true);
    try {
      const response = await apiClient.get<{ installed: string[]; available: string[]; writable?: boolean }>(
        '/api/v1/ui-data/tessdata-languages',
        { suppressErrorToast: true }
      );
      const installedLanguages = response.data.installed || [];
      const availableLanguages = response.data.available || [];
      const notYetInstalled = availableLanguages.filter((lang) => !installedLanguages.includes(lang));

      setTessdataLanguages(installedLanguages);
      setRemoteTessdataLanguages(notYetInstalled);
      // A missing `writable` field is treated as writable.
      setTessdataDirWritable(response.data.writable !== false);
      setManualDownloadLinks([]);
    } catch (error) {
      console.error('[AdminAdvancedSection] Failed to load tessdata languages', error);
      // Fall back to an empty, writable state.
      setTessdataLanguages([]);
      setRemoteTessdataLanguages([]);
      setTessdataDirWritable(true);
      setManualDownloadLinks([]);
    } finally {
      setTessdataLanguagesLoading(false);
    }
  };

  loadLanguages();
}, [loginEnabled]);
/**
 * Re-reads the installed/available tessdata languages, retrying on failure.
 * Makes up to `retries` attempts and waits `delayMs` milliseconds between them.
 * Never throws: the error of the final attempt is logged to the console.
 */
const refreshTessdataWithRetry = async (retries = 3, delayMs = 400) => {
  let attempt = 0;
  while (attempt < retries) {
    const isLastAttempt = attempt === retries - 1;
    try {
      const response = await apiClient.get<{ installed: string[]; available: string[]; writable?: boolean }>(
        '/api/v1/ui-data/tessdata-languages',
        { suppressErrorToast: true }
      );
      const installedLanguages = response.data.installed || [];
      const availableLanguages = response.data.available || [];
      const notYetInstalled = availableLanguages.filter((lang) => !installedLanguages.includes(lang));

      setTessdataLanguages(installedLanguages);
      setRemoteTessdataLanguages(notYetInstalled);
      setTessdataDirWritable(response.data.writable !== false);
      setManualDownloadLinks([]);
      return;
    } catch (err) {
      if (isLastAttempt) {
        console.error('[AdminAdvancedSection] Retry refresh tessdata failed', err);
        return;
      }
      // Pause before the next attempt.
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
    attempt += 1;
  }
};
// Matches every character outside A-Z, a-z, 0-9, '_', '+' and '-'; used to strip such characters
// from a language code. Memoized with an empty dependency list so the RegExp is created only once.
const safeLangRegex = useMemo(() => new RegExp('[^A-Za-z0-9_+\\-]', 'g'), []);
/**
 * Asks the server to download the selected tessdata languages and reports the outcome.
 *
 * - Nothing selected, or a selection outside the known remote list: warning toast, no request.
 * - All languages saved: success toast, installed list refreshed, selection cleared.
 * - Some languages failed (the server answers with a 2xx status and a non-empty `failed` list):
 *   warning toast naming the failed languages, installed list refreshed, selection cleared.
 * - HTTP 403: the directory is not writable, so manual download links are prepared instead.
 * - Any other error: error toast chosen by whether a response exists and by its status.
 */
const handleDownloadTessdataLanguages = async () => {
  if (!loginEnabled) return;
  if (selectedDownloadLanguages.length === 0) {
    alert({
      alertType: 'warning',
      title: t('admin.settings.advanced.tessdataDir.downloadMissingTitle', 'No language selected'),
      body: t('admin.settings.advanced.tessdataDir.downloadMissingBody', 'Please select at least one language to download.'),
      expandable: false,
    });
    return;
  }

  // Ensure selection is a subset of remote languages to prevent invalid requests
  const remoteSet = new Set(remoteTessdataLanguages);
  const invalidSelection = selectedDownloadLanguages.filter((lang) => !remoteSet.has(lang));
  if (invalidSelection.length > 0) {
    alert({
      alertType: 'warning',
      title: t('admin.settings.advanced.tessdataDir.downloadInvalidTitle', 'Invalid selection'),
      body: t(
        'admin.settings.advanced.tessdataDir.downloadInvalidBody',
        'Some selected languages are not available to download. Please refresh and choose from the list.'
      ),
      expandable: false,
    });
    return;
  }

  setDownloadLanguagesLoading(true);
  try {
    const response = await apiClient.post('/api/v1/ui-data/tessdata/download', { languages: selectedDownloadLanguages }, {
      suppressErrorToast: true
    });

    // A partial success is still a 2xx response, so it does not reach the catch block.
    // The `failed` list has to be checked before claiming that everything was saved.
    const failedLanguages: string[] = Array.isArray(response?.data?.failed)
      ? response.data.failed.filter((lang: unknown): lang is string => typeof lang === 'string')
      : [];

    if (failedLanguages.length > 0) {
      alert({
        alertType: 'warning',
        // These two keys have no translation entry yet; the default values are shown until one is added.
        title: t('admin.settings.advanced.tessdataDir.downloadPartialTitle', 'Some languages were not downloaded'),
        body: t('admin.settings.advanced.tessdataDir.downloadPartialBody', {
          defaultValue: 'The following languages could not be downloaded: {{languages}}. The others have been saved.',
          languages: failedLanguages.join(', '),
        }),
        expandable: false,
      });
    } else {
      alert({
        alertType: 'success',
        title: t('admin.settings.advanced.tessdataDir.downloadSuccessTitle', 'Languages downloaded'),
        body: t('admin.settings.advanced.tessdataDir.downloadSuccessBody', 'The selected tessdata languages have been saved.'),
      });
    }

    // Refresh installed list with retry in case filesystem sync is delayed
    await refreshTessdataWithRetry();
    setSelectedDownloadLanguages([]);
    setManualDownloadLinks([]);
  } catch (error) {
    console.error('[AdminAdvancedSection] Download tessdata languages failed', error);
    const response = (error as any)?.response;
    const status = response?.status;
    const serverMessage = response?.data?.message;

    if (status === 403) {
      console.warn('[AdminAdvancedSection] Tessdata directory not writable, falling back to manual download:', serverMessage);
      setTessdataDirWritable(false);
      setManualDownloadLinks(
        selectedDownloadLanguages.map((lang) => {
          const safeLang = lang.replace(safeLangRegex, '');
          return `https://raw.githubusercontent.com/tesseract-ocr/tessdata/main/${safeLang}.traineddata`;
        })
      );
      const message = t('admin.settings.advanced.tessdataDir.downloadErrorPermission', {
        defaultValue:
          'Tessdata directory is not writable: {{message}}. Please choose a writable directory (e.g. under the application data folder) or adjust permissions.',
        message: serverMessage ?? settings.tessdataDir ?? 'unknown location',
      });
      alert({
        alertType: 'error',
        title: t('admin.settings.advanced.tessdataDir.downloadErrorTitle', 'Download Failed'),
        body: message,
        expandable: false,
      });
      return;
    }

    let message: string;
    if (!response) {
      // No response object at all: the request never completed.
      message = t(
        'admin.settings.advanced.tessdataDir.downloadErrorNetwork',
        'Download failed due to a network error. Please check your connection and try again.'
      );
    } else if (status >= 500) {
      message = t(
        'admin.settings.advanced.tessdataDir.downloadErrorServer',
        'The server encountered an error while downloading tessdata languages. Please try again later.'
      );
    } else {
      message = t('admin.settings.advanced.tessdataDir.downloadErrorGeneric', {
        defaultValue: 'Download failed: {{message}}. Please try again later.',
        message: serverMessage ?? settings.tessdataDir ?? 'unknown location',
      });
    }
    alert({
      alertType: 'error',
      title: t('admin.settings.advanced.tessdataDir.downloadErrorTitle', 'Download Failed'),
      body: message,
      expandable: false,
    });
  } finally {
    setDownloadLanguagesLoading(false);
  }
};
const handleSave = async () => {
if (!validateLoginEnabled()) {
return;
@@ -301,6 +462,7 @@ export default function AdminAdvancedSection() {
/>
</div>
{/* Tessdata Directory */}
<div>
<TextInput
label={
@@ -315,6 +477,69 @@ export default function AdminAdvancedSection() {
placeholder="/usr/share/tessdata"
disabled={!loginEnabled}
/>
{tessdataLanguagesLoading ? (
<Group gap="xs" mt={6}>
<Loader size="xs" />
<Text size="xs">
{t('admin.settings.advanced.tessdataDir.loadingLanguages', 'Loading installed tessdata languages...')}
</Text>
</Group>
) : (
<Text size="xs" c="dimmed" mt={6}>
{tessdataLanguages.length > 0
? `${t('admin.settings.advanced.tessdataDir.installedLanguages', 'Installed tessdata languages')}: ${tessdataLanguages.join(', ')}`
: t('admin.settings.advanced.tessdataDir.noLanguages', 'No tessdata languages found in the configured directory')}
</Text>
)}
<Stack gap="xs" mt="sm">
<MultiSelect
label={t('admin.settings.advanced.tessdataDir.downloadLabel', 'Download additional tessdata languages')}
placeholder={t('admin.settings.advanced.tessdataDir.downloadPlaceholder', 'Select languages')}
data={remoteTessdataLanguages.map((lang) => ({ value: lang, label: lang }))}
searchable
disabled={!loginEnabled || remoteTessdataLanguages.length === 0}
value={selectedDownloadLanguages}
onChange={setSelectedDownloadLanguages}
comboboxProps={{ withinPortal: true, zIndex: Z_INDEX_OVER_CONFIG_MODAL }}
nothingFoundMessage={t('admin.settings.advanced.tessdataDir.downloadNothingFound', 'No additional languages found')}
/>
{!tessdataDirWritable && (
<Text size="xs" c="yellow.4">
{t(
'admin.settings.advanced.tessdataDir.permissionNotice',
'The tessdata path is not writable. Downloads will be opened in the browser; please save the .traineddata files manually into the tessdata folder.'
)}
</Text>
)}
{!tessdataDirWritable && manualDownloadLinks.length > 0 && (
<Stack gap="xs">
<Text size="xs" c="dimmed">
{t(
'admin.settings.advanced.tessdataDir.manualLinks',
'Manual downloads: click the links and place the files into the tessdata folder.'
)}
</Text>
<Stack gap={4}>
{manualDownloadLinks.map((link) => (
<a key={link} href={link} target="_blank" rel="noreferrer" style={{ fontSize: '12px' }}>
{link}
</a>
))}
</Stack>
</Stack>
)}
<Group justify="flex-end">
<Button
size="xs"
variant="light"
onClick={handleDownloadTessdataLanguages}
loading={downloadLanguagesLoading}
disabled={!loginEnabled || remoteTessdataLanguages.length === 0}
>
{t('admin.settings.advanced.tessdataDir.downloadButton', 'Download selected languages')}
</Button>
</Group>
</Stack>
</div>
</Stack>
</Paper>