mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2024-12-31 00:08:08 +01:00
update docs
This commit is contained in:
parent
748392cd29
commit
ab7610f72c
@ -8,7 +8,7 @@ The paths have changed for the tessdata locations on new Docker images. Please u
|
|||||||
|
|
||||||
## How does the OCR Work
|
## How does the OCR Work
|
||||||
|
|
||||||
Stirling-PDF uses [qpdf](https://github.com/qpdf/qpdf), which in turn uses Tesseract for its text recognition. All credit goes to them for this awesome work!
|
Stirling-PDF uses Tesseract for its text recognition. All credit goes to them for this awesome work!
|
||||||
|
|
||||||
## Language Packs
|
## Language Packs
|
||||||
|
|
||||||
@ -52,8 +52,6 @@ Add the following to your existing Docker run command:
|
|||||||
|
|
||||||
### Non-Docker Setup
|
### Non-Docker Setup
|
||||||
|
|
||||||
If you are not using Docker, you need to install the OCR components, including the `qpdf` app. You can see the [qpdf install guide](https://qpdf.readthedocs.io/en/latest/installation.html).
|
|
||||||
|
|
||||||
For Debian-based systems, install languages with this command:
|
For Debian-based systems, install languages with this command:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@ -83,8 +81,7 @@ rpm -qa | grep tesseract-langpack | sed 's/tesseract-langpack-//g'
|
|||||||
|
|
||||||
For Windows:
|
For Windows:
|
||||||
|
|
||||||
Ensure qpdf is installed with
|
You must ensure Tesseract is installed.
|
||||||
``pip install qpdf``
|
|
||||||
|
|
||||||
Additional languages must be downloaded manually:
|
Additional languages must be downloaded manually:
|
||||||
Download desired .traineddata files from tessdata or tessdata_fast
|
Download desired .traineddata files from tessdata or tessdata_fast
|
||||||
|
@ -146,7 +146,6 @@ The easiest method is to use the language packs provided by your repositories. S
|
|||||||
|
|
||||||
1. Download the desired language pack(s) by selecting the `.traineddata` file(s) for the language(s) you need.
|
1. Download the desired language pack(s) by selecting the `.traineddata` file(s) for the language(s) you need.
|
||||||
2. Place the `.traineddata` files in the Tesseract tessdata directory: `/usr/share/tessdata`
|
2. Place the `.traineddata` files in the Tesseract tessdata directory: `/usr/share/tessdata`
|
||||||
3. Please view [qpdf install guide](https://qpdf.readthedocs.io/en/latest/installation.html) for more info.
|
|
||||||
|
|
||||||
**IMPORTANT:** DO NOT REMOVE EXISTING `eng.traineddata`, IT'S REQUIRED.
|
**IMPORTANT:** DO NOT REMOVE EXISTING `eng.traineddata`, IT'S REQUIRED.
|
||||||
|
|
||||||
|
@ -42,6 +42,8 @@ public class ExternalAppDepConfig {
|
|||||||
put("pdftohtml", List.of("Pdftohtml"));
|
put("pdftohtml", List.of("Pdftohtml"));
|
||||||
put("unoconv", List.of("Unoconv"));
|
put("unoconv", List.of("Unoconv"));
|
||||||
put("qpdf", List.of("qpdf"));
|
put("qpdf", List.of("qpdf"));
|
||||||
|
put("tesseract", List.of("tesseract"));
|
||||||
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -96,6 +98,7 @@ public class ExternalAppDepConfig {
|
|||||||
public void checkDependencies() {
|
public void checkDependencies() {
|
||||||
|
|
||||||
// Check core dependencies
|
// Check core dependencies
|
||||||
|
checkDependencyAndDisableGroup("tesseract");
|
||||||
checkDependencyAndDisableGroup("soffice");
|
checkDependencyAndDisableGroup("soffice");
|
||||||
checkDependencyAndDisableGroup("qpdf");
|
checkDependencyAndDisableGroup("qpdf");
|
||||||
checkDependencyAndDisableGroup("weasyprint");
|
checkDependencyAndDisableGroup("weasyprint");
|
||||||
|
Loading…
Reference in New Issue
Block a user