mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-08 17:51:20 +02:00
enhance OCR language selection and improve redaction options with dynamic translations
Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
parent
3ac7f0df4c
commit
e2ac7edad9
@ -1,5 +1,10 @@
|
|||||||
package stirling.software.SPDF.controller.web;
|
package stirling.software.SPDF.controller.web;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import org.springframework.stereotype.Controller;
|
import org.springframework.stereotype.Controller;
|
||||||
import org.springframework.ui.Model;
|
import org.springframework.ui.Model;
|
||||||
import org.springframework.web.bind.annotation.GetMapping;
|
import org.springframework.web.bind.annotation.GetMapping;
|
||||||
@ -7,14 +12,36 @@ import org.springframework.web.bind.annotation.GetMapping;
|
|||||||
import io.swagger.v3.oas.annotations.Hidden;
|
import io.swagger.v3.oas.annotations.Hidden;
|
||||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
|
||||||
|
import stirling.software.common.model.ApplicationProperties;
|
||||||
|
|
||||||
@Controller
|
@Controller
|
||||||
@Tag(name = "Security", description = "Security APIs")
|
@Tag(name = "Security", description = "Security APIs")
|
||||||
|
@RequiredArgsConstructor
|
||||||
public class SecurityWebController {
|
public class SecurityWebController {
|
||||||
|
|
||||||
|
private final ApplicationProperties applicationProperties;
|
||||||
|
|
||||||
|
private List<String> getAvailableTesseractLanguages() {
|
||||||
|
String tessdataDir = applicationProperties.getSystem().getTessdataDir();
|
||||||
|
File[] files = new File(tessdataDir).listFiles();
|
||||||
|
if (files == null) {
|
||||||
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
|
return Arrays.stream(files)
|
||||||
|
.filter(file -> file.getName().endsWith(".traineddata"))
|
||||||
|
.map(file -> file.getName().replace(".traineddata", ""))
|
||||||
|
.filter(lang -> !"osd".equalsIgnoreCase(lang))
|
||||||
|
.sorted()
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
|
||||||
@GetMapping("/auto-redact")
|
@GetMapping("/auto-redact")
|
||||||
@Hidden
|
@Hidden
|
||||||
public String autoRedactForm(Model model) {
|
public String autoRedactForm(Model model) {
|
||||||
model.addAttribute("currentPage", "auto-redact");
|
model.addAttribute("currentPage", "auto-redact");
|
||||||
|
model.addAttribute("languages", getAvailableTesseractLanguages());
|
||||||
return "security/auto-redact";
|
return "security/auto-redact";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
package stirling.software.SPDF.model.api.security;
|
package stirling.software.SPDF.model.api.security;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import io.swagger.v3.oas.annotations.media.Schema;
|
import io.swagger.v3.oas.annotations.media.Schema;
|
||||||
|
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
@ -53,4 +55,10 @@ public class RedactPdfRequest extends PDFFile {
|
|||||||
allowableValues = {"moderate", "visual", "aggressive"},
|
allowableValues = {"moderate", "visual", "aggressive"},
|
||||||
requiredMode = Schema.RequiredMode.NOT_REQUIRED)
|
requiredMode = Schema.RequiredMode.NOT_REQUIRED)
|
||||||
private String redactionMode;
|
private String redactionMode;
|
||||||
|
|
||||||
|
@Schema(
|
||||||
|
description =
|
||||||
|
"List of OCR languages to use for restoration when needed (Tesseract codes like 'eng', 'deu')",
|
||||||
|
requiredMode = Schema.RequiredMode.NOT_REQUIRED)
|
||||||
|
private List<String> languages;
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,12 +1,12 @@
|
|||||||
package stirling.software.SPDF.utils.text;
|
package stirling.software.SPDF.utils.text;
|
||||||
|
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||||
|
|
||||||
import lombok.experimental.UtilityClass;
|
import lombok.experimental.UtilityClass;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@UtilityClass
|
@UtilityClass
|
||||||
public class TextEncodingHelper {
|
public class TextEncodingHelper {
|
||||||
@ -516,5 +516,4 @@ public class TextEncodingHelper {
|
|||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -921,6 +921,15 @@ autoRedact.wholeWordSearchLabel=Whole Word Search
|
|||||||
autoRedact.customPaddingLabel=Custom Extra Padding
|
autoRedact.customPaddingLabel=Custom Extra Padding
|
||||||
autoRedact.convertPDFToImageLabel=Convert PDF to PDF-Image (Used to remove text behind the box)
|
autoRedact.convertPDFToImageLabel=Convert PDF to PDF-Image (Used to remove text behind the box)
|
||||||
autoRedact.submitButton=Submit
|
autoRedact.submitButton=Submit
|
||||||
|
autoRedact.pdfImageLabel=PDF Image
|
||||||
|
autoRedact.redactionStyleLabel=Redaction Style
|
||||||
|
autoRedact.pdfImageDescription=For maximum security, uses an image-based method to ensure text is unrecoverable. May slightly affect document quality.
|
||||||
|
autoRedact.visualRedactionLabel=Visual
|
||||||
|
autoRedact.visualRedactionDescription=Converts to image with visual redactions for maximum security.
|
||||||
|
autoRedact.deleteTextLabel=Delete Text
|
||||||
|
autoRedact.deleteTextDescription=Removes the text completely. This may alter the original layout or leave a gap.
|
||||||
|
autoRedact.keepLayoutLabel=Keep Layout
|
||||||
|
autoRedact.keepLayoutDescription=Covers text with a redaction box, preserving the page's original design.
|
||||||
|
|
||||||
#redact
|
#redact
|
||||||
redact.title=Manual Redaction
|
redact.title=Manual Redaction
|
||||||
|
@ -23,6 +23,15 @@
|
|||||||
background-color: #0d6efd;
|
background-color: #0d6efd;
|
||||||
border-color: #0d6efd;
|
border-color: #0d6efd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* OCR language list styling */
|
||||||
|
#languages {
|
||||||
|
max-height: 400px;
|
||||||
|
overflow-y: auto;
|
||||||
|
border: 1px solid var(--md-sys-color-surface-3);
|
||||||
|
border-radius: 5px;
|
||||||
|
padding: 10px;
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
@ -62,27 +71,27 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="redaction-options-group">
|
<div class="redaction-options-group">
|
||||||
<label class="form-label fw-bold mb-3">Redaction style</label>
|
<label class="form-label fw-bold mb-3" th:text="#{autoRedact.redactionStyleLabel}"></label>
|
||||||
<div class="form-check mb-2">
|
<div class="form-check mb-2">
|
||||||
<input aria-describedby="visual-desc" class="form-check-input" id="visualImage" name="redactionMode" type="radio" value="visual">
|
<input aria-describedby="visual-desc" class="form-check-input" id="visualImage" name="redactionMode" type="radio" value="visual">
|
||||||
<label class="form-check-label" for="visualImage">Visual</label>
|
<label class="form-check-label" for="visualImage" th:text="#{autoRedact.visualRedactionLabel}">Visual</label>
|
||||||
<small class="form-text text-muted d-block mt-1" id="visual-desc">Converts to image with visual redactions for maximum security.</small>
|
<small class="form-text text-muted d-block mt-1" id="visual-desc" th:text="#{autoRedact.visualRedactionDescription}">Converts to image with visual redactions for maximum security.</small>
|
||||||
</div>
|
</div>
|
||||||
<div class="form-check mb-2">
|
<div class="form-check mb-2">
|
||||||
<input aria-describedby="delete-desc" class="form-check-input" id="deleteText" name="redactionMode" type="radio" value="aggressive">
|
<input aria-describedby="delete-desc" class="form-check-input" id="deleteText" name="redactionMode" type="radio" value="aggressive">
|
||||||
<label class="form-check-label" for="deleteText">Delete Text</label>
|
<label class="form-check-label" for="deleteText" th:text="#{autoRedact.deleteTextLabel}">Delete Text</label>
|
||||||
<small class="form-text text-muted d-block mt-1" id="delete-desc">Removes the text completely. This may alter the original layout or leave a gap.</small>
|
<small class="form-text text-muted d-block mt-1" id="delete-desc" th:text="#{autoRedact.deleteTextDescription}">Removes the text completely. This may alter the original layout or leave a gap.</small>
|
||||||
</div>
|
</div>
|
||||||
<div class="form-check mb-3">
|
<div class="form-check mb-3">
|
||||||
<input aria-describedby="keep-desc" checked class="form-check-input" id="keepLayout" name="redactionMode" type="radio" value="moderate">
|
<input aria-describedby="keep-desc" checked class="form-check-input" id="keepLayout" name="redactionMode" type="radio" value="moderate">
|
||||||
<label class="form-check-label" for="keepLayout">Keep Layout</label>
|
<label class="form-check-label" for="keepLayout" th:text="#{autoRedact.keepLayoutLabel}">Keep Layout</label>
|
||||||
<small class="form-text text-muted d-block mt-1" id="keep-desc">Covers text with a redaction box, preserving the page's original design.</small>
|
<small class="form-text text-muted d-block mt-1" id="keep-desc" th:text="#{autoRedact.keepLayoutDescription}">Covers text with a redaction box, preserving the page's original design.</small>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="form-check">
|
<div class="form-check">
|
||||||
<input aria-describedby="guarantee-desc" class="form-check-input" id="guaranteeRedaction" name="convertPDFToImage" type="checkbox">
|
<input aria-describedby="guarantee-desc" class="form-check-input" id="guaranteeRedaction" name="convertPDFToImage" type="checkbox">
|
||||||
<label class="form-check-label" for="guaranteeRedaction">PDF image</label>
|
<label class="form-check-label" for="guaranteeRedaction" th:text="#{autoRedact.pdfImageLabel}">PDF image</label>
|
||||||
<small class="form-text text-muted d-block mt-1" id="guarantee-desc">For maximum security, uses an image-based method to ensure text is unrecoverable. May slightly affect document quality.</small>
|
<small class="form-text text-muted d-block mt-1" id="guarantee-desc" th:text="#{autoRedact.pdfImageDescription}">For maximum security, uses an image-based method to ensure text is unrecoverable. May slightly affect document quality.</small>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@ -113,22 +122,14 @@
|
|||||||
|
|
||||||
<br>
|
<br>
|
||||||
|
|
||||||
<div class="mb-3">
|
<div class="mb-3" th:if="${#lists.size(languages) > 0}">
|
||||||
<label class="form-label" for="ocrLanguage">OCR Language</label>
|
<label class="form-label" for="languages">OCR Languages</label>
|
||||||
<select aria-describedby="ocr-desc" class="form-select" id="ocrLanguage" name="ocrLanguage">
|
<div id="languages">
|
||||||
<option value="eng">English</option>
|
<div class="form-check" th:each="language, iterStat : ${languages}">
|
||||||
<option value="spa">Spanish</option>
|
<input onchange="handleLangSelection()" required th:id="${'language-' + language}" th:name="languages" th:value="${language}" type="checkbox" />
|
||||||
<option value="fra">French</option>
|
<label th:for="${'language-' + language}" th:text="${language}"></label>
|
||||||
<option value="deu">German</option>
|
</div>
|
||||||
<option value="ita">Italian</option>
|
</div>
|
||||||
<option value="por">Portuguese</option>
|
|
||||||
<option value="rus">Russian</option>
|
|
||||||
<option value="ara">Arabic</option>
|
|
||||||
<option value="chi_sim">Chinese (Simplified)</option>
|
|
||||||
<option value="jpn">Japanese</option>
|
|
||||||
<option value="kor">Korean</option>
|
|
||||||
<option value="hin">Hindi</option>
|
|
||||||
</select>
|
|
||||||
<small class="form-text text-muted" id="ocr-desc">Used when OCR restoration is needed</small>
|
<small class="form-text text-muted" id="ocr-desc">Used when OCR restoration is needed</small>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@ -144,7 +145,7 @@
|
|||||||
</div>
|
</div>
|
||||||
<th:block th:insert="~{fragments/footer.html :: footer}"></th:block>
|
<th:block th:insert="~{fragments/footer.html :: footer}"></th:block>
|
||||||
</div>
|
</div>
|
||||||
<script>
|
<script th:inline="javascript">
|
||||||
function handleColorChange(selectedValue) {
|
function handleColorChange(selectedValue) {
|
||||||
const container = document.getElementById('customColorContainer');
|
const container = document.getElementById('customColorContainer');
|
||||||
const input = document.getElementById('customColor');
|
const input = document.getElementById('customColor');
|
||||||
@ -159,7 +160,83 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
document.addEventListener('DOMContentLoaded', function () {
|
/**
 * Keeps the "at least one OCR language" form validation in sync with the checkboxes.
 *
 * Every checkbox named "languages" is marked required while none is checked, and the
 * requirement is lifted from all of them as soon as any one is checked.
 */
function handleLangSelection() {
  const checkboxes = Array.from(document.getElementsByName("languages"));
  const anySelected = checkboxes.some((checkbox) => checkbox.checked);
  // `required` is a boolean attribute: any attribute value (even "false") means required,
  // so toggle the reflected property instead of writing string values.
  checkboxes.forEach((checkbox) => {
    checkbox.required = !anySelected;
  });
}
|
||||||
|
|
||||||
|
// Translations for language names
|
||||||
|
const languageTranslations = {};
|
||||||
|
/*[# th:each="lang : ${languages}"]*/
|
||||||
|
languageTranslations['[(${lang})]'] = /*[[#{${'lang.' + lang}}]]*/[(${lang})];
|
||||||
|
/*[/]*/
|
||||||
|
|
||||||
|
const localeToTesseract = {
|
||||||
|
'en': 'eng', 'fr': 'fra', 'de': 'deu', 'es': 'spa', 'it': 'ita', 'pt': 'por', 'ru': 'rus',
|
||||||
|
'zh': 'chi_sim', 'ja': 'jpn', 'ko': 'kor', 'ar': 'ara', 'hi': 'hin', 'nl': 'nld', 'cs': 'ces',
|
||||||
|
'pl': 'pol', 'tr': 'tur', 'uk': 'ukr', 'vi': 'vie', 'sv': 'swe', 'no': 'nor', 'fi': 'fin',
|
||||||
|
'da': 'dan', 'el': 'ell', 'he': 'heb', 'hu': 'hun', 'bg': 'bul', 'ro': 'ron', 'hr': 'hrv',
|
||||||
|
'sk': 'slk', 'id': 'ind', 'th': 'tha', 'sl': 'slv'
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Resolves the display name for a Tesseract language code.
 *
 * @param {string} shortCode language code used as the key into languageTranslations
 * @returns the translated name, or shortCode itself when no truthy translation exists
 */
function getTranslatedLanguageName(shortCode) {
  const translated = languageTranslations[shortCode];
  return translated ? translated : shortCode;
}
|
||||||
|
|
||||||
|
/**
 * Relabels and reorders the OCR language checkboxes inside the #languages container.
 *
 * 1. Each label's text is replaced with getTranslatedLanguageName(code), where the code is
 *    taken from the label's `for` attribute ("language-<code>").
 * 2. The entries are re-appended so that the Tesseract code matching the page/browser
 *    language comes first, then 'eng', then everything else ordered by translated name.
 *
 * Does nothing when the container is missing or holds no `.form-check` entries.
 */
function prioritizeLanguages() {
  const ID_PREFIX = 'language-';
  // Strip only the leading prefix so that codes which themselves contain '-' stay intact
  // (splitting on '-' and taking index 1 would truncate them).
  const codeFromId = (id) => {
    if (!id) return '';
    return id.startsWith(ID_PREFIX) ? id.slice(ID_PREFIX.length) : id;
  };

  const languageContainer = document.getElementById('languages');
  if (!languageContainer) return;
  const formChecks = Array.from(languageContainer.getElementsByClassName('form-check'));
  if (formChecks.length === 0) return;

  formChecks.forEach((element) => {
    const label = element.querySelector('label');
    if (!label) return;
    // getAttribute returns null for a label without `for`; leave such labels untouched.
    const langCode = codeFromId(label.getAttribute('for'));
    if (langCode) {
      label.textContent = getTranslatedLanguageName(langCode);
    }
  });

  // Prefer the page's declared language; fall back to the browser's.
  const browserLanguage = document.documentElement.lang || navigator.language || navigator.userLanguage;
  const uiLanguage = document.documentElement.getAttribute('data-language') || browserLanguage;
  // Reduce locale tags such as "pt-BR" or "zh_CN" to their primary subtag.
  const primaryLanguageCode = (uiLanguage || '').split(/[-_]/)[0].toLowerCase();
  const tesseractPrimaryCode = localeToTesseract[primaryLanguageCode];

  const priorityLanguages = [];
  if (tesseractPrimaryCode) priorityLanguages.push(tesseractPrimaryCode);
  if (tesseractPrimaryCode !== 'eng') priorityLanguages.push('eng');

  const sortedElements = formChecks.sort((a, b) => {
    const aInput = a.querySelector('input');
    const bInput = b.querySelector('input');
    if (!aInput || !bInput) return 0;
    const aLangCode = codeFromId(aInput.id);
    const bLangCode = codeFromId(bInput.id);
    const aRank = priorityLanguages.indexOf(aLangCode);
    const bRank = priorityLanguages.indexOf(bLangCode);
    // Priority entries keep the order of priorityLanguages and precede all others.
    if (aRank !== -1 && bRank !== -1) return aRank - bRank;
    if (aRank !== -1) return -1;
    if (bRank !== -1) return 1;
    return getTranslatedLanguageName(aLangCode).localeCompare(getTranslatedLanguageName(bLangCode));
  });

  // Clear the container (this also drops any non-entry children) and re-append in order.
  languageContainer.innerHTML = '';
  sortedElements.forEach((element) => languageContainer.appendChild(element));
}
|
||||||
|
|
||||||
|
document.addEventListener('DOMContentLoaded', function () {
|
||||||
const redactionModeRadios = document.querySelectorAll('input[name="redactionMode"]');
|
const redactionModeRadios = document.querySelectorAll('input[name="redactionMode"]');
|
||||||
const aggressiveModeHidden = document.getElementById('aggressiveMode');
|
const aggressiveModeHidden = document.getElementById('aggressiveMode');
|
||||||
const guaranteeRedactionCheckbox = document.getElementById('guaranteeRedaction');
|
const guaranteeRedactionCheckbox = document.getElementById('guaranteeRedaction');
|
||||||
@ -188,7 +265,6 @@
|
|||||||
|
|
||||||
if (defaultColor) {
|
if (defaultColor) {
|
||||||
handleColorChange(defaultColor.value);
|
handleColorChange(defaultColor.value);
|
||||||
// Set initial value for customColor input when a pre-defined color is selected
|
|
||||||
const customColorInput = document.getElementById('customColor');
|
const customColorInput = document.getElementById('customColor');
|
||||||
if (defaultColor.value !== 'custom') {
|
if (defaultColor.value !== 'custom') {
|
||||||
customColorInput.value = defaultColor.value;
|
customColorInput.value = defaultColor.value;
|
||||||
@ -196,6 +272,9 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
updateMode();
|
updateMode();
|
||||||
|
|
||||||
|
// Initialize language list ordering & labels
|
||||||
|
prioritizeLanguages();
|
||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
|
Loading…
Reference in New Issue
Block a user