mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-08 17:51:20 +02:00
refactor redaction services to improve resource management and streamline text processing
Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
parent
f236505cae
commit
3ac7f0df4c
@ -30,51 +30,39 @@ class AggressiveRedactionService implements RedactionModeStrategy {
|
|||||||
boolean useRegex = Boolean.TRUE.equals(request.getUseRegex());
|
boolean useRegex = Boolean.TRUE.equals(request.getUseRegex());
|
||||||
boolean wholeWord = Boolean.TRUE.equals(request.getWholeWordSearch());
|
boolean wholeWord = Boolean.TRUE.equals(request.getWholeWordSearch());
|
||||||
|
|
||||||
PDDocument doc = null;
|
try (PDDocument doc = pdfDocumentFactory.load(request.getFileInput())) {
|
||||||
PDDocument fb = null;
|
|
||||||
try {
|
|
||||||
doc = pdfDocumentFactory.load(request.getFileInput());
|
|
||||||
Map<Integer, List<PDFText>> allFound =
|
Map<Integer, List<PDFText>> allFound =
|
||||||
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||||
if (allFound.isEmpty()) {
|
if (allFound.isEmpty()) {
|
||||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
return toByteArray(doc);
|
||||||
doc.save(baos);
|
|
||||||
return baos.toByteArray();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
helper.performTextReplacementAggressive(doc, allFound, listOfText, useRegex, wholeWord);
|
helper.performTextReplacementAggressive(doc, allFound, listOfText, useRegex, wholeWord);
|
||||||
Map<Integer, List<PDFText>> residual =
|
Map<Integer, List<PDFText>> residual =
|
||||||
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||||
boolean residualExists = residual.values().stream().mapToInt(List::size).sum() > 0;
|
boolean residualExists = residual.values().stream().mapToInt(List::size).sum() > 0;
|
||||||
String effectiveColor =
|
|
||||||
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
|
||||||
? "#000000"
|
|
||||||
: request.getRedactColor();
|
|
||||||
if (residualExists) {
|
if (residualExists) {
|
||||||
// Use the new visual redaction with OCR restoration fallback
|
|
||||||
return helper.performVisualRedactionWithOcrRestoration(
|
return helper.performVisualRedactionWithOcrRestoration(
|
||||||
request, listOfText, useRegex, wholeWord);
|
request, listOfText, useRegex, wholeWord);
|
||||||
}
|
}
|
||||||
|
|
||||||
return RedactionService.finalizeRedaction(
|
return RedactionService.finalizeRedaction(
|
||||||
doc,
|
doc,
|
||||||
allFound,
|
allFound,
|
||||||
request.getRedactColor(),
|
request.getRedactColor(),
|
||||||
request.getCustomPadding(),
|
request.getCustomPadding(),
|
||||||
request.getConvertPDFToImage(), /*text removal*/
|
request.getConvertPDFToImage(),
|
||||||
true);
|
true);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new IOException("Aggressive redaction failed: " + e.getMessage(), e);
|
throw new IOException("Aggressive redaction failed: " + e.getMessage(), e);
|
||||||
} finally {
|
}
|
||||||
if (doc != null)
|
}
|
||||||
try {
|
|
||||||
doc.close();
|
private byte[] toByteArray(PDDocument doc) throws IOException {
|
||||||
} catch (IOException ignore) {
|
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||||
}
|
doc.save(baos);
|
||||||
if (fb != null)
|
return baos.toByteArray();
|
||||||
try {
|
|
||||||
fb.close();
|
|
||||||
} catch (IOException ignore) {
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -29,49 +29,36 @@ class ModerateRedactionService implements RedactionModeStrategy {
|
|||||||
boolean useRegex = Boolean.TRUE.equals(request.getUseRegex());
|
boolean useRegex = Boolean.TRUE.equals(request.getUseRegex());
|
||||||
boolean wholeWord = Boolean.TRUE.equals(request.getWholeWordSearch());
|
boolean wholeWord = Boolean.TRUE.equals(request.getWholeWordSearch());
|
||||||
|
|
||||||
PDDocument doc = null;
|
try (PDDocument doc = pdfDocumentFactory.load(request.getFileInput())) {
|
||||||
PDDocument fallback = null;
|
|
||||||
try {
|
|
||||||
doc = pdfDocumentFactory.load(request.getFileInput());
|
|
||||||
Map<Integer, List<PDFText>> allFound =
|
Map<Integer, List<PDFText>> allFound =
|
||||||
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||||
if (allFound.isEmpty()) {
|
if (allFound.isEmpty()) {
|
||||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
return toByteArray(doc);
|
||||||
doc.save(baos);
|
|
||||||
return baos.toByteArray();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean fallbackToBoxOnly =
|
boolean fallbackToBoxOnly =
|
||||||
helper.performTextReplacement(doc, allFound, listOfText, useRegex, wholeWord);
|
helper.performTextReplacement(doc, allFound, listOfText, useRegex, wholeWord);
|
||||||
String effectiveColor =
|
|
||||||
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
|
||||||
? "#000000"
|
|
||||||
: request.getRedactColor();
|
|
||||||
if (fallbackToBoxOnly) {
|
if (fallbackToBoxOnly) {
|
||||||
// Use the new visual redaction with OCR restoration fallback
|
|
||||||
return helper.performVisualRedactionWithOcrRestoration(
|
return helper.performVisualRedactionWithOcrRestoration(
|
||||||
request, listOfText, useRegex, wholeWord);
|
request, listOfText, useRegex, wholeWord);
|
||||||
}
|
}
|
||||||
|
|
||||||
return RedactionService.finalizeRedaction(
|
return RedactionService.finalizeRedaction(
|
||||||
doc,
|
doc,
|
||||||
allFound,
|
allFound,
|
||||||
effectiveColor,
|
request.getRedactColor(),
|
||||||
request.getCustomPadding(),
|
request.getCustomPadding(),
|
||||||
request.getConvertPDFToImage(),
|
request.getConvertPDFToImage(),
|
||||||
false);
|
false);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new IOException("Moderate redaction failed: " + e.getMessage(), e);
|
throw new IOException("Moderate redaction failed: " + e.getMessage(), e);
|
||||||
} finally {
|
}
|
||||||
if (doc != null)
|
}
|
||||||
try {
|
|
||||||
doc.close();
|
private byte[] toByteArray(PDDocument doc) throws IOException {
|
||||||
} catch (IOException ignore) {
|
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||||
}
|
doc.save(baos);
|
||||||
if (fallback != null)
|
return baos.toByteArray();
|
||||||
try {
|
|
||||||
fallback.close();
|
|
||||||
} catch (IOException ignore) {
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -5,10 +5,17 @@ import org.apache.pdfbox.pdmodel.font.PDFont;
|
|||||||
import lombok.experimental.UtilityClass;
|
import lombok.experimental.UtilityClass;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@UtilityClass
|
@UtilityClass
|
||||||
public class TextEncodingHelper {
|
public class TextEncodingHelper {
|
||||||
|
|
||||||
|
private final Pattern PATTERN = Pattern.compile("^[A-Z]+$");
|
||||||
|
private final Pattern REGEX = Pattern.compile("^[A-Z]{6}\\+.*");
|
||||||
|
private final Pattern REGEXP = Pattern.compile("^[A-Z]{5}\\+.*");
|
||||||
|
private final Pattern PATTERN1 = Pattern.compile("^[A-Z]{4}\\+.*");
|
||||||
|
|
||||||
public boolean canEncodeCharacters(PDFont font, String text) {
|
public boolean canEncodeCharacters(PDFont font, String text) {
|
||||||
if (font == null || text == null) {
|
if (font == null || text == null) {
|
||||||
return false;
|
return false;
|
||||||
@ -421,21 +428,21 @@ public class TextEncodingHelper {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fontName.matches("^[A-Z]{6}\\+.*")) {
|
if (REGEX.matcher(fontName).matches()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fontName.matches("^[A-Z]{5}\\+.*")) {
|
if (REGEXP.matcher(fontName).matches()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fontName.matches("^[A-Z]{4}\\+.*")) {
|
if (PATTERN1.matcher(fontName).matches()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fontName.contains("+")) {
|
if (fontName.contains("+")) {
|
||||||
String prefix = fontName.split("\\+")[0];
|
String prefix = fontName.split("\\+")[0];
|
||||||
if (prefix.matches("^[A-Z]+$") && prefix.length() >= 4) {
|
if (PATTERN.matcher(prefix).matches() && prefix.length() >= 4) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -510,68 +517,4 @@ public class TextEncodingHelper {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean canEncodeAnyCharacter(PDFont font) {
|
|
||||||
if (font == null) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
String[] testStrings = {
|
|
||||||
"a", "A", "0", " ", ".", "!", "e", "i", "o", "u", "n", "t", "r", "s", "l", "1", "2",
|
|
||||||
"3", "4", "5", "6", "7", "8", "9", ",", ".", ";", ":", "?", "!", "(", ")", "[", "]",
|
|
||||||
"{", "}", "hello", "test", "sample", "abc", "123", "ABC"
|
|
||||||
};
|
|
||||||
|
|
||||||
for (String testStr : testStrings) {
|
|
||||||
try {
|
|
||||||
byte[] encoded = font.encode(testStr);
|
|
||||||
if (encoded.length > 0) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int code = 0; code <= 0xFFFF; code += 100) {
|
|
||||||
try {
|
|
||||||
String testStr = String.valueOf((char) code);
|
|
||||||
byte[] encoded = font.encode(testStr);
|
|
||||||
if (encoded.length > 0) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isValidFont(PDFont font) {
|
|
||||||
if (font == null) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
String name = font.getName();
|
|
||||||
if (name != null && !name.trim().isEmpty()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
if (canCalculateBasicWidths(font)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
if (canEncodeAnyCharacter(font)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -80,10 +80,6 @@ public class WidthCalculator {
|
|||||||
Float charWidth =
|
Float charWidth =
|
||||||
calculateSingleCharacterWidth(font, character, fontSize, codePoint);
|
calculateSingleCharacterWidth(font, character, fontSize, codePoint);
|
||||||
|
|
||||||
if (charWidth == null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
totalWidth += charWidth;
|
totalWidth += charWidth;
|
||||||
if (previousCodePoint != -1) {
|
if (previousCodePoint != -1) {
|
||||||
totalWidth += calculateKerning(font, previousCodePoint, codePoint, fontSize);
|
totalWidth += calculateKerning(font, previousCodePoint, codePoint, fontSize);
|
||||||
@ -203,9 +199,6 @@ public class WidthCalculator {
|
|||||||
|
|
||||||
Float charWidth =
|
Float charWidth =
|
||||||
calculateGlyphWidthComprehensively(font, character, codePoint, fontSize);
|
calculateGlyphWidthComprehensively(font, character, codePoint, fontSize);
|
||||||
if (charWidth == null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
totalWidth += charWidth;
|
totalWidth += charWidth;
|
||||||
i += Character.charCount(codePoint);
|
i += Character.charCount(codePoint);
|
||||||
@ -514,64 +507,4 @@ public class WidthCalculator {
|
|||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public float calculateMinimumTextWidth(PDFont font, String text, float fontSize) {
|
|
||||||
if (font == null || text == null || text.isEmpty() || fontSize <= 0) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
float minWidth = calculateAccurateWidth(font, text, fontSize);
|
|
||||||
if (minWidth > 0) {
|
|
||||||
return minWidth * 0.8f;
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
}
|
|
||||||
|
|
||||||
return text.length() * fontSize * 0.3f;
|
|
||||||
}
|
|
||||||
|
|
||||||
public float calculateMaximumTextWidth(PDFont font, String text, float fontSize) {
|
|
||||||
if (font == null || text == null || text.isEmpty() || fontSize <= 0) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
float maxWidth = calculateAccurateWidth(font, text, fontSize);
|
|
||||||
if (maxWidth > 0) {
|
|
||||||
return maxWidth * 1.2f;
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
}
|
|
||||||
|
|
||||||
return text.length() * fontSize * 1.0f;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean canCalculateWidthForText(PDFont font, String text) {
|
|
||||||
if (font == null || text == null) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (text.isEmpty()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
Float width = calculateDirectWidth(font, text, 12f);
|
|
||||||
if (width != null) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
Float width = calculateCharacterByCharacterWidth(font, text, 12f);
|
|
||||||
if (width != null) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -13,20 +13,7 @@
|
|||||||
color: #6c757d !important;
|
color: #6c757d !important;
|
||||||
}
|
}
|
||||||
|
|
||||||
.btn-primary:focus {
|
.btn-primary:focus, .form-check-input:focus, .form-control:focus, .form-select:focus {
|
||||||
box-shadow: 0 0 0 0.2rem rgba(13, 110, 253, 0.25);
|
|
||||||
outline: 2px solid #0d6efd;
|
|
||||||
outline-offset: 2px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.form-check-input:focus {
|
|
||||||
box-shadow: 0 0 0 0.2rem rgba(13, 110, 253, 0.25);
|
|
||||||
outline: 2px solid #0d6efd;
|
|
||||||
outline-offset: 2px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.form-control:focus, .form-select:focus {
|
|
||||||
border-color: #0d6efd;
|
|
||||||
box-shadow: 0 0 0 0.2rem rgba(13, 110, 253, 0.25);
|
box-shadow: 0 0 0 0.2rem rgba(13, 110, 253, 0.25);
|
||||||
outline: 2px solid #0d6efd;
|
outline: 2px solid #0d6efd;
|
||||||
outline-offset: 2px;
|
outline-offset: 2px;
|
||||||
@ -36,20 +23,6 @@
|
|||||||
background-color: #0d6efd;
|
background-color: #0d6efd;
|
||||||
border-color: #0d6efd;
|
border-color: #0d6efd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
.sr-only {
|
|
||||||
position: absolute;
|
|
||||||
width: 1px;
|
|
||||||
height: 1px;
|
|
||||||
padding: 0;
|
|
||||||
margin: -1px;
|
|
||||||
overflow: hidden;
|
|
||||||
clip: rect(0, 0, 0, 0);
|
|
||||||
white-space: nowrap;
|
|
||||||
border: 0;
|
|
||||||
}
|
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user