mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-08 17:51:20 +02:00
refactor redaction services to improve resource management and streamline text processing
Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
parent
f236505cae
commit
3ac7f0df4c
@ -30,51 +30,39 @@ class AggressiveRedactionService implements RedactionModeStrategy {
|
||||
boolean useRegex = Boolean.TRUE.equals(request.getUseRegex());
|
||||
boolean wholeWord = Boolean.TRUE.equals(request.getWholeWordSearch());
|
||||
|
||||
PDDocument doc = null;
|
||||
PDDocument fb = null;
|
||||
try {
|
||||
doc = pdfDocumentFactory.load(request.getFileInput());
|
||||
try (PDDocument doc = pdfDocumentFactory.load(request.getFileInput())) {
|
||||
Map<Integer, List<PDFText>> allFound =
|
||||
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||
if (allFound.isEmpty()) {
|
||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||
doc.save(baos);
|
||||
return baos.toByteArray();
|
||||
}
|
||||
return toByteArray(doc);
|
||||
}
|
||||
|
||||
helper.performTextReplacementAggressive(doc, allFound, listOfText, useRegex, wholeWord);
|
||||
Map<Integer, List<PDFText>> residual =
|
||||
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||
boolean residualExists = residual.values().stream().mapToInt(List::size).sum() > 0;
|
||||
String effectiveColor =
|
||||
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
||||
? "#000000"
|
||||
: request.getRedactColor();
|
||||
|
||||
if (residualExists) {
|
||||
// Use the new visual redaction with OCR restoration fallback
|
||||
return helper.performVisualRedactionWithOcrRestoration(
|
||||
request, listOfText, useRegex, wholeWord);
|
||||
}
|
||||
|
||||
return RedactionService.finalizeRedaction(
|
||||
doc,
|
||||
allFound,
|
||||
request.getRedactColor(),
|
||||
request.getCustomPadding(),
|
||||
request.getConvertPDFToImage(), /*text removal*/
|
||||
request.getConvertPDFToImage(),
|
||||
true);
|
||||
} catch (Exception e) {
|
||||
throw new IOException("Aggressive redaction failed: " + e.getMessage(), e);
|
||||
} finally {
|
||||
if (doc != null)
|
||||
try {
|
||||
doc.close();
|
||||
} catch (IOException ignore) {
|
||||
}
|
||||
if (fb != null)
|
||||
try {
|
||||
fb.close();
|
||||
} catch (IOException ignore) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private byte[] toByteArray(PDDocument doc) throws IOException {
|
||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||
doc.save(baos);
|
||||
return baos.toByteArray();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -29,49 +29,36 @@ class ModerateRedactionService implements RedactionModeStrategy {
|
||||
boolean useRegex = Boolean.TRUE.equals(request.getUseRegex());
|
||||
boolean wholeWord = Boolean.TRUE.equals(request.getWholeWordSearch());
|
||||
|
||||
PDDocument doc = null;
|
||||
PDDocument fallback = null;
|
||||
try {
|
||||
doc = pdfDocumentFactory.load(request.getFileInput());
|
||||
try (PDDocument doc = pdfDocumentFactory.load(request.getFileInput())) {
|
||||
Map<Integer, List<PDFText>> allFound =
|
||||
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
|
||||
if (allFound.isEmpty()) {
|
||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||
doc.save(baos);
|
||||
return baos.toByteArray();
|
||||
}
|
||||
return toByteArray(doc);
|
||||
}
|
||||
|
||||
boolean fallbackToBoxOnly =
|
||||
helper.performTextReplacement(doc, allFound, listOfText, useRegex, wholeWord);
|
||||
String effectiveColor =
|
||||
(request.getRedactColor() == null || request.getRedactColor().isBlank())
|
||||
? "#000000"
|
||||
: request.getRedactColor();
|
||||
if (fallbackToBoxOnly) {
|
||||
// Use the new visual redaction with OCR restoration fallback
|
||||
return helper.performVisualRedactionWithOcrRestoration(
|
||||
request, listOfText, useRegex, wholeWord);
|
||||
}
|
||||
|
||||
return RedactionService.finalizeRedaction(
|
||||
doc,
|
||||
allFound,
|
||||
effectiveColor,
|
||||
request.getRedactColor(),
|
||||
request.getCustomPadding(),
|
||||
request.getConvertPDFToImage(),
|
||||
false);
|
||||
} catch (Exception e) {
|
||||
throw new IOException("Moderate redaction failed: " + e.getMessage(), e);
|
||||
} finally {
|
||||
if (doc != null)
|
||||
try {
|
||||
doc.close();
|
||||
} catch (IOException ignore) {
|
||||
}
|
||||
if (fallback != null)
|
||||
try {
|
||||
fallback.close();
|
||||
} catch (IOException ignore) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private byte[] toByteArray(PDDocument doc) throws IOException {
|
||||
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||
doc.save(baos);
|
||||
return baos.toByteArray();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -5,10 +5,17 @@ import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||
import lombok.experimental.UtilityClass;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@Slf4j
|
||||
@UtilityClass
|
||||
public class TextEncodingHelper {
|
||||
|
||||
private final Pattern PATTERN = Pattern.compile("^[A-Z]+$");
|
||||
private final Pattern REGEX = Pattern.compile("^[A-Z]{6}\\+.*");
|
||||
private final Pattern REGEXP = Pattern.compile("^[A-Z]{5}\\+.*");
|
||||
private final Pattern PATTERN1 = Pattern.compile("^[A-Z]{4}\\+.*");
|
||||
|
||||
public boolean canEncodeCharacters(PDFont font, String text) {
|
||||
if (font == null || text == null) {
|
||||
return false;
|
||||
@ -421,21 +428,21 @@ public class TextEncodingHelper {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (fontName.matches("^[A-Z]{6}\\+.*")) {
|
||||
if (REGEX.matcher(fontName).matches()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (fontName.matches("^[A-Z]{5}\\+.*")) {
|
||||
if (REGEXP.matcher(fontName).matches()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (fontName.matches("^[A-Z]{4}\\+.*")) {
|
||||
if (PATTERN1.matcher(fontName).matches()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (fontName.contains("+")) {
|
||||
String prefix = fontName.split("\\+")[0];
|
||||
if (prefix.matches("^[A-Z]+$") && prefix.length() >= 4) {
|
||||
if (PATTERN.matcher(prefix).matches() && prefix.length() >= 4) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -510,68 +517,4 @@ public class TextEncodingHelper {
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean canEncodeAnyCharacter(PDFont font) {
|
||||
if (font == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
String[] testStrings = {
|
||||
"a", "A", "0", " ", ".", "!", "e", "i", "o", "u", "n", "t", "r", "s", "l", "1", "2",
|
||||
"3", "4", "5", "6", "7", "8", "9", ",", ".", ";", ":", "?", "!", "(", ")", "[", "]",
|
||||
"{", "}", "hello", "test", "sample", "abc", "123", "ABC"
|
||||
};
|
||||
|
||||
for (String testStr : testStrings) {
|
||||
try {
|
||||
byte[] encoded = font.encode(testStr);
|
||||
if (encoded.length > 0) {
|
||||
return true;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
}
|
||||
}
|
||||
|
||||
for (int code = 0; code <= 0xFFFF; code += 100) {
|
||||
try {
|
||||
String testStr = String.valueOf((char) code);
|
||||
byte[] encoded = font.encode(testStr);
|
||||
if (encoded.length > 0) {
|
||||
return true;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean isValidFont(PDFont font) {
|
||||
if (font == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
try {
|
||||
String name = font.getName();
|
||||
if (name != null && !name.trim().isEmpty()) {
|
||||
return true;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
}
|
||||
|
||||
try {
|
||||
if (canCalculateBasicWidths(font)) {
|
||||
return true;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
}
|
||||
|
||||
try {
|
||||
if (canEncodeAnyCharacter(font)) {
|
||||
return true;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -80,10 +80,6 @@ public class WidthCalculator {
|
||||
Float charWidth =
|
||||
calculateSingleCharacterWidth(font, character, fontSize, codePoint);
|
||||
|
||||
if (charWidth == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
totalWidth += charWidth;
|
||||
if (previousCodePoint != -1) {
|
||||
totalWidth += calculateKerning(font, previousCodePoint, codePoint, fontSize);
|
||||
@ -203,9 +199,6 @@ public class WidthCalculator {
|
||||
|
||||
Float charWidth =
|
||||
calculateGlyphWidthComprehensively(font, character, codePoint, fontSize);
|
||||
if (charWidth == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
totalWidth += charWidth;
|
||||
i += Character.charCount(codePoint);
|
||||
@ -514,64 +507,4 @@ public class WidthCalculator {
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public float calculateMinimumTextWidth(PDFont font, String text, float fontSize) {
|
||||
if (font == null || text == null || text.isEmpty() || fontSize <= 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
try {
|
||||
float minWidth = calculateAccurateWidth(font, text, fontSize);
|
||||
if (minWidth > 0) {
|
||||
return minWidth * 0.8f;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
}
|
||||
|
||||
return text.length() * fontSize * 0.3f;
|
||||
}
|
||||
|
||||
public float calculateMaximumTextWidth(PDFont font, String text, float fontSize) {
|
||||
if (font == null || text == null || text.isEmpty() || fontSize <= 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
try {
|
||||
float maxWidth = calculateAccurateWidth(font, text, fontSize);
|
||||
if (maxWidth > 0) {
|
||||
return maxWidth * 1.2f;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
}
|
||||
|
||||
return text.length() * fontSize * 1.0f;
|
||||
}
|
||||
|
||||
public boolean canCalculateWidthForText(PDFont font, String text) {
|
||||
if (font == null || text == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (text.isEmpty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
try {
|
||||
Float width = calculateDirectWidth(font, text, 12f);
|
||||
if (width != null) {
|
||||
return true;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
}
|
||||
|
||||
try {
|
||||
Float width = calculateCharacterByCharacterWidth(font, text, 12f);
|
||||
if (width != null) {
|
||||
return true;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -13,20 +13,7 @@
|
||||
color: #6c757d !important;
|
||||
}
|
||||
|
||||
.btn-primary:focus {
|
||||
box-shadow: 0 0 0 0.2rem rgba(13, 110, 253, 0.25);
|
||||
outline: 2px solid #0d6efd;
|
||||
outline-offset: 2px;
|
||||
}
|
||||
|
||||
.form-check-input:focus {
|
||||
box-shadow: 0 0 0 0.2rem rgba(13, 110, 253, 0.25);
|
||||
outline: 2px solid #0d6efd;
|
||||
outline-offset: 2px;
|
||||
}
|
||||
|
||||
.form-control:focus, .form-select:focus {
|
||||
border-color: #0d6efd;
|
||||
.btn-primary:focus, .form-check-input:focus, .form-control:focus, .form-select:focus {
|
||||
box-shadow: 0 0 0 0.2rem rgba(13, 110, 253, 0.25);
|
||||
outline: 2px solid #0d6efd;
|
||||
outline-offset: 2px;
|
||||
@ -36,20 +23,6 @@
|
||||
background-color: #0d6efd;
|
||||
border-color: #0d6efd;
|
||||
}
|
||||
|
||||
|
||||
|
||||
.sr-only {
|
||||
position: absolute;
|
||||
width: 1px;
|
||||
height: 1px;
|
||||
padding: 0;
|
||||
margin: -1px;
|
||||
overflow: hidden;
|
||||
clip: rect(0, 0, 0, 0);
|
||||
white-space: nowrap;
|
||||
border: 0;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user