refactor redaction services to improve resource management and streamline text processing

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
Balázs Szücs 2025-08-25 19:53:19 +02:00
parent f236505cae
commit 3ac7f0df4c
6 changed files with 394 additions and 707 deletions

View File

@ -30,51 +30,39 @@ class AggressiveRedactionService implements RedactionModeStrategy {
boolean useRegex = Boolean.TRUE.equals(request.getUseRegex());
boolean wholeWord = Boolean.TRUE.equals(request.getWholeWordSearch());
PDDocument doc = null;
PDDocument fb = null;
try {
doc = pdfDocumentFactory.load(request.getFileInput());
try (PDDocument doc = pdfDocumentFactory.load(request.getFileInput())) {
Map<Integer, List<PDFText>> allFound =
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
if (allFound.isEmpty()) {
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
doc.save(baos);
return baos.toByteArray();
}
return toByteArray(doc);
}
helper.performTextReplacementAggressive(doc, allFound, listOfText, useRegex, wholeWord);
Map<Integer, List<PDFText>> residual =
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
boolean residualExists = residual.values().stream().mapToInt(List::size).sum() > 0;
String effectiveColor =
(request.getRedactColor() == null || request.getRedactColor().isBlank())
? "#000000"
: request.getRedactColor();
if (residualExists) {
// Use the new visual redaction with OCR restoration fallback
return helper.performVisualRedactionWithOcrRestoration(
request, listOfText, useRegex, wholeWord);
}
return RedactionService.finalizeRedaction(
doc,
allFound,
request.getRedactColor(),
request.getCustomPadding(),
request.getConvertPDFToImage(), /*text removal*/
request.getConvertPDFToImage(),
true);
} catch (Exception e) {
throw new IOException("Aggressive redaction failed: " + e.getMessage(), e);
} finally {
if (doc != null)
try {
doc.close();
} catch (IOException ignore) {
}
if (fb != null)
try {
fb.close();
} catch (IOException ignore) {
}
}
}
/**
 * Saves the given document into an in-memory buffer and returns the buffered bytes.
 *
 * @param doc the document to save
 * @return the bytes produced by {@code doc.save(...)}
 * @throws IOException if saving the document fails
 */
private byte[] toByteArray(PDDocument doc) throws IOException {
    try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
        doc.save(buffer);
        byte[] serialized = buffer.toByteArray();
        return serialized;
    }
}
}

View File

@ -29,49 +29,36 @@ class ModerateRedactionService implements RedactionModeStrategy {
boolean useRegex = Boolean.TRUE.equals(request.getUseRegex());
boolean wholeWord = Boolean.TRUE.equals(request.getWholeWordSearch());
PDDocument doc = null;
PDDocument fallback = null;
try {
doc = pdfDocumentFactory.load(request.getFileInput());
try (PDDocument doc = pdfDocumentFactory.load(request.getFileInput())) {
Map<Integer, List<PDFText>> allFound =
RedactionService.findTextToRedact(doc, listOfText, useRegex, wholeWord);
if (allFound.isEmpty()) {
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
doc.save(baos);
return baos.toByteArray();
}
return toByteArray(doc);
}
boolean fallbackToBoxOnly =
helper.performTextReplacement(doc, allFound, listOfText, useRegex, wholeWord);
String effectiveColor =
(request.getRedactColor() == null || request.getRedactColor().isBlank())
? "#000000"
: request.getRedactColor();
if (fallbackToBoxOnly) {
// Use the new visual redaction with OCR restoration fallback
return helper.performVisualRedactionWithOcrRestoration(
request, listOfText, useRegex, wholeWord);
}
return RedactionService.finalizeRedaction(
doc,
allFound,
effectiveColor,
request.getRedactColor(),
request.getCustomPadding(),
request.getConvertPDFToImage(),
false);
} catch (Exception e) {
throw new IOException("Moderate redaction failed: " + e.getMessage(), e);
} finally {
if (doc != null)
try {
doc.close();
} catch (IOException ignore) {
}
if (fallback != null)
try {
fallback.close();
} catch (IOException ignore) {
}
}
}
/**
 * Saves the given document into an in-memory buffer and returns the buffered bytes.
 *
 * @param doc the document to save
 * @return the bytes produced by {@code doc.save(...)}
 * @throws IOException if saving the document fails
 */
private byte[] toByteArray(PDDocument doc) throws IOException {
    try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
        doc.save(buffer);
        byte[] serialized = buffer.toByteArray();
        return serialized;
    }
}
}

View File

@ -5,10 +5,17 @@ import org.apache.pdfbox.pdmodel.font.PDFont;
import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
import java.util.regex.Pattern;
@Slf4j
@UtilityClass
public class TextEncodingHelper {
// Precompiled once so the font-name checks do not recompile a regex on every call.
// Matches a string consisting solely of one or more uppercase ASCII letters (A-Z).
private final Pattern PATTERN = Pattern.compile("^[A-Z]+$");
// Matches a string that starts with exactly six uppercase ASCII letters followed by '+'.
private final Pattern REGEX = Pattern.compile("^[A-Z]{6}\\+.*");
// Matches a string that starts with exactly five uppercase ASCII letters followed by '+'.
private final Pattern REGEXP = Pattern.compile("^[A-Z]{5}\\+.*");
// Matches a string that starts with exactly four uppercase ASCII letters followed by '+'.
private final Pattern PATTERN1 = Pattern.compile("^[A-Z]{4}\\+.*");
public boolean canEncodeCharacters(PDFont font, String text) {
if (font == null || text == null) {
return false;
@ -421,21 +428,21 @@ public class TextEncodingHelper {
return false;
}
if (fontName.matches("^[A-Z]{6}\\+.*")) {
if (REGEX.matcher(fontName).matches()) {
return true;
}
if (fontName.matches("^[A-Z]{5}\\+.*")) {
if (REGEXP.matcher(fontName).matches()) {
return true;
}
if (fontName.matches("^[A-Z]{4}\\+.*")) {
if (PATTERN1.matcher(fontName).matches()) {
return true;
}
if (fontName.contains("+")) {
String prefix = fontName.split("\\+")[0];
if (prefix.matches("^[A-Z]+$") && prefix.length() >= 4) {
if (PATTERN.matcher(prefix).matches() && prefix.length() >= 4) {
return true;
}
}
@ -510,68 +517,4 @@ public class TextEncodingHelper {
return false;
}
/**
 * Probes whether the font can encode at least one piece of text.
 *
 * <p>A fixed list of sample strings is tried first, followed by every 100th UTF-16 code unit
 * from 0 through 0xFFFF. A probe that throws is treated as "not encodable" and the next probe
 * is tried.
 *
 * @param font the font to probe; may be {@code null}
 * @return {@code true} as soon as one probe encodes to a non-empty byte array; {@code false}
 *     when {@code font} is {@code null} or no probe succeeds
 */
public boolean canEncodeAnyCharacter(PDFont font) {
    if (font == null) {
        return false;
    }

    final String[] samples = {
        "a", "A", "0", " ", ".", "!", "e", "i", "o", "u", "n", "t", "r", "s", "l", "1", "2",
        "3", "4", "5", "6", "7", "8", "9", ",", ".", ";", ":", "?", "!", "(", ")", "[", "]",
        "{", "}", "hello", "test", "sample", "abc", "123", "ABC"
    };

    // Phase 1: fixed sample strings.
    for (String sample : samples) {
        try {
            if (font.encode(sample).length > 0) {
                return true;
            }
        } catch (Exception ignored) {
            // Best-effort probe: a failure only means this sample is not encodable.
        }
    }

    // Phase 2: coarse sweep over the 16-bit range, one code unit out of every 100.
    int codeUnit = 0;
    while (codeUnit <= 0xFFFF) {
        try {
            String single = String.valueOf((char) codeUnit);
            if (font.encode(single).length > 0) {
                return true;
            }
        } catch (Exception ignored) {
            // Best-effort probe: a failure only means this code unit is not encodable.
        }
        codeUnit += 100;
    }

    return false;
}
/**
 * Reports whether the font passes at least one of three checks, tried in order:
 * its name is non-empty after {@code trim()}, {@code canCalculateBasicWidths(font)} succeeds,
 * or {@code canEncodeAnyCharacter(font)} succeeds.
 *
 * <p>An exception raised by one check is swallowed and the next check is attempted.
 *
 * @param font the font to inspect; may be {@code null}
 * @return {@code true} if any check passes; {@code false} for a {@code null} font or when
 *     every check fails or throws
 */
public boolean isValidFont(PDFont font) {
    if (font == null) {
        return false;
    }

    // Check 1: the font reports a name that is not empty once trimmed.
    try {
        String fontName = font.getName();
        boolean hasName = fontName != null && !fontName.trim().isEmpty();
        if (hasName) {
            return true;
        }
    } catch (Exception ignored) {
        // Fall through to the next check.
    }

    // Check 2: basic width calculation works.
    boolean widthsUsable;
    try {
        widthsUsable = canCalculateBasicWidths(font);
    } catch (Exception ignored) {
        widthsUsable = false;
    }
    if (widthsUsable) {
        return true;
    }

    // Check 3: at least one probe string can be encoded; this is the final verdict.
    try {
        return canEncodeAnyCharacter(font);
    } catch (Exception ignored) {
        return false;
    }
}
}

View File

@ -80,10 +80,6 @@ public class WidthCalculator {
Float charWidth =
calculateSingleCharacterWidth(font, character, fontSize, codePoint);
if (charWidth == null) {
return null;
}
totalWidth += charWidth;
if (previousCodePoint != -1) {
totalWidth += calculateKerning(font, previousCodePoint, codePoint, fontSize);
@ -203,9 +199,6 @@ public class WidthCalculator {
Float charWidth =
calculateGlyphWidthComprehensively(font, character, codePoint, fontSize);
if (charWidth == null) {
return null;
}
totalWidth += charWidth;
i += Character.charCount(codePoint);
@ -514,64 +507,4 @@ public class WidthCalculator {
return false;
}
/**
 * Estimates a lower bound for the rendered width of {@code text}.
 *
 * <p>When {@code calculateAccurateWidth} yields a positive value, 80% of it is returned.
 * Otherwise (non-positive result or an exception) the estimate falls back to
 * {@code text.length() * fontSize * 0.3f}.
 *
 * @param font the font used for measuring
 * @param text the text to measure
 * @param fontSize the font size
 * @return the estimate, or {@code 0} when {@code font} or {@code text} is {@code null},
 *     {@code text} is empty, or {@code fontSize <= 0}
 */
public float calculateMinimumTextWidth(PDFont font, String text, float fontSize) {
    boolean nothingToMeasure = font == null || text == null || text.isEmpty() || fontSize <= 0;
    if (nothingToMeasure) {
        return 0f;
    }

    try {
        float accurate = calculateAccurateWidth(font, text, fontSize);
        if (accurate > 0) {
            return accurate * 0.8f;
        }
    } catch (Exception ignored) {
        // Measurement failed; use the length-based fallback below.
    }

    return text.length() * fontSize * 0.3f;
}
/**
 * Estimates an upper bound for the rendered width of {@code text}.
 *
 * <p>When {@code calculateAccurateWidth} yields a positive value, 120% of it is returned.
 * Otherwise (non-positive result or an exception) the estimate falls back to
 * {@code text.length() * fontSize * 1.0f}.
 *
 * @param font the font used for measuring
 * @param text the text to measure
 * @param fontSize the font size
 * @return the estimate, or {@code 0} when {@code font} or {@code text} is {@code null},
 *     {@code text} is empty, or {@code fontSize <= 0}
 */
public float calculateMaximumTextWidth(PDFont font, String text, float fontSize) {
    boolean nothingToMeasure = font == null || text == null || text.isEmpty() || fontSize <= 0;
    if (nothingToMeasure) {
        return 0f;
    }

    try {
        float accurate = calculateAccurateWidth(font, text, fontSize);
        if (accurate > 0) {
            return accurate * 1.2f;
        }
    } catch (Exception ignored) {
        // Measurement failed; use the length-based fallback below.
    }

    return text.length() * fontSize * 1.0f;
}
/**
 * Reports whether a width can be obtained for {@code text} with {@code font}.
 *
 * <p>Both the direct and the character-by-character calculations are attempted at size 12;
 * exceptions from either attempt are swallowed.
 *
 * @param font the font used for measuring
 * @param text the text to measure
 * @return {@code false} when {@code font} or {@code text} is {@code null}; {@code true} in
 *     every other case
 */
public boolean canCalculateWidthForText(PDFont font, String text) {
    if (font == null || text == null) {
        return false;
    }
    if (text.isEmpty()) {
        return true;
    }

    try {
        Float direct = calculateDirectWidth(font, text, 12f);
        if (direct != null) {
            return true;
        }
    } catch (Exception ignored) {
        // Direct measurement failed; try the per-character strategy.
    }

    try {
        Float perCharacter = calculateCharacterByCharacterWidth(font, text, 12f);
        if (perCharacter != null) {
            return true;
        }
    } catch (Exception ignored) {
        // Per-character measurement failed as well.
    }

    // NOTE(review): this returns true even when both attempts failed, so the probes above
    // never influence the result for non-null inputs. Confirm whether false was intended.
    return true;
}
}

View File

@ -13,20 +13,7 @@
color: #6c757d !important;
}
/* Focus indicator for primary buttons: a 25%-opacity ring in the same blue as the
   outline (#0d6efd) plus a solid 2px outline drawn 2px away from the element. */
.btn-primary:focus {
box-shadow: 0 0 0 0.2rem rgba(13, 110, 253, 0.25);
outline: 2px solid #0d6efd;
outline-offset: 2px;
}
/* Focus indicator for check inputs: a 25%-opacity ring in the same blue as the
   outline (#0d6efd) plus a solid 2px outline drawn 2px away from the element. */
.form-check-input:focus {
box-shadow: 0 0 0 0.2rem rgba(13, 110, 253, 0.25);
outline: 2px solid #0d6efd;
outline-offset: 2px;
}
.form-control:focus, .form-select:focus {
border-color: #0d6efd;
.btn-primary:focus, .form-check-input:focus, .form-control:focus, .form-select:focus {
box-shadow: 0 0 0 0.2rem rgba(13, 110, 253, 0.25);
outline: 2px solid #0d6efd;
outline-offset: 2px;
@ -36,20 +23,6 @@
background-color: #0d6efd;
border-color: #0d6efd;
}
/* Collapses the element to a fully clipped 1px box that is absolutely positioned, with no
   padding or border and no text wrapping.
   NOTE(review): presumably the usual "screen-reader only" helper; confirm against the markup
   that uses it. */
.sr-only {
position: absolute;
width: 1px;
height: 1px;
padding: 0;
margin: -1px;
overflow: hidden;
clip: rect(0, 0, 0, 0);
white-space: nowrap;
border: 0;
}
</style>
</head>