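refactor: simplify the text search logic in RedactController by running a single TextFinder pass over the whole document instead of one pass per single-page copy, and improve TextFinder page handling by having endPage delegate to super.endPage. Fix a potential document close issue by always closing the main document during cleanup.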

This commit is contained in:
Balázs Szücs 2025-07-15 18:11:31 +02:00
parent 344602cba4
commit a1e0e6f2fd
2 changed files with 12 additions and 35 deletions

View File

@ -462,6 +462,7 @@ public class RedactController {
if (fallbackDocument == null) {
document.close();
}
document.close();
} catch (IOException e) {
log.warn("Failed to close main document: {}", e.getMessage());
}
@ -486,43 +487,16 @@ public class RedactController {
if (text.isEmpty()) continue;
try {
int pageCount = document.getNumberOfPages();
for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
try (PDDocument singlePageDoc = new PDDocument()) {
PDPage page = document.getPage(pageIndex);
singlePageDoc.addPage(page);
TextFinder textFinder = new TextFinder(text, useRegex, wholeWordSearch);
textFinder.getText(document);
TextFinder pageFinder = new TextFinder(text, useRegex, wholeWordSearch);
pageFinder.getText(singlePageDoc);
for (PDFText found : pageFinder.getFoundTexts()) {
PDFText adjustedText =
new PDFText(
pageIndex,
found.getX1(),
found.getY1(),
found.getX2(),
found.getY2(),
found.getText());
allFoundTextsByPage
.computeIfAbsent(pageIndex, k -> new ArrayList<>())
.add(adjustedText);
}
} catch (Exception e) {
log.error(
"Error processing page {} for search term '{}': {}",
pageIndex,
text,
e.getMessage());
}
for (PDFText found : textFinder.getFoundTexts()) {
allFoundTextsByPage
.computeIfAbsent(found.getPageIndex(), k -> new ArrayList<>())
.add(found);
}
} catch (Exception e) {
log.error(
"Error initializing TextFinder for search term '{}': {}",
text,
e.getMessage());
log.error("Error processing search term '{}': {}", text, e.getMessage());
}
}

View File

@ -57,9 +57,10 @@ public class TextFinder extends PDFTextStripper {
}
@Override
protected void endPage(PDPage page) {
protected void endPage(PDPage page) throws IOException {
String text = pageTextBuilder.toString();
if (text.isEmpty() || this.searchTerm == null || this.searchTerm.isEmpty()) {
super.endPage(page);
return;
}
@ -107,6 +108,8 @@ public class TextFinder extends PDFTextStripper {
matcher.group()));
}
}
super.endPage(page);
}
public List<PDFText> getFoundTexts() {