mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-08 17:51:20 +02:00
enhance placeholder creation and width calculation with improved error handling and logging
Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
parent
8c6aa246a7
commit
04d9b6ace2
@ -41,6 +41,7 @@ import org.springframework.web.multipart.MultipartFile;
|
|||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
|
import lombok.Getter;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
@ -704,55 +705,134 @@ public class RedactionService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
String createPlaceholderWithFont(String originalWord, PDFont font) {
|
String createPlaceholderWithFont(String originalWord, PDFont font) {
|
||||||
|
try {
|
||||||
if (originalWord == null || originalWord.isEmpty()) {
|
if (originalWord == null || originalWord.isEmpty()) {
|
||||||
return originalWord;
|
log.debug(
|
||||||
|
"createPlaceholderWithFont: originalWord is null or empty, returning space");
|
||||||
|
return " ";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (font != null && TextEncodingHelper.isFontSubset(font.getName())) {
|
if (font != null && TextEncodingHelper.isFontSubset(font.getName())) {
|
||||||
try {
|
try {
|
||||||
float originalWidth = safeGetStringWidth(font, originalWord) / FONT_SCALE_FACTOR;
|
float originalWidth =
|
||||||
return createAlternativePlaceholder(originalWord, originalWidth, font, 1.0f);
|
safeGetStringWidth(font, originalWord) / FONT_SCALE_FACTOR;
|
||||||
|
String result =
|
||||||
|
createAlternativePlaceholder(originalWord, originalWidth, font, 1.0f);
|
||||||
|
if (result == null) {
|
||||||
|
log.warn("createAlternativePlaceholder returned null, using fallback");
|
||||||
|
return " ".repeat(Math.max(1, originalWord.length()));
|
||||||
|
}
|
||||||
|
return result;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
return "";
|
log.debug(
|
||||||
|
"Error in createPlaceholderWithFont subset logic: {}", e.getMessage());
|
||||||
|
return " ".repeat(Math.max(1, originalWord.length()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return " ".repeat(originalWord.length());
|
|
||||||
|
int length = Math.max(1, originalWord.length());
|
||||||
|
String result = " ".repeat(length);
|
||||||
|
log.debug("createPlaceholderWithFont: returning '{}' for '{}'", result, originalWord);
|
||||||
|
return result;
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Unexpected error in createPlaceholderWithFont: {}", e.getMessage());
|
||||||
|
return " ";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
String createPlaceholderWithWidth(
|
String createPlaceholderWithWidth(
|
||||||
String originalWord, float targetWidth, PDFont font, float fontSize) {
|
String originalWord, float targetWidth, PDFont font, float fontSize) {
|
||||||
|
try {
|
||||||
if (originalWord == null || originalWord.isEmpty()) {
|
if (originalWord == null || originalWord.isEmpty()) {
|
||||||
return originalWord;
|
log.debug(
|
||||||
|
"createPlaceholderWithWidth: originalWord is null or empty, returning space");
|
||||||
|
return " ";
|
||||||
}
|
}
|
||||||
if (font == null || fontSize <= 0) {
|
if (font == null || fontSize <= 0) {
|
||||||
return " ".repeat(originalWord.length());
|
int length = Math.max(1, originalWord.length());
|
||||||
|
String result = " ".repeat(length);
|
||||||
|
log.debug(
|
||||||
|
"createPlaceholderWithWidth: invalid font/size, returning '{}' for '{}'",
|
||||||
|
result,
|
||||||
|
originalWord);
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
try {
|
|
||||||
if (!WidthCalculator.isWidthCalculationReliable(font)) {
|
if (!WidthCalculator.isWidthCalculationReliable(font)) {
|
||||||
return " ".repeat(originalWord.length());
|
int length = Math.max(1, originalWord.length());
|
||||||
|
String result = " ".repeat(length);
|
||||||
|
log.debug(
|
||||||
|
"createPlaceholderWithWidth: font not reliable, returning '{}' for '{}'",
|
||||||
|
result,
|
||||||
|
originalWord);
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (TextEncodingHelper.isFontSubset(font.getName())) {
|
if (TextEncodingHelper.isFontSubset(font.getName())) {
|
||||||
return createSubsetFontPlaceholder(originalWord, targetWidth, font, fontSize);
|
String result =
|
||||||
|
createSubsetFontPlaceholder(originalWord, targetWidth, font, fontSize);
|
||||||
|
if (result == null) {
|
||||||
|
log.warn("createSubsetFontPlaceholder returned null, using fallback");
|
||||||
|
return " ".repeat(Math.max(1, originalWord.length()));
|
||||||
}
|
}
|
||||||
|
log.debug(
|
||||||
|
"createPlaceholderWithWidth: subset font, returning '{}' for '{}'",
|
||||||
|
result,
|
||||||
|
originalWord);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
float spaceWidth = WidthCalculator.calculateAccurateWidth(font, " ", fontSize);
|
float spaceWidth = WidthCalculator.calculateAccurateWidth(font, " ", fontSize);
|
||||||
if (spaceWidth <= 0) {
|
if (spaceWidth <= 0) {
|
||||||
|
log.debug(
|
||||||
|
"createPlaceholderWithWidth: invalid space width, using alternative placeholder");
|
||||||
return createAlternativePlaceholder(originalWord, targetWidth, font, fontSize);
|
return createAlternativePlaceholder(originalWord, targetWidth, font, fontSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
int spaceCount = Math.max(1, Math.round(targetWidth / spaceWidth));
|
int spaceCount = Math.max(1, Math.round(targetWidth / spaceWidth));
|
||||||
int maxSpaces =
|
int maxSpaces =
|
||||||
Math.max(
|
Math.max(
|
||||||
originalWord.length() * 2, Math.round(targetWidth / spaceWidth * 1.5f));
|
originalWord.length() * 2,
|
||||||
return " ".repeat(Math.min(spaceCount, maxSpaces));
|
Math.round(targetWidth / spaceWidth * 1.5f));
|
||||||
|
int finalSpaces = Math.min(spaceCount, maxSpaces);
|
||||||
|
String result = " ".repeat(finalSpaces);
|
||||||
|
|
||||||
|
log.debug(
|
||||||
|
"createPlaceholderWithWidth: calculated {} spaces for '{}' (targetWidth: {}, spaceWidth: {})",
|
||||||
|
finalSpaces,
|
||||||
|
originalWord,
|
||||||
|
targetWidth,
|
||||||
|
spaceWidth);
|
||||||
|
return result;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
return createAlternativePlaceholder(originalWord, targetWidth, font, fontSize);
|
log.debug("Error calculating space width, using alternative: {}", e.getMessage());
|
||||||
|
String result =
|
||||||
|
createAlternativePlaceholder(originalWord, targetWidth, font, fontSize);
|
||||||
|
if (result == null) {
|
||||||
|
return " ".repeat(Math.max(1, originalWord.length()));
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Unexpected error in createPlaceholderWithWidth: {}", e.getMessage());
|
||||||
|
return " ".repeat(Math.max(1, originalWord.length()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private String createSubsetFontPlaceholder(
|
private String createSubsetFontPlaceholder(
|
||||||
String originalWord, float targetWidth, PDFont font, float fontSize) {
|
String originalWord, float targetWidth, PDFont font, float fontSize) {
|
||||||
try {
|
try {
|
||||||
return createAlternativePlaceholder(originalWord, targetWidth, font, fontSize);
|
String result = createAlternativePlaceholder(originalWord, targetWidth, font, fontSize);
|
||||||
|
if (result == null) {
|
||||||
|
log.warn(
|
||||||
|
"createAlternativePlaceholder returned null in subset font, using fallback");
|
||||||
|
return " ".repeat(Math.max(1, originalWord != null ? originalWord.length() : 1));
|
||||||
|
}
|
||||||
|
return result;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
return "";
|
log.error("Error in createSubsetFontPlaceholder: {}", e.getMessage());
|
||||||
|
return " ".repeat(Math.max(1, originalWord != null ? originalWord.length() : 1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -785,9 +865,12 @@ public class RedactionService {
|
|||||||
} catch (Exception ignored) {
|
} catch (Exception ignored) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return "";
|
log.debug(
|
||||||
|
"createAlternativePlaceholder: no suitable alternative found, returning spaces");
|
||||||
|
return " ".repeat(Math.max(1, originalWord != null ? originalWord.length() : 1));
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
return "";
|
log.error("Unexpected error in createAlternativePlaceholder: {}", e.getMessage());
|
||||||
|
return " ".repeat(Math.max(1, originalWord != null ? originalWord.length() : 1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -971,7 +1054,7 @@ public class RedactionService {
|
|||||||
}
|
}
|
||||||
if (isTextShowingOperator(opName) && i > 0) {
|
if (isTextShowingOperator(opName) && i > 0) {
|
||||||
String textContent = extractTextFromToken(tokens.get(i - 1), opName, gs.font);
|
String textContent = extractTextFromToken(tokens.get(i - 1), opName, gs.font);
|
||||||
if (!textContent.isEmpty()) {
|
if (textContent != null && !textContent.trim().isEmpty()) {
|
||||||
if (aggressive
|
if (aggressive
|
||||||
&& gs.font != null
|
&& gs.font != null
|
||||||
&& tokens.get(i - 1) instanceof COSString cs) {
|
&& tokens.get(i - 1) instanceof COSString cs) {
|
||||||
@ -1017,7 +1100,7 @@ public class RedactionService {
|
|||||||
}
|
}
|
||||||
if (isTextShowingOperator(opName) && i > 0) {
|
if (isTextShowingOperator(opName) && i > 0) {
|
||||||
String textContent = extractTextFromToken(tokens.get(i - 1), opName, gs.font);
|
String textContent = extractTextFromToken(tokens.get(i - 1), opName, gs.font);
|
||||||
if (!textContent.isEmpty()) {
|
if (textContent != null && !textContent.trim().isEmpty()) {
|
||||||
segments.add(
|
segments.add(
|
||||||
new TextSegment(
|
new TextSegment(
|
||||||
i - 1,
|
i - 1,
|
||||||
@ -1070,11 +1153,14 @@ public class RedactionService {
|
|||||||
}
|
}
|
||||||
List<TextSegment> textSegments = extractTextSegments(page, tokens, this.aggressiveMode);
|
List<TextSegment> textSegments = extractTextSegments(page, tokens, this.aggressiveMode);
|
||||||
String completeText = buildCompleteText(textSegments);
|
String completeText = buildCompleteText(textSegments);
|
||||||
List<MatchRange> matches =
|
List<MatchRange> matches;
|
||||||
this.aggressiveMode
|
if (this.aggressiveMode) {
|
||||||
? findAllMatchesAggressive(
|
matches =
|
||||||
textSegments, tokens, targetWords, useRegex, wholeWordSearch)
|
findAllMatchesAggressive(
|
||||||
: findAllMatches(completeText, targetWords, useRegex, wholeWordSearch);
|
textSegments, tokens, targetWords, useRegex, wholeWordSearch);
|
||||||
|
} else {
|
||||||
|
matches = findMatchesInSegments(textSegments, targetWords, useRegex, wholeWordSearch);
|
||||||
|
}
|
||||||
return applyRedactionsToTokens(tokens, textSegments, matches);
|
return applyRedactionsToTokens(tokens, textSegments, matches);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1329,27 +1415,36 @@ public class RedactionService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private String applyRedactionsToSegmentText(TextSegment segment, List<MatchRange> matches) {
|
private String applyRedactionsToSegmentText(TextSegment segment, List<MatchRange> matches) {
|
||||||
|
if (segment == null || matches == null || matches.isEmpty()) {
|
||||||
|
return segment != null && segment.getText() != null ? segment.getText() : "";
|
||||||
|
}
|
||||||
|
|
||||||
String text = segment.getText();
|
String text = segment.getText();
|
||||||
if (!this.aggressiveMode
|
if (text == null) return "";
|
||||||
|
|
||||||
|
if (!aggressiveMode
|
||||||
&& segment.getFont() != null
|
&& segment.getFont() != null
|
||||||
&& !TextEncodingHelper.isTextSegmentRemovable(segment.getFont(), text)) {
|
&& !TextEncodingHelper.isTextSegmentRemovable(segment.getFont(), text)) {
|
||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
StringBuilder result = new StringBuilder(text);
|
StringBuilder result = new StringBuilder(text);
|
||||||
for (MatchRange match : matches) {
|
for (MatchRange match : matches) {
|
||||||
int segmentStart = Math.max(0, match.getStartPos() - segment.getStartPos());
|
int segmentStart = Math.max(0, match.getStartPos() - segment.getStartPos());
|
||||||
int segmentEnd = Math.min(text.length(), match.getEndPos() - segment.getStartPos());
|
int segmentEnd = Math.min(text.length(), match.getEndPos() - segment.getStartPos());
|
||||||
|
|
||||||
if (segmentStart < text.length() && segmentEnd > segmentStart) {
|
if (segmentStart < text.length() && segmentEnd > segmentStart) {
|
||||||
String originalPart = text.substring(segmentStart, segmentEnd);
|
String originalPart = text.substring(segmentStart, segmentEnd);
|
||||||
if (!this.aggressiveMode
|
|
||||||
|
if (!aggressiveMode
|
||||||
&& segment.getFont() != null
|
&& segment.getFont() != null
|
||||||
&& !TextEncodingHelper.isTextSegmentRemovable(
|
&& !TextEncodingHelper.isTextSegmentRemovable(
|
||||||
segment.getFont(), originalPart)) {
|
segment.getFont(), originalPart)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this.aggressiveMode) {
|
if (aggressiveMode) {
|
||||||
result.replace(segmentStart, segmentEnd, "");
|
result.replace(segmentStart, segmentEnd, "");
|
||||||
} else {
|
} else {
|
||||||
float originalWidth = 0;
|
float originalWidth = 0;
|
||||||
@ -1359,19 +1454,26 @@ public class RedactionService {
|
|||||||
/ FONT_SCALE_FACTOR
|
/ FONT_SCALE_FACTOR
|
||||||
* segment.getFontSize();
|
* segment.getFontSize();
|
||||||
}
|
}
|
||||||
|
|
||||||
String placeholder =
|
String placeholder =
|
||||||
(originalWidth > 0)
|
originalWidth > 0
|
||||||
? createPlaceholderWithWidth(
|
? createPlaceholderWithWidth(
|
||||||
originalPart,
|
originalPart,
|
||||||
originalWidth,
|
originalWidth,
|
||||||
segment.getFont(),
|
segment.getFont(),
|
||||||
segment.getFontSize())
|
segment.getFontSize())
|
||||||
: createPlaceholderWithFont(originalPart, segment.getFont());
|
: createPlaceholderWithFont(
|
||||||
|
originalPart, segment.getFont());
|
||||||
|
|
||||||
|
if (placeholder == null) placeholder = " ";
|
||||||
result.replace(segmentStart, segmentEnd, placeholder);
|
result.replace(segmentStart, segmentEnd, placeholder);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result.toString();
|
return result.toString();
|
||||||
|
} catch (Exception e) {
|
||||||
|
return text;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<MatchRange> findAllMatchesAggressive(
|
private List<MatchRange> findAllMatchesAggressive(
|
||||||
@ -1569,6 +1671,50 @@ public class RedactionService {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private List<MatchRange> findMatchesInSegments(
|
||||||
|
List<TextSegment> segments,
|
||||||
|
Set<String> targetWords,
|
||||||
|
boolean useRegex,
|
||||||
|
boolean wholeWordSearch) {
|
||||||
|
List<MatchRange> allMatches = new ArrayList<>();
|
||||||
|
List<Pattern> patterns =
|
||||||
|
TextFinderUtils.createOptimizedSearchPatterns(
|
||||||
|
targetWords, useRegex, wholeWordSearch);
|
||||||
|
|
||||||
|
for (TextSegment segment : segments) {
|
||||||
|
String segmentText = segment.getText();
|
||||||
|
if (segmentText == null || segmentText.isEmpty()) continue;
|
||||||
|
|
||||||
|
if (segment.getFont() != null
|
||||||
|
&& !TextEncodingHelper.isTextSegmentRemovable(segment.getFont(), segmentText)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Pattern pattern : patterns) {
|
||||||
|
try {
|
||||||
|
var matcher = pattern.matcher(segmentText);
|
||||||
|
while (matcher.find()) {
|
||||||
|
int matchStart = matcher.start();
|
||||||
|
int matchEnd = matcher.end();
|
||||||
|
|
||||||
|
if (matchStart >= 0
|
||||||
|
&& matchEnd <= segmentText.length()
|
||||||
|
&& matchStart < matchEnd) {
|
||||||
|
allMatches.add(
|
||||||
|
new MatchRange(
|
||||||
|
segment.getStartPos() + matchStart,
|
||||||
|
segment.getStartPos() + matchEnd));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
allMatches.sort(Comparator.comparingInt(MatchRange::getStartPos));
|
||||||
|
return allMatches;
|
||||||
|
}
|
||||||
|
|
||||||
private List<Object> applyRedactionsToTokens(
|
private List<Object> applyRedactionsToTokens(
|
||||||
List<Object> tokens, List<TextSegment> textSegments, List<MatchRange> matches) {
|
List<Object> tokens, List<TextSegment> textSegments, List<MatchRange> matches) {
|
||||||
List<Object> newTokens = new ArrayList<>(tokens);
|
List<Object> newTokens = new ArrayList<>(tokens);
|
||||||
@ -1618,24 +1764,97 @@ public class RedactionService {
|
|||||||
for (Map.Entry<Integer, List<MatchRange>> entry : matchesBySegment.entrySet()) {
|
for (Map.Entry<Integer, List<MatchRange>> entry : matchesBySegment.entrySet()) {
|
||||||
int segmentIndex = entry.getKey();
|
int segmentIndex = entry.getKey();
|
||||||
List<MatchRange> segmentMatches = entry.getValue();
|
List<MatchRange> segmentMatches = entry.getValue();
|
||||||
|
|
||||||
|
if (segmentIndex < 0 || segmentIndex >= textSegments.size()) {
|
||||||
|
log.warn(
|
||||||
|
"Invalid segment index: {} (textSegments size: {})",
|
||||||
|
segmentIndex,
|
||||||
|
textSegments.size());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
TextSegment segment = textSegments.get(segmentIndex);
|
TextSegment segment = textSegments.get(segmentIndex);
|
||||||
|
if (segment == null) {
|
||||||
|
log.warn("Segment is null at index: {}", segmentIndex);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
log.debug(
|
||||||
|
"Creating task for segment {} with operator '{}' and {} matches",
|
||||||
|
segmentIndex,
|
||||||
|
segment.operatorName,
|
||||||
|
segmentMatches.size());
|
||||||
|
|
||||||
|
try {
|
||||||
if ("Tj".equals(segment.operatorName) || "'".equals(segment.operatorName)) {
|
if ("Tj".equals(segment.operatorName) || "'".equals(segment.operatorName)) {
|
||||||
String newText = applyRedactionsToSegmentText(segment, segmentMatches);
|
String newText = applyRedactionsToSegmentText(segment, segmentMatches);
|
||||||
float adjustment = 0;
|
if (newText == null) {
|
||||||
adjustment = calculateWidthAdjustment(segment, segmentMatches);
|
log.warn(
|
||||||
|
"applyRedactionsToSegmentText returned null for segment {}, using empty string",
|
||||||
|
segmentIndex);
|
||||||
|
newText = ""; // Ensure it's never null
|
||||||
|
}
|
||||||
|
float adjustment = calculateWidthAdjustment(segment, segmentMatches);
|
||||||
tasks.add(new ModificationTask(segment, newText, adjustment));
|
tasks.add(new ModificationTask(segment, newText, adjustment));
|
||||||
|
log.debug(
|
||||||
|
"Created Tj/' task with newText: '{}' (length: {})",
|
||||||
|
newText,
|
||||||
|
newText.length());
|
||||||
} else if ("TJ".equals(segment.operatorName)) {
|
} else if ("TJ".equals(segment.operatorName)) {
|
||||||
tasks.add(new ModificationTask(segment, null, 0));
|
tasks.add(
|
||||||
|
new ModificationTask(
|
||||||
|
segment, "", 0)); // Use empty string instead of null for TJ
|
||||||
|
log.debug("Created TJ task with empty newText (was null)");
|
||||||
|
} else {
|
||||||
|
log.debug("Skipping segment with operator: {}", segment.operatorName);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Error creating task for segment {}: {}", segmentIndex, e.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tasks.sort((a, b) -> Integer.compare(b.segment.tokenIndex, a.segment.tokenIndex));
|
tasks.sort((a, b) -> Integer.compare(b.segment.tokenIndex, a.segment.tokenIndex));
|
||||||
for (ModificationTask task : tasks) {
|
|
||||||
|
int processedCount = 0;
|
||||||
|
int maxTasksToProcess = Math.min(tasks.size(), 1000); // Safety limit
|
||||||
|
|
||||||
|
for (int i = 0; i < maxTasksToProcess && i < tasks.size(); i++) {
|
||||||
|
ModificationTask task = tasks.get(i);
|
||||||
|
try {
|
||||||
List<MatchRange> segmentMatches =
|
List<MatchRange> segmentMatches =
|
||||||
matchesBySegment.getOrDefault(
|
matchesBySegment.getOrDefault(
|
||||||
textSegments.indexOf(task.segment), Collections.emptyList());
|
textSegments.indexOf(task.segment), Collections.emptyList());
|
||||||
|
|
||||||
|
if (task.segment.tokenIndex >= newTokens.size()) {
|
||||||
|
log.debug(
|
||||||
|
"Skipping segment with invalid token index {} (tokens size: {})",
|
||||||
|
task.segment.tokenIndex,
|
||||||
|
newTokens.size());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (task.segment.getText() == null || task.segment.getText().isEmpty()) {
|
||||||
|
log.debug(
|
||||||
|
"Skipping segment with empty text at index {}",
|
||||||
|
task.segment.tokenIndex);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
modifyTokenForRedaction(
|
modifyTokenForRedaction(
|
||||||
newTokens, task.segment, task.newText, task.adjustment, segmentMatches);
|
newTokens, task.segment, task.newText, task.adjustment, segmentMatches);
|
||||||
|
processedCount++;
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn(
|
||||||
|
"Failed to process modification task for segment at {}: {}",
|
||||||
|
task.segment.tokenIndex,
|
||||||
|
e.getMessage());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.debug(
|
||||||
|
"Successfully processed {} out of {} modification tasks",
|
||||||
|
processedCount,
|
||||||
|
tasks.size());
|
||||||
return newTokens;
|
return newTokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1837,20 +2056,67 @@ public class RedactionService {
|
|||||||
String newText,
|
String newText,
|
||||||
float adjustment,
|
float adjustment,
|
||||||
List<MatchRange> matches) {
|
List<MatchRange> matches) {
|
||||||
if (tokens == null
|
// Defensive null handling
|
||||||
|| segment == null
|
if (tokens == null || segment == null) {
|
||||||
|| newText == null
|
log.warn(
|
||||||
|| !isValidTokenIndex(tokens, segment.tokenIndex)
|
"Invalid input to modifyTokenForRedaction: tokens={}, segment={}",
|
||||||
|| segment.operatorName == null) {
|
tokens == null ? "null" : "valid",
|
||||||
log.warn("Invalid input to modifyTokenForRedaction");
|
segment == null ? "null" : "valid");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle null newText by providing a default
|
||||||
|
if (newText == null) {
|
||||||
|
log.warn("newText is null, providing default empty string");
|
||||||
|
log.warn(
|
||||||
|
"Segment details: tokenIndex={}, operatorName={}, font={}, fontSize={}, text='{}'",
|
||||||
|
segment.tokenIndex,
|
||||||
|
segment.operatorName,
|
||||||
|
segment.getFont() != null ? segment.getFont().getName() : "null",
|
||||||
|
segment.getFontSize(),
|
||||||
|
segment.getText() != null ? segment.getText() : "null");
|
||||||
|
log.warn("This should not happen with the new null safety measures!");
|
||||||
|
newText = ""; // Default to empty string
|
||||||
|
}
|
||||||
|
if (!isValidTokenIndex(tokens, segment.tokenIndex)) {
|
||||||
|
log.warn(
|
||||||
|
"Invalid input to modifyTokenForRedaction: invalid token index {} (tokens size: {})",
|
||||||
|
segment.tokenIndex,
|
||||||
|
tokens.size());
|
||||||
|
log.debug(
|
||||||
|
"Segment details: operator={}, font={}, fontSize={}, startPos={}, endPos={}",
|
||||||
|
segment.operatorName,
|
||||||
|
segment.getFont(),
|
||||||
|
segment.getFontSize(),
|
||||||
|
segment.getStartPos(),
|
||||||
|
segment.getEndPos());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (segment.operatorName == null) {
|
||||||
|
log.warn("Invalid input to modifyTokenForRedaction: operatorName is null");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
Object token = tokens.get(segment.tokenIndex);
|
||||||
|
|
||||||
|
if (token == null) {
|
||||||
|
log.warn("Token at index {} is null, skipping modification", segment.tokenIndex);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!isValidTokenForOperator(token, segment.operatorName)) {
|
||||||
|
log.warn(
|
||||||
|
"Token at index {} is not valid for operator {}, skipping modification",
|
||||||
|
segment.tokenIndex,
|
||||||
|
segment.operatorName);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
TokenModificationResult result =
|
TokenModificationResult result =
|
||||||
performTokenModification(
|
performTokenModification(
|
||||||
tokens,
|
tokens,
|
||||||
tokens.get(segment.tokenIndex),
|
token,
|
||||||
segment.operatorName,
|
segment.operatorName,
|
||||||
newText,
|
newText,
|
||||||
adjustment,
|
adjustment,
|
||||||
@ -1860,12 +2126,24 @@ public class RedactionService {
|
|||||||
if (!result.isSuccess()) {
|
if (!result.isSuccess()) {
|
||||||
performFallbackModification(tokens, segment.tokenIndex, newText);
|
performFallbackModification(tokens, segment.tokenIndex, newText);
|
||||||
}
|
}
|
||||||
|
} catch (IndexOutOfBoundsException e) {
|
||||||
|
log.warn(
|
||||||
|
"Token index {} is out of bounds (tokens size: {}), skipping modification",
|
||||||
|
segment.tokenIndex,
|
||||||
|
tokens.size());
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error(
|
log.error(
|
||||||
"Token modification failed at index {}: {}",
|
"Token modification failed at index {}: {}",
|
||||||
segment.tokenIndex,
|
segment.tokenIndex,
|
||||||
e.getMessage());
|
e.getMessage());
|
||||||
|
try {
|
||||||
performEmergencyFallback(tokens, segment.tokenIndex);
|
performEmergencyFallback(tokens, segment.tokenIndex);
|
||||||
|
} catch (Exception emergencyError) {
|
||||||
|
log.error(
|
||||||
|
"Emergency fallback also failed at index {}: {}",
|
||||||
|
segment.tokenIndex,
|
||||||
|
emergencyError.getMessage());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1873,6 +2151,18 @@ public class RedactionService {
|
|||||||
return index >= 0 && index < tokens.size();
|
return index >= 0 && index < tokens.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean isValidTokenForOperator(Object token, String operatorName) {
|
||||||
|
if (token == null || operatorName == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return switch (operatorName) {
|
||||||
|
case "Tj", "'", "\"" -> token instanceof COSString;
|
||||||
|
case "TJ" -> token instanceof COSArray;
|
||||||
|
default -> true;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
private COSArray createRedactedTJArray(
|
private COSArray createRedactedTJArray(
|
||||||
COSArray originalArray, TextSegment segment, List<MatchRange> matches) {
|
COSArray originalArray, TextSegment segment, List<MatchRange> matches) {
|
||||||
|
|
||||||
@ -2403,30 +2693,45 @@ public class RedactionService {
|
|||||||
return totalMods;
|
return totalMods;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class WidthCalculationResult {
|
private List<TextSegment> extractTextSegmentsFromXObject(
|
||||||
private final float adjustment;
|
PDResources resources, List<Object> tokens) {
|
||||||
private final int processedMatches;
|
List<TextSegment> segments = new ArrayList<>();
|
||||||
private final List<String> warnings;
|
int currentTextPos = 0;
|
||||||
|
GraphicsState gs = new GraphicsState();
|
||||||
public WidthCalculationResult(
|
for (int i = 0; i < tokens.size(); i++) {
|
||||||
float adjustment, int processedMatches, List<String> warnings) {
|
Object currentToken = tokens.get(i);
|
||||||
this.adjustment = adjustment;
|
if (currentToken instanceof Operator op) {
|
||||||
this.processedMatches = processedMatches;
|
String opName = op.getName();
|
||||||
this.warnings = new ArrayList<>(warnings);
|
if ("Tf".equals(opName) && i >= 2) {
|
||||||
|
try {
|
||||||
|
COSName fontName = (COSName) tokens.get(i - 2);
|
||||||
|
COSBase fontSizeBase = (COSBase) tokens.get(i - 1);
|
||||||
|
if (fontSizeBase instanceof COSNumber cosNumber) {
|
||||||
|
gs.setFont(resources.getFont(fontName));
|
||||||
|
gs.setFontSize(cosNumber.floatValue());
|
||||||
}
|
}
|
||||||
|
} catch (ClassCastException | IOException ignored) {
|
||||||
public float getAdjustment() {
|
|
||||||
return adjustment;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getProcessedMatches() {
|
|
||||||
return processedMatches;
|
|
||||||
}
|
}
|
||||||
|
if (isTextShowingOperator(opName) && i > 0) {
|
||||||
public List<String> getWarnings() {
|
String textContent = extractTextFromToken(tokens.get(i - 1), opName, gs.font);
|
||||||
return new ArrayList<>(warnings);
|
if (textContent != null && !textContent.trim().isEmpty()) {
|
||||||
|
segments.add(
|
||||||
|
new TextSegment(
|
||||||
|
i - 1,
|
||||||
|
opName,
|
||||||
|
textContent,
|
||||||
|
currentTextPos,
|
||||||
|
currentTextPos + textContent.length(),
|
||||||
|
gs.font,
|
||||||
|
gs.fontSize));
|
||||||
|
currentTextPos += textContent.length();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return segments;
|
||||||
|
}
|
||||||
|
|
||||||
private int wipeAllTextInFormXObject(PDDocument document, PDFormXObject formXObject)
|
private int wipeAllTextInFormXObject(PDDocument document, PDFormXObject formXObject)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
@ -2485,25 +2790,20 @@ public class RedactionService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class TokenModificationResult {
|
private static class WidthCalculationResult {
|
||||||
private final boolean success;
|
@Getter private final float adjustment;
|
||||||
private final String errorMessage;
|
@Getter private final int processedMatches;
|
||||||
|
private final List<String> warnings;
|
||||||
|
|
||||||
private TokenModificationResult(boolean success, String errorMessage) {
|
public WidthCalculationResult(
|
||||||
this.success = success;
|
float adjustment, int processedMatches, List<String> warnings) {
|
||||||
this.errorMessage = errorMessage;
|
this.adjustment = adjustment;
|
||||||
|
this.processedMatches = processedMatches;
|
||||||
|
this.warnings = new ArrayList<>(warnings);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static TokenModificationResult success() {
|
public List<String> getWarnings() {
|
||||||
return new TokenModificationResult(true, null);
|
return new ArrayList<>(warnings);
|
||||||
}
|
|
||||||
|
|
||||||
public static TokenModificationResult failure(String errorMessage) {
|
|
||||||
return new TokenModificationResult(false, errorMessage);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isSuccess() {
|
|
||||||
return success;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2556,44 +2856,22 @@ public class RedactionService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<TextSegment> extractTextSegmentsFromXObject(
|
private static class TokenModificationResult {
|
||||||
PDResources resources, List<Object> tokens) {
|
@Getter private final boolean success;
|
||||||
List<TextSegment> segments = new ArrayList<>();
|
private final String errorMessage;
|
||||||
int currentTextPos = 0;
|
|
||||||
GraphicsState gs = new GraphicsState();
|
private TokenModificationResult(boolean success, String errorMessage) {
|
||||||
for (int i = 0; i < tokens.size(); i++) {
|
this.success = success;
|
||||||
Object currentToken = tokens.get(i);
|
this.errorMessage = errorMessage;
|
||||||
if (currentToken instanceof Operator op) {
|
|
||||||
String opName = op.getName();
|
|
||||||
if ("Tf".equals(opName) && i >= 2) {
|
|
||||||
try {
|
|
||||||
COSName fontName = (COSName) tokens.get(i - 2);
|
|
||||||
COSBase fontSizeBase = (COSBase) tokens.get(i - 1);
|
|
||||||
if (fontSizeBase instanceof COSNumber cosNumber) {
|
|
||||||
gs.setFont(resources.getFont(fontName));
|
|
||||||
gs.setFontSize(cosNumber.floatValue());
|
|
||||||
}
|
}
|
||||||
} catch (ClassCastException | IOException ignored) {
|
|
||||||
|
public static TokenModificationResult success() {
|
||||||
|
return new TokenModificationResult(true, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static TokenModificationResult failure(String errorMessage) {
|
||||||
|
return new TokenModificationResult(false, errorMessage);
|
||||||
}
|
}
|
||||||
if (isTextShowingOperator(opName) && i > 0) {
|
|
||||||
String textContent = extractTextFromToken(tokens.get(i - 1), opName, gs.font);
|
|
||||||
if (!textContent.isEmpty()) {
|
|
||||||
segments.add(
|
|
||||||
new TextSegment(
|
|
||||||
i - 1,
|
|
||||||
opName,
|
|
||||||
textContent,
|
|
||||||
currentTextPos,
|
|
||||||
currentTextPos + textContent.length(),
|
|
||||||
gs.font,
|
|
||||||
gs.fontSize));
|
|
||||||
currentTextPos += textContent.length();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return segments;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
|
@ -1,7 +1,14 @@
|
|||||||
package stirling.software.SPDF.utils.text;
|
package stirling.software.SPDF.utils.text;
|
||||||
|
|
||||||
|
import java.text.Normalizer;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||||
|
import org.apache.pdfbox.pdmodel.font.PDType0Font;
|
||||||
|
|
||||||
import lombok.experimental.UtilityClass;
|
import lombok.experimental.UtilityClass;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
@ -11,127 +18,572 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
public class WidthCalculator {
|
public class WidthCalculator {
|
||||||
|
|
||||||
private final int FONT_SCALE_FACTOR = 1000;
|
private final int FONT_SCALE_FACTOR = 1000;
|
||||||
|
private final float CONSERVATIVE_CHAR_WIDTH_RATIO = 0.55f;
|
||||||
|
private final float BBOX_CHAR_WIDTH_RATIO = 0.65f;
|
||||||
|
|
||||||
|
private final Map<String, Float> widthCache = new ConcurrentHashMap<>();
|
||||||
|
private final Map<String, Boolean> reliabilityCache = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
private String createCacheKey(PDFont font, String text, float fontSize) {
|
||||||
|
return String.format("%s|%s|%.2f", font.getName(), text, fontSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String createReliabilityCacheKey(PDFont font) {
|
||||||
|
return font.getName();
|
||||||
|
}
|
||||||
|
|
||||||
public float calculateAccurateWidth(PDFont font, String text, float fontSize) {
|
public float calculateAccurateWidth(PDFont font, String text, float fontSize) {
|
||||||
if (font == null || text == null || text.isEmpty() || fontSize <= 0) {
|
return calculateAccurateWidth(font, text, fontSize, true);
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!TextEncodingHelper.canEncodeCharacters(font, text)) {
|
public float calculateAccurateWidth(
|
||||||
log.debug(
|
PDFont font, String text, float fontSize, boolean useCache) {
|
||||||
"Text cannot be encoded by font {}, using fallback width calculation",
|
if (font == null || text == null || text.isEmpty() || fontSize <= 0) return 0;
|
||||||
font.getName());
|
|
||||||
return calculateFallbackWidth(font, text, fontSize);
|
if (useCache) {
|
||||||
|
String cacheKey = createCacheKey(font, text, fontSize);
|
||||||
|
Float cachedWidth = widthCache.get(cacheKey);
|
||||||
|
if (cachedWidth != null) return cachedWidth;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
String normalizedText = normalizeText(text);
|
||||||
|
|
||||||
|
Float directWidth = calculateDirectWidth(font, normalizedText, fontSize);
|
||||||
|
if (directWidth != null) {
|
||||||
|
if (useCache) widthCache.put(createCacheKey(font, text, fontSize), directWidth);
|
||||||
|
return directWidth;
|
||||||
|
}
|
||||||
|
|
||||||
|
Float charByCharWidth = calculateCharacterByCharacterWidth(font, normalizedText, fontSize);
|
||||||
|
if (charByCharWidth != null) {
|
||||||
|
if (useCache) widthCache.put(createCacheKey(font, text, fontSize), charByCharWidth);
|
||||||
|
return charByCharWidth;
|
||||||
|
}
|
||||||
|
|
||||||
|
Float glyphWidth = calculateGlyphBasedWidth(font, normalizedText, fontSize);
|
||||||
|
if (glyphWidth != null) {
|
||||||
|
if (useCache) widthCache.put(createCacheKey(font, text, fontSize), glyphWidth);
|
||||||
|
return glyphWidth;
|
||||||
|
}
|
||||||
|
|
||||||
|
float fallbackWidth = calculateComprehensiveFallbackWidth(font, normalizedText, fontSize);
|
||||||
|
if (useCache) widthCache.put(createCacheKey(font, text, fontSize), fallbackWidth);
|
||||||
|
return fallbackWidth;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String normalizeText(String text) {
|
||||||
|
return Normalizer.normalize(text, Normalizer.Form.NFC);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Float calculateDirectWidth(PDFont font, String text, float fontSize) {
|
||||||
|
if (!TextEncodingHelper.canEncodeCharacters(font, text)) return null;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
float rawWidth = font.getStringWidth(text);
|
float rawWidth = font.getStringWidth(text);
|
||||||
float scaledWidth = (rawWidth / FONT_SCALE_FACTOR) * fontSize;
|
float scaledWidth = (rawWidth / FONT_SCALE_FACTOR) * fontSize;
|
||||||
|
return rawWidth >= 0 && scaledWidth >= 0 ? scaledWidth : null;
|
||||||
|
} catch (Exception e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
log.debug(
|
private Float calculateCharacterByCharacterWidth(PDFont font, String text, float fontSize) {
|
||||||
"Direct width calculation successful for font {}: {} -> {}",
|
try {
|
||||||
font.getName(),
|
List<Integer> codePoints = getCodePoints(text);
|
||||||
rawWidth,
|
float totalWidth = 0;
|
||||||
scaledWidth);
|
int previousCodePoint = -1;
|
||||||
return scaledWidth;
|
|
||||||
|
for (int codePoint : codePoints) {
|
||||||
|
String character = new String(Character.toChars(codePoint));
|
||||||
|
Float charWidth = calculateSingleCharacterWidth(font, character, fontSize);
|
||||||
|
if (charWidth == null) return null;
|
||||||
|
|
||||||
|
totalWidth += charWidth;
|
||||||
|
if (previousCodePoint != -1) {
|
||||||
|
totalWidth += calculateKerning(font, previousCodePoint, codePoint, fontSize);
|
||||||
|
}
|
||||||
|
previousCodePoint = codePoint;
|
||||||
|
}
|
||||||
|
return totalWidth;
|
||||||
|
} catch (Exception e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<Integer> getCodePoints(String text) {
|
||||||
|
List<Integer> codePoints = new ArrayList<>();
|
||||||
|
for (int i = 0; i < text.length(); ) {
|
||||||
|
int codePoint = text.codePointAt(i);
|
||||||
|
codePoints.add(codePoint);
|
||||||
|
i += Character.charCount(codePoint);
|
||||||
|
}
|
||||||
|
return codePoints;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Float calculateSingleCharacterWidth(PDFont font, String character, float fontSize) {
|
||||||
|
try {
|
||||||
|
byte[] encoded = null;
|
||||||
|
|
||||||
|
try {
|
||||||
|
encoded = font.encode(character);
|
||||||
|
if (encoded.length == 0) encoded = null;
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("Direct encoding failed for '{}': {}", character, e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (encoded == null && font instanceof PDType0Font) {
|
||||||
|
try {
|
||||||
|
encoded = character.getBytes("UTF-8");
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("UTF-8 encoding failed for '{}': {}", character, e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (encoded != null && encoded.length > 0) {
|
||||||
|
Float width = calculateGlyphWidth(font, encoded, fontSize);
|
||||||
|
if (width != null && width >= 0) return width;
|
||||||
|
}
|
||||||
|
|
||||||
|
return calculateAverageCharacterWidth(font, fontSize);
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.debug(
|
log.debug(
|
||||||
"Direct width calculation failed for font {}: {}",
|
"Single character width calculation failed for '{}': {}",
|
||||||
font.getName(),
|
character,
|
||||||
e.getMessage());
|
e.getMessage());
|
||||||
return calculateWidthWithCharacterIteration(font, text, fontSize);
|
return calculateAverageCharacterWidth(font, fontSize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private float calculateWidthWithCharacterIteration(PDFont font, String text, float fontSize) {
|
private Float calculateGlyphWidth(PDFont font, byte[] encoded, float fontSize) {
|
||||||
|
for (byte b : encoded) {
|
||||||
|
try {
|
||||||
|
int glyphCode = b & 0xFF;
|
||||||
|
float glyphWidth = font.getWidth(glyphCode);
|
||||||
|
|
||||||
|
if (glyphWidth > 0) {
|
||||||
|
return (glyphWidth / FONT_SCALE_FACTOR) * fontSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try alternative width methods
|
||||||
|
try {
|
||||||
|
glyphWidth = font.getWidthFromFont(glyphCode);
|
||||||
|
if (glyphWidth > 0) {
|
||||||
|
return (glyphWidth / FONT_SCALE_FACTOR) * fontSize;
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug(
|
||||||
|
"getWidthFromFont failed for glyph {}: {}", glyphCode, e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("Glyph width calculation failed for byte {}: {}", b, e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private float calculateKerning(
|
||||||
|
PDFont font, int leftCodePoint, int rightCodePoint, float fontSize) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Float calculateGlyphBasedWidth(PDFont font, String text, float fontSize) {
|
||||||
try {
|
try {
|
||||||
float totalWidth = 0;
|
float totalWidth = 0;
|
||||||
|
|
||||||
for (int i = 0; i < text.length(); i++) {
|
for (int i = 0; i < text.length(); ) {
|
||||||
String character = text.substring(i, i + 1);
|
int codePoint = text.codePointAt(i);
|
||||||
try {
|
String character = new String(Character.toChars(codePoint));
|
||||||
byte[] encoded = font.encode(character);
|
|
||||||
if (encoded.length > 0) {
|
|
||||||
int glyphCode = encoded[0] & 0xFF;
|
|
||||||
float glyphWidth = font.getWidth(glyphCode);
|
|
||||||
|
|
||||||
if (glyphWidth == 0) {
|
// Try to get glyph information more comprehensively
|
||||||
try {
|
Float charWidth =
|
||||||
glyphWidth = font.getWidthFromFont(glyphCode);
|
calculateGlyphWidthComprehensively(font, character, codePoint, fontSize);
|
||||||
} catch (Exception e2) {
|
if (charWidth == null) {
|
||||||
glyphWidth = font.getAverageFontWidth();
|
return null;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
totalWidth += (glyphWidth / FONT_SCALE_FACTOR) * fontSize;
|
totalWidth += charWidth;
|
||||||
} else {
|
i += Character.charCount(codePoint);
|
||||||
totalWidth += (font.getAverageFontWidth() / FONT_SCALE_FACTOR) * fontSize;
|
|
||||||
}
|
|
||||||
} catch (Exception e2) {
|
|
||||||
totalWidth += (font.getAverageFontWidth() / FONT_SCALE_FACTOR) * fontSize;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
log.debug("Character iteration width calculation: {}", totalWidth);
|
log.debug("Glyph-based width calculation: {}", totalWidth);
|
||||||
return totalWidth;
|
return totalWidth;
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.debug("Character iteration failed: {}", e.getMessage());
|
log.debug("Glyph-based calculation failed: {}", e.getMessage());
|
||||||
return calculateFallbackWidth(font, text, fontSize);
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private float calculateFallbackWidth(PDFont font, String text, float fontSize) {
|
private Float calculateGlyphWidthComprehensively(
|
||||||
|
PDFont font, String character, int codePoint, float fontSize) {
|
||||||
try {
|
try {
|
||||||
|
// Method 1: Try standard encoding
|
||||||
|
try {
|
||||||
|
byte[] encoded = font.encode(character);
|
||||||
|
if (encoded.length > 0) {
|
||||||
|
Float width = calculateWidthFromEncodedBytes(font, encoded, fontSize);
|
||||||
|
if (width != null && width >= 0) {
|
||||||
|
return width;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug(
|
||||||
|
"Standard encoding failed for U+{}: {}",
|
||||||
|
Integer.toHexString(codePoint),
|
||||||
|
e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Method 2: Try Unicode code point directly
|
||||||
|
try {
|
||||||
|
float glyphWidth = font.getWidth(codePoint);
|
||||||
|
if (glyphWidth > 0) {
|
||||||
|
return (glyphWidth / FONT_SCALE_FACTOR) * fontSize;
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug(
|
||||||
|
"Unicode code point width failed for U+{}: {}",
|
||||||
|
Integer.toHexString(codePoint),
|
||||||
|
e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Method 3: Character category based estimation
|
||||||
|
return calculateCategoryBasedWidth(font, codePoint, fontSize);
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("Comprehensive glyph width calculation failed: {}", e.getMessage());
|
||||||
|
return calculateAverageCharacterWidth(font, fontSize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Float calculateWidthFromEncodedBytes(PDFont font, byte[] encoded, float fontSize) {
|
||||||
|
// Try each byte as a potential glyph code
|
||||||
|
for (byte b : encoded) {
|
||||||
|
try {
|
||||||
|
int glyphCode = b & 0xFF;
|
||||||
|
float width = font.getWidth(glyphCode);
|
||||||
|
if (width > 0) {
|
||||||
|
return (width / FONT_SCALE_FACTOR) * fontSize;
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
// Continue trying other bytes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try multi-byte interpretation for Unicode fonts
|
||||||
|
if (encoded.length >= 2 && font instanceof PDType0Font) {
|
||||||
|
try {
|
||||||
|
int glyphCode = ((encoded[0] & 0xFF) << 8) | (encoded[1] & 0xFF);
|
||||||
|
float width = font.getWidth(glyphCode);
|
||||||
|
if (width > 0) {
|
||||||
|
return (width / FONT_SCALE_FACTOR) * fontSize;
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("Multi-byte glyph code interpretation failed: {}", e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Float calculateCategoryBasedWidth(PDFont font, int codePoint, float fontSize) {
|
||||||
|
try {
|
||||||
|
int category = Character.getType(codePoint);
|
||||||
|
float baseWidth = calculateAverageCharacterWidth(font, fontSize);
|
||||||
|
|
||||||
|
// Adjust width based on character category
|
||||||
|
float multiplier =
|
||||||
|
switch (category) {
|
||||||
|
case Character.UPPERCASE_LETTER -> 1.2f;
|
||||||
|
case Character.LOWERCASE_LETTER -> 1.0f;
|
||||||
|
case Character.DECIMAL_DIGIT_NUMBER -> 1.0f;
|
||||||
|
case Character.SPACE_SEPARATOR -> 0.5f;
|
||||||
|
case Character.DASH_PUNCTUATION -> 0.8f;
|
||||||
|
case Character.OTHER_PUNCTUATION -> 0.6f;
|
||||||
|
case Character.CURRENCY_SYMBOL -> 1.1f;
|
||||||
|
case Character.MATH_SYMBOL -> 1.0f;
|
||||||
|
case Character.MODIFIER_LETTER -> 0.7f;
|
||||||
|
case Character.NON_SPACING_MARK -> 0.0f; // Combining characters
|
||||||
|
case Character.ENCLOSING_MARK -> 0.0f;
|
||||||
|
case Character.COMBINING_SPACING_MARK -> 0.3f;
|
||||||
|
default -> 1.0f;
|
||||||
|
};
|
||||||
|
|
||||||
|
return baseWidth * multiplier;
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("Category-based width calculation failed: {}", e.getMessage());
|
||||||
|
return calculateAverageCharacterWidth(font, fontSize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private float calculateAverageCharacterWidth(PDFont font, float fontSize) {
|
||||||
|
try {
|
||||||
|
float avgWidth = font.getAverageFontWidth();
|
||||||
|
return (avgWidth / FONT_SCALE_FACTOR) * fontSize;
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("Average character width calculation failed: {}", e.getMessage());
|
||||||
|
return CONSERVATIVE_CHAR_WIDTH_RATIO * fontSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private float calculateComprehensiveFallbackWidth(PDFont font, String text, float fontSize) {
|
||||||
|
try {
|
||||||
|
// Strategy 1: Use font bounding box with character analysis
|
||||||
if (font.getFontDescriptor() != null
|
if (font.getFontDescriptor() != null
|
||||||
&& font.getFontDescriptor().getFontBoundingBox() != null) {
|
&& font.getFontDescriptor().getFontBoundingBox() != null) {
|
||||||
|
|
||||||
PDRectangle bbox = font.getFontDescriptor().getFontBoundingBox();
|
PDRectangle bbox = font.getFontDescriptor().getFontBoundingBox();
|
||||||
float avgCharWidth =
|
float avgCharWidth = bbox.getWidth() / FONT_SCALE_FACTOR;
|
||||||
bbox.getWidth() / FONT_SCALE_FACTOR * 0.6f; // Conservative estimate
|
|
||||||
float fallbackWidth = text.length() * avgCharWidth * fontSize;
|
|
||||||
|
|
||||||
log.debug("Bounding box fallback width: {}", fallbackWidth);
|
// Analyze text composition for better estimation
|
||||||
return fallbackWidth;
|
float adjustedWidth = analyzeTextComposition(text, avgCharWidth, fontSize);
|
||||||
|
log.debug("Bounding box based fallback width: {}", adjustedWidth);
|
||||||
|
return adjustedWidth;
|
||||||
}
|
}
|
||||||
|
|
||||||
float avgWidth = font.getAverageFontWidth();
|
// Strategy 2: Enhanced average width calculation
|
||||||
float fallbackWidth = (text.length() * avgWidth / FONT_SCALE_FACTOR) * fontSize;
|
float enhancedAverage = calculateEnhancedAverageWidth(font, text, fontSize);
|
||||||
|
log.debug("Enhanced average fallback width: {}", enhancedAverage);
|
||||||
log.debug("Average width fallback: {}", fallbackWidth);
|
return enhancedAverage;
|
||||||
return fallbackWidth;
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
float conservativeWidth = text.length() * 0.5f * fontSize;
|
// Ultimate fallback
|
||||||
log.debug(
|
float conservativeWidth = text.length() * CONSERVATIVE_CHAR_WIDTH_RATIO * fontSize;
|
||||||
"Conservative fallback width for font {}: {}",
|
log.debug("Conservative fallback width: {}", conservativeWidth);
|
||||||
font.getName(),
|
|
||||||
conservativeWidth);
|
|
||||||
return conservativeWidth;
|
return conservativeWidth;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private float analyzeTextComposition(String text, float avgCharWidth, float fontSize) {
|
||||||
|
float totalWidth = 0;
|
||||||
|
int spaceCount = 0;
|
||||||
|
int upperCount = 0;
|
||||||
|
int lowerCount = 0;
|
||||||
|
int digitCount = 0;
|
||||||
|
int punctCount = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < text.length(); ) {
|
||||||
|
int codePoint = text.codePointAt(i);
|
||||||
|
int category = Character.getType(codePoint);
|
||||||
|
|
||||||
|
switch (category) {
|
||||||
|
case Character.SPACE_SEPARATOR -> {
|
||||||
|
spaceCount++;
|
||||||
|
totalWidth += avgCharWidth * 0.5f * fontSize;
|
||||||
|
}
|
||||||
|
case Character.UPPERCASE_LETTER -> {
|
||||||
|
upperCount++;
|
||||||
|
totalWidth += avgCharWidth * 1.2f * fontSize;
|
||||||
|
}
|
||||||
|
case Character.LOWERCASE_LETTER -> {
|
||||||
|
lowerCount++;
|
||||||
|
totalWidth += avgCharWidth * 1.0f * fontSize;
|
||||||
|
}
|
||||||
|
case Character.DECIMAL_DIGIT_NUMBER -> {
|
||||||
|
digitCount++;
|
||||||
|
totalWidth += avgCharWidth * 1.0f * fontSize;
|
||||||
|
}
|
||||||
|
case Character.OTHER_PUNCTUATION, Character.DASH_PUNCTUATION -> {
|
||||||
|
punctCount++;
|
||||||
|
totalWidth += avgCharWidth * 0.7f * fontSize;
|
||||||
|
}
|
||||||
|
default -> totalWidth += avgCharWidth * BBOX_CHAR_WIDTH_RATIO * fontSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
i += Character.charCount(codePoint);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Log composition analysis for debugging
|
||||||
|
log.debug(
|
||||||
|
"Text composition analysis - Spaces: {}, Upper: {}, Lower: {}, Digits: {}, Punct: {}",
|
||||||
|
spaceCount,
|
||||||
|
upperCount,
|
||||||
|
lowerCount,
|
||||||
|
digitCount,
|
||||||
|
punctCount);
|
||||||
|
|
||||||
|
return totalWidth;
|
||||||
|
}
|
||||||
|
|
||||||
|
private float calculateEnhancedAverageWidth(PDFont font, String text, float fontSize) {
|
||||||
|
try {
|
||||||
|
float baseAverage = font.getAverageFontWidth();
|
||||||
|
|
||||||
|
// Try to get more specific metrics
|
||||||
|
float capHeight = 0;
|
||||||
|
float xHeight = 0;
|
||||||
|
|
||||||
|
if (font.getFontDescriptor() != null) {
|
||||||
|
capHeight = font.getFontDescriptor().getCapHeight();
|
||||||
|
xHeight = font.getFontDescriptor().getXHeight();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use metrics to adjust the average width estimation
|
||||||
|
float adjustmentFactor = 1.0f;
|
||||||
|
if (capHeight > 0 && xHeight > 0) {
|
||||||
|
adjustmentFactor = Math.max(0.8f, Math.min(1.2f, xHeight / capHeight));
|
||||||
|
}
|
||||||
|
|
||||||
|
float adjustedAverage = (baseAverage * adjustmentFactor / FONT_SCALE_FACTOR) * fontSize;
|
||||||
|
return text.length() * adjustedAverage;
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("Enhanced average width calculation failed: {}", e.getMessage());
|
||||||
|
return text.length() * CONSERVATIVE_CHAR_WIDTH_RATIO * fontSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isWidthCalculationReliable(PDFont font) {
|
public boolean isWidthCalculationReliable(PDFont font) {
|
||||||
if (font == null) {
|
if (font == null) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check cache first
|
||||||
|
String cacheKey = createReliabilityCacheKey(font);
|
||||||
|
Boolean cachedResult = reliabilityCache.get(cacheKey);
|
||||||
|
if (cachedResult != null) {
|
||||||
|
log.debug(
|
||||||
|
"Using cached reliability result for font {}: {}",
|
||||||
|
font.getName(),
|
||||||
|
cachedResult);
|
||||||
|
return cachedResult;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean result = performReliabilityCheck(font);
|
||||||
|
|
||||||
|
// Cache the result
|
||||||
|
reliabilityCache.put(cacheKey, result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean performReliabilityCheck(PDFont font) {
|
||||||
|
try {
|
||||||
|
// Check if font is damaged
|
||||||
if (font.isDamaged()) {
|
if (font.isDamaged()) {
|
||||||
log.debug("Font {} is damaged", font.getName());
|
log.debug("Font {} is damaged", font.getName());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check basic width calculation capability
|
||||||
if (!TextEncodingHelper.canCalculateBasicWidths(font)) {
|
if (!TextEncodingHelper.canCalculateBasicWidths(font)) {
|
||||||
log.debug("Font {} cannot perform basic width calculations", font.getName());
|
log.debug("Font {} cannot perform basic width calculations", font.getName());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (TextEncodingHelper.hasCustomEncoding(font)) {
|
// Test with a simple character
|
||||||
log.debug("Font {} has custom encoding", font.getName());
|
try {
|
||||||
|
font.getStringWidth("A");
|
||||||
|
return true;
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("Font {} failed basic width test: {}", font.getName(), e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if we can at least get average width
|
||||||
|
try {
|
||||||
|
float avgWidth = font.getAverageFontWidth();
|
||||||
|
return avgWidth > 0;
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug(
|
||||||
|
"Font {} cannot provide average width: {}", font.getName(), e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("Reliability check failed for font {}: {}", font.getName(), e.getMessage());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public float calculateCharacterWidth(PDFont font, String character, float fontSize) {
|
||||||
|
if (font == null || character == null || character.isEmpty() || fontSize <= 0) return 0;
|
||||||
|
|
||||||
|
String cacheKey = createCacheKey(font, character, fontSize);
|
||||||
|
Float cachedWidth = widthCache.get(cacheKey);
|
||||||
|
if (cachedWidth != null) return cachedWidth;
|
||||||
|
|
||||||
|
Float width = calculateSingleCharacterWidth(font, character, fontSize);
|
||||||
|
if (width == null) width = calculateAverageCharacterWidth(font, fontSize);
|
||||||
|
|
||||||
|
widthCache.put(cacheKey, width);
|
||||||
|
return width;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String createWidthMatchingPlaceholder(
|
||||||
|
String originalText,
|
||||||
|
float targetWidth,
|
||||||
|
PDFont font,
|
||||||
|
float fontSize,
|
||||||
|
String placeholderChar) {
|
||||||
|
if (originalText == null || originalText.isEmpty() || targetWidth <= 0) return "";
|
||||||
|
|
||||||
|
if (placeholderChar == null || placeholderChar.isEmpty()) placeholderChar = " ";
|
||||||
|
|
||||||
|
try {
|
||||||
|
float placeholderCharWidth = calculateCharacterWidth(font, placeholderChar, fontSize);
|
||||||
|
if (placeholderCharWidth <= 0) {
|
||||||
|
return " ".repeat(Math.max(1, originalText.length()));
|
||||||
|
}
|
||||||
|
|
||||||
|
int placeholderCount = Math.max(1, Math.round(targetWidth / placeholderCharWidth));
|
||||||
|
int originalLength = originalText.length();
|
||||||
|
int maxReasonableLength = Math.max(originalLength * 3, Math.max(placeholderCount, 10));
|
||||||
|
placeholderCount = Math.min(placeholderCount, maxReasonableLength);
|
||||||
|
placeholderCount = Math.max(1, placeholderCount);
|
||||||
|
|
||||||
|
return placeholderChar.repeat(placeholderCount);
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
return " ".repeat(Math.max(1, originalText.length()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean canCalculateTextWidth(PDFont font, String text) {
|
||||||
|
if (font == null || text == null || text.isEmpty()) return false;
|
||||||
|
if (!isWidthCalculationReliable(font)) return false;
|
||||||
|
|
||||||
|
List<Integer> codePoints = getCodePoints(text);
|
||||||
|
int testSampleSize = Math.min(5, codePoints.size());
|
||||||
|
|
||||||
|
for (int i = 0; i < testSampleSize; i++) {
|
||||||
|
int codePoint = codePoints.get(i);
|
||||||
|
String character = new String(Character.toChars(codePoint));
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (!TextEncodingHelper.canEncodeCharacters(font, character)) {
|
||||||
|
log.debug(
|
||||||
|
"Cannot encode character U+{} in text '{}'",
|
||||||
|
Integer.toHexString(codePoint),
|
||||||
|
text);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
float width = calculateCharacterWidth(font, character, 12.0f);
|
||||||
|
if (width <= 0) {
|
||||||
|
log.debug(
|
||||||
|
"Character U+{} has invalid width: {}",
|
||||||
|
Integer.toHexString(codePoint),
|
||||||
|
width);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug(
|
||||||
|
"Error testing character U+{}: {}",
|
||||||
|
Integer.toHexString(codePoint),
|
||||||
|
e.getMessage());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void clearWidthCache() {
|
||||||
|
widthCache.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void clearReliabilityCache() {
|
||||||
|
reliabilityCache.clear();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user