mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-08 17:51:20 +02:00
enhance placeholder creation and width calculation with improved error handling and logging
Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
parent
8c6aa246a7
commit
04d9b6ace2
@ -41,6 +41,7 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@ -704,55 +705,134 @@ public class RedactionService {
|
||||
}
|
||||
|
||||
String createPlaceholderWithFont(String originalWord, PDFont font) {
|
||||
try {
|
||||
if (originalWord == null || originalWord.isEmpty()) {
|
||||
return originalWord;
|
||||
log.debug(
|
||||
"createPlaceholderWithFont: originalWord is null or empty, returning space");
|
||||
return " ";
|
||||
}
|
||||
|
||||
if (font != null && TextEncodingHelper.isFontSubset(font.getName())) {
|
||||
try {
|
||||
float originalWidth = safeGetStringWidth(font, originalWord) / FONT_SCALE_FACTOR;
|
||||
return createAlternativePlaceholder(originalWord, originalWidth, font, 1.0f);
|
||||
float originalWidth =
|
||||
safeGetStringWidth(font, originalWord) / FONT_SCALE_FACTOR;
|
||||
String result =
|
||||
createAlternativePlaceholder(originalWord, originalWidth, font, 1.0f);
|
||||
if (result == null) {
|
||||
log.warn("createAlternativePlaceholder returned null, using fallback");
|
||||
return " ".repeat(Math.max(1, originalWord.length()));
|
||||
}
|
||||
return result;
|
||||
} catch (Exception e) {
|
||||
return "";
|
||||
log.debug(
|
||||
"Error in createPlaceholderWithFont subset logic: {}", e.getMessage());
|
||||
return " ".repeat(Math.max(1, originalWord.length()));
|
||||
}
|
||||
}
|
||||
return " ".repeat(originalWord.length());
|
||||
|
||||
int length = Math.max(1, originalWord.length());
|
||||
String result = " ".repeat(length);
|
||||
log.debug("createPlaceholderWithFont: returning '{}' for '{}'", result, originalWord);
|
||||
return result;
|
||||
} catch (Exception e) {
|
||||
log.error("Unexpected error in createPlaceholderWithFont: {}", e.getMessage());
|
||||
return " ";
|
||||
}
|
||||
}
|
||||
|
||||
String createPlaceholderWithWidth(
|
||||
String originalWord, float targetWidth, PDFont font, float fontSize) {
|
||||
try {
|
||||
if (originalWord == null || originalWord.isEmpty()) {
|
||||
return originalWord;
|
||||
log.debug(
|
||||
"createPlaceholderWithWidth: originalWord is null or empty, returning space");
|
||||
return " ";
|
||||
}
|
||||
if (font == null || fontSize <= 0) {
|
||||
return " ".repeat(originalWord.length());
|
||||
int length = Math.max(1, originalWord.length());
|
||||
String result = " ".repeat(length);
|
||||
log.debug(
|
||||
"createPlaceholderWithWidth: invalid font/size, returning '{}' for '{}'",
|
||||
result,
|
||||
originalWord);
|
||||
return result;
|
||||
}
|
||||
try {
|
||||
|
||||
if (!WidthCalculator.isWidthCalculationReliable(font)) {
|
||||
return " ".repeat(originalWord.length());
|
||||
int length = Math.max(1, originalWord.length());
|
||||
String result = " ".repeat(length);
|
||||
log.debug(
|
||||
"createPlaceholderWithWidth: font not reliable, returning '{}' for '{}'",
|
||||
result,
|
||||
originalWord);
|
||||
return result;
|
||||
}
|
||||
|
||||
if (TextEncodingHelper.isFontSubset(font.getName())) {
|
||||
return createSubsetFontPlaceholder(originalWord, targetWidth, font, fontSize);
|
||||
String result =
|
||||
createSubsetFontPlaceholder(originalWord, targetWidth, font, fontSize);
|
||||
if (result == null) {
|
||||
log.warn("createSubsetFontPlaceholder returned null, using fallback");
|
||||
return " ".repeat(Math.max(1, originalWord.length()));
|
||||
}
|
||||
log.debug(
|
||||
"createPlaceholderWithWidth: subset font, returning '{}' for '{}'",
|
||||
result,
|
||||
originalWord);
|
||||
return result;
|
||||
}
|
||||
|
||||
try {
|
||||
float spaceWidth = WidthCalculator.calculateAccurateWidth(font, " ", fontSize);
|
||||
if (spaceWidth <= 0) {
|
||||
log.debug(
|
||||
"createPlaceholderWithWidth: invalid space width, using alternative placeholder");
|
||||
return createAlternativePlaceholder(originalWord, targetWidth, font, fontSize);
|
||||
}
|
||||
|
||||
int spaceCount = Math.max(1, Math.round(targetWidth / spaceWidth));
|
||||
int maxSpaces =
|
||||
Math.max(
|
||||
originalWord.length() * 2, Math.round(targetWidth / spaceWidth * 1.5f));
|
||||
return " ".repeat(Math.min(spaceCount, maxSpaces));
|
||||
originalWord.length() * 2,
|
||||
Math.round(targetWidth / spaceWidth * 1.5f));
|
||||
int finalSpaces = Math.min(spaceCount, maxSpaces);
|
||||
String result = " ".repeat(finalSpaces);
|
||||
|
||||
log.debug(
|
||||
"createPlaceholderWithWidth: calculated {} spaces for '{}' (targetWidth: {}, spaceWidth: {})",
|
||||
finalSpaces,
|
||||
originalWord,
|
||||
targetWidth,
|
||||
spaceWidth);
|
||||
return result;
|
||||
} catch (Exception e) {
|
||||
return createAlternativePlaceholder(originalWord, targetWidth, font, fontSize);
|
||||
log.debug("Error calculating space width, using alternative: {}", e.getMessage());
|
||||
String result =
|
||||
createAlternativePlaceholder(originalWord, targetWidth, font, fontSize);
|
||||
if (result == null) {
|
||||
return " ".repeat(Math.max(1, originalWord.length()));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Unexpected error in createPlaceholderWithWidth: {}", e.getMessage());
|
||||
return " ".repeat(Math.max(1, originalWord.length()));
|
||||
}
|
||||
}
|
||||
|
||||
private String createSubsetFontPlaceholder(
|
||||
String originalWord, float targetWidth, PDFont font, float fontSize) {
|
||||
try {
|
||||
return createAlternativePlaceholder(originalWord, targetWidth, font, fontSize);
|
||||
String result = createAlternativePlaceholder(originalWord, targetWidth, font, fontSize);
|
||||
if (result == null) {
|
||||
log.warn(
|
||||
"createAlternativePlaceholder returned null in subset font, using fallback");
|
||||
return " ".repeat(Math.max(1, originalWord != null ? originalWord.length() : 1));
|
||||
}
|
||||
return result;
|
||||
} catch (Exception e) {
|
||||
return "";
|
||||
log.error("Error in createSubsetFontPlaceholder: {}", e.getMessage());
|
||||
return " ".repeat(Math.max(1, originalWord != null ? originalWord.length() : 1));
|
||||
}
|
||||
}
|
||||
|
||||
@ -785,9 +865,12 @@ public class RedactionService {
|
||||
} catch (Exception ignored) {
|
||||
}
|
||||
}
|
||||
return "";
|
||||
log.debug(
|
||||
"createAlternativePlaceholder: no suitable alternative found, returning spaces");
|
||||
return " ".repeat(Math.max(1, originalWord != null ? originalWord.length() : 1));
|
||||
} catch (Exception e) {
|
||||
return "";
|
||||
log.error("Unexpected error in createAlternativePlaceholder: {}", e.getMessage());
|
||||
return " ".repeat(Math.max(1, originalWord != null ? originalWord.length() : 1));
|
||||
}
|
||||
}
|
||||
|
||||
@ -971,7 +1054,7 @@ public class RedactionService {
|
||||
}
|
||||
if (isTextShowingOperator(opName) && i > 0) {
|
||||
String textContent = extractTextFromToken(tokens.get(i - 1), opName, gs.font);
|
||||
if (!textContent.isEmpty()) {
|
||||
if (textContent != null && !textContent.trim().isEmpty()) {
|
||||
if (aggressive
|
||||
&& gs.font != null
|
||||
&& tokens.get(i - 1) instanceof COSString cs) {
|
||||
@ -1017,7 +1100,7 @@ public class RedactionService {
|
||||
}
|
||||
if (isTextShowingOperator(opName) && i > 0) {
|
||||
String textContent = extractTextFromToken(tokens.get(i - 1), opName, gs.font);
|
||||
if (!textContent.isEmpty()) {
|
||||
if (textContent != null && !textContent.trim().isEmpty()) {
|
||||
segments.add(
|
||||
new TextSegment(
|
||||
i - 1,
|
||||
@ -1070,11 +1153,14 @@ public class RedactionService {
|
||||
}
|
||||
List<TextSegment> textSegments = extractTextSegments(page, tokens, this.aggressiveMode);
|
||||
String completeText = buildCompleteText(textSegments);
|
||||
List<MatchRange> matches =
|
||||
this.aggressiveMode
|
||||
? findAllMatchesAggressive(
|
||||
textSegments, tokens, targetWords, useRegex, wholeWordSearch)
|
||||
: findAllMatches(completeText, targetWords, useRegex, wholeWordSearch);
|
||||
List<MatchRange> matches;
|
||||
if (this.aggressiveMode) {
|
||||
matches =
|
||||
findAllMatchesAggressive(
|
||||
textSegments, tokens, targetWords, useRegex, wholeWordSearch);
|
||||
} else {
|
||||
matches = findMatchesInSegments(textSegments, targetWords, useRegex, wholeWordSearch);
|
||||
}
|
||||
return applyRedactionsToTokens(tokens, textSegments, matches);
|
||||
}
|
||||
|
||||
@ -1329,27 +1415,36 @@ public class RedactionService {
|
||||
}
|
||||
|
||||
private String applyRedactionsToSegmentText(TextSegment segment, List<MatchRange> matches) {
|
||||
if (segment == null || matches == null || matches.isEmpty()) {
|
||||
return segment != null && segment.getText() != null ? segment.getText() : "";
|
||||
}
|
||||
|
||||
String text = segment.getText();
|
||||
if (!this.aggressiveMode
|
||||
if (text == null) return "";
|
||||
|
||||
if (!aggressiveMode
|
||||
&& segment.getFont() != null
|
||||
&& !TextEncodingHelper.isTextSegmentRemovable(segment.getFont(), text)) {
|
||||
return text;
|
||||
}
|
||||
|
||||
try {
|
||||
StringBuilder result = new StringBuilder(text);
|
||||
for (MatchRange match : matches) {
|
||||
int segmentStart = Math.max(0, match.getStartPos() - segment.getStartPos());
|
||||
int segmentEnd = Math.min(text.length(), match.getEndPos() - segment.getStartPos());
|
||||
|
||||
if (segmentStart < text.length() && segmentEnd > segmentStart) {
|
||||
String originalPart = text.substring(segmentStart, segmentEnd);
|
||||
if (!this.aggressiveMode
|
||||
|
||||
if (!aggressiveMode
|
||||
&& segment.getFont() != null
|
||||
&& !TextEncodingHelper.isTextSegmentRemovable(
|
||||
segment.getFont(), originalPart)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (this.aggressiveMode) {
|
||||
if (aggressiveMode) {
|
||||
result.replace(segmentStart, segmentEnd, "");
|
||||
} else {
|
||||
float originalWidth = 0;
|
||||
@ -1359,19 +1454,26 @@ public class RedactionService {
|
||||
/ FONT_SCALE_FACTOR
|
||||
* segment.getFontSize();
|
||||
}
|
||||
|
||||
String placeholder =
|
||||
(originalWidth > 0)
|
||||
originalWidth > 0
|
||||
? createPlaceholderWithWidth(
|
||||
originalPart,
|
||||
originalWidth,
|
||||
segment.getFont(),
|
||||
segment.getFontSize())
|
||||
: createPlaceholderWithFont(originalPart, segment.getFont());
|
||||
: createPlaceholderWithFont(
|
||||
originalPart, segment.getFont());
|
||||
|
||||
if (placeholder == null) placeholder = " ";
|
||||
result.replace(segmentStart, segmentEnd, placeholder);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result.toString();
|
||||
} catch (Exception e) {
|
||||
return text;
|
||||
}
|
||||
}
|
||||
|
||||
private List<MatchRange> findAllMatchesAggressive(
|
||||
@ -1569,6 +1671,50 @@ public class RedactionService {
|
||||
return result;
|
||||
}
|
||||
|
||||
private List<MatchRange> findMatchesInSegments(
|
||||
List<TextSegment> segments,
|
||||
Set<String> targetWords,
|
||||
boolean useRegex,
|
||||
boolean wholeWordSearch) {
|
||||
List<MatchRange> allMatches = new ArrayList<>();
|
||||
List<Pattern> patterns =
|
||||
TextFinderUtils.createOptimizedSearchPatterns(
|
||||
targetWords, useRegex, wholeWordSearch);
|
||||
|
||||
for (TextSegment segment : segments) {
|
||||
String segmentText = segment.getText();
|
||||
if (segmentText == null || segmentText.isEmpty()) continue;
|
||||
|
||||
if (segment.getFont() != null
|
||||
&& !TextEncodingHelper.isTextSegmentRemovable(segment.getFont(), segmentText)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (Pattern pattern : patterns) {
|
||||
try {
|
||||
var matcher = pattern.matcher(segmentText);
|
||||
while (matcher.find()) {
|
||||
int matchStart = matcher.start();
|
||||
int matchEnd = matcher.end();
|
||||
|
||||
if (matchStart >= 0
|
||||
&& matchEnd <= segmentText.length()
|
||||
&& matchStart < matchEnd) {
|
||||
allMatches.add(
|
||||
new MatchRange(
|
||||
segment.getStartPos() + matchStart,
|
||||
segment.getStartPos() + matchEnd));
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
allMatches.sort(Comparator.comparingInt(MatchRange::getStartPos));
|
||||
return allMatches;
|
||||
}
|
||||
|
||||
private List<Object> applyRedactionsToTokens(
|
||||
List<Object> tokens, List<TextSegment> textSegments, List<MatchRange> matches) {
|
||||
List<Object> newTokens = new ArrayList<>(tokens);
|
||||
@ -1618,24 +1764,97 @@ public class RedactionService {
|
||||
for (Map.Entry<Integer, List<MatchRange>> entry : matchesBySegment.entrySet()) {
|
||||
int segmentIndex = entry.getKey();
|
||||
List<MatchRange> segmentMatches = entry.getValue();
|
||||
|
||||
if (segmentIndex < 0 || segmentIndex >= textSegments.size()) {
|
||||
log.warn(
|
||||
"Invalid segment index: {} (textSegments size: {})",
|
||||
segmentIndex,
|
||||
textSegments.size());
|
||||
continue;
|
||||
}
|
||||
|
||||
TextSegment segment = textSegments.get(segmentIndex);
|
||||
if (segment == null) {
|
||||
log.warn("Segment is null at index: {}", segmentIndex);
|
||||
continue;
|
||||
}
|
||||
|
||||
log.debug(
|
||||
"Creating task for segment {} with operator '{}' and {} matches",
|
||||
segmentIndex,
|
||||
segment.operatorName,
|
||||
segmentMatches.size());
|
||||
|
||||
try {
|
||||
if ("Tj".equals(segment.operatorName) || "'".equals(segment.operatorName)) {
|
||||
String newText = applyRedactionsToSegmentText(segment, segmentMatches);
|
||||
float adjustment = 0;
|
||||
adjustment = calculateWidthAdjustment(segment, segmentMatches);
|
||||
if (newText == null) {
|
||||
log.warn(
|
||||
"applyRedactionsToSegmentText returned null for segment {}, using empty string",
|
||||
segmentIndex);
|
||||
newText = ""; // Ensure it's never null
|
||||
}
|
||||
float adjustment = calculateWidthAdjustment(segment, segmentMatches);
|
||||
tasks.add(new ModificationTask(segment, newText, adjustment));
|
||||
log.debug(
|
||||
"Created Tj/' task with newText: '{}' (length: {})",
|
||||
newText,
|
||||
newText.length());
|
||||
} else if ("TJ".equals(segment.operatorName)) {
|
||||
tasks.add(new ModificationTask(segment, null, 0));
|
||||
tasks.add(
|
||||
new ModificationTask(
|
||||
segment, "", 0)); // Use empty string instead of null for TJ
|
||||
log.debug("Created TJ task with empty newText (was null)");
|
||||
} else {
|
||||
log.debug("Skipping segment with operator: {}", segment.operatorName);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Error creating task for segment {}: {}", segmentIndex, e.getMessage());
|
||||
}
|
||||
}
|
||||
tasks.sort((a, b) -> Integer.compare(b.segment.tokenIndex, a.segment.tokenIndex));
|
||||
for (ModificationTask task : tasks) {
|
||||
|
||||
int processedCount = 0;
|
||||
int maxTasksToProcess = Math.min(tasks.size(), 1000); // Safety limit
|
||||
|
||||
for (int i = 0; i < maxTasksToProcess && i < tasks.size(); i++) {
|
||||
ModificationTask task = tasks.get(i);
|
||||
try {
|
||||
List<MatchRange> segmentMatches =
|
||||
matchesBySegment.getOrDefault(
|
||||
textSegments.indexOf(task.segment), Collections.emptyList());
|
||||
|
||||
if (task.segment.tokenIndex >= newTokens.size()) {
|
||||
log.debug(
|
||||
"Skipping segment with invalid token index {} (tokens size: {})",
|
||||
task.segment.tokenIndex,
|
||||
newTokens.size());
|
||||
continue;
|
||||
}
|
||||
|
||||
if (task.segment.getText() == null || task.segment.getText().isEmpty()) {
|
||||
log.debug(
|
||||
"Skipping segment with empty text at index {}",
|
||||
task.segment.tokenIndex);
|
||||
continue;
|
||||
}
|
||||
|
||||
modifyTokenForRedaction(
|
||||
newTokens, task.segment, task.newText, task.adjustment, segmentMatches);
|
||||
processedCount++;
|
||||
|
||||
} catch (Exception e) {
|
||||
log.warn(
|
||||
"Failed to process modification task for segment at {}: {}",
|
||||
task.segment.tokenIndex,
|
||||
e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
log.debug(
|
||||
"Successfully processed {} out of {} modification tasks",
|
||||
processedCount,
|
||||
tasks.size());
|
||||
return newTokens;
|
||||
}
|
||||
|
||||
@ -1837,20 +2056,67 @@ public class RedactionService {
|
||||
String newText,
|
||||
float adjustment,
|
||||
List<MatchRange> matches) {
|
||||
if (tokens == null
|
||||
|| segment == null
|
||||
|| newText == null
|
||||
|| !isValidTokenIndex(tokens, segment.tokenIndex)
|
||||
|| segment.operatorName == null) {
|
||||
log.warn("Invalid input to modifyTokenForRedaction");
|
||||
// Defensive null handling
|
||||
if (tokens == null || segment == null) {
|
||||
log.warn(
|
||||
"Invalid input to modifyTokenForRedaction: tokens={}, segment={}",
|
||||
tokens == null ? "null" : "valid",
|
||||
segment == null ? "null" : "valid");
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle null newText by providing a default
|
||||
if (newText == null) {
|
||||
log.warn("newText is null, providing default empty string");
|
||||
log.warn(
|
||||
"Segment details: tokenIndex={}, operatorName={}, font={}, fontSize={}, text='{}'",
|
||||
segment.tokenIndex,
|
||||
segment.operatorName,
|
||||
segment.getFont() != null ? segment.getFont().getName() : "null",
|
||||
segment.getFontSize(),
|
||||
segment.getText() != null ? segment.getText() : "null");
|
||||
log.warn("This should not happen with the new null safety measures!");
|
||||
newText = ""; // Default to empty string
|
||||
}
|
||||
if (!isValidTokenIndex(tokens, segment.tokenIndex)) {
|
||||
log.warn(
|
||||
"Invalid input to modifyTokenForRedaction: invalid token index {} (tokens size: {})",
|
||||
segment.tokenIndex,
|
||||
tokens.size());
|
||||
log.debug(
|
||||
"Segment details: operator={}, font={}, fontSize={}, startPos={}, endPos={}",
|
||||
segment.operatorName,
|
||||
segment.getFont(),
|
||||
segment.getFontSize(),
|
||||
segment.getStartPos(),
|
||||
segment.getEndPos());
|
||||
return;
|
||||
}
|
||||
if (segment.operatorName == null) {
|
||||
log.warn("Invalid input to modifyTokenForRedaction: operatorName is null");
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
Object token = tokens.get(segment.tokenIndex);
|
||||
|
||||
if (token == null) {
|
||||
log.warn("Token at index {} is null, skipping modification", segment.tokenIndex);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!isValidTokenForOperator(token, segment.operatorName)) {
|
||||
log.warn(
|
||||
"Token at index {} is not valid for operator {}, skipping modification",
|
||||
segment.tokenIndex,
|
||||
segment.operatorName);
|
||||
return;
|
||||
}
|
||||
|
||||
TokenModificationResult result =
|
||||
performTokenModification(
|
||||
tokens,
|
||||
tokens.get(segment.tokenIndex),
|
||||
token,
|
||||
segment.operatorName,
|
||||
newText,
|
||||
adjustment,
|
||||
@ -1860,12 +2126,24 @@ public class RedactionService {
|
||||
if (!result.isSuccess()) {
|
||||
performFallbackModification(tokens, segment.tokenIndex, newText);
|
||||
}
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
log.warn(
|
||||
"Token index {} is out of bounds (tokens size: {}), skipping modification",
|
||||
segment.tokenIndex,
|
||||
tokens.size());
|
||||
} catch (Exception e) {
|
||||
log.error(
|
||||
"Token modification failed at index {}: {}",
|
||||
segment.tokenIndex,
|
||||
e.getMessage());
|
||||
try {
|
||||
performEmergencyFallback(tokens, segment.tokenIndex);
|
||||
} catch (Exception emergencyError) {
|
||||
log.error(
|
||||
"Emergency fallback also failed at index {}: {}",
|
||||
segment.tokenIndex,
|
||||
emergencyError.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1873,6 +2151,18 @@ public class RedactionService {
|
||||
return index >= 0 && index < tokens.size();
|
||||
}
|
||||
|
||||
private boolean isValidTokenForOperator(Object token, String operatorName) {
|
||||
if (token == null || operatorName == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return switch (operatorName) {
|
||||
case "Tj", "'", "\"" -> token instanceof COSString;
|
||||
case "TJ" -> token instanceof COSArray;
|
||||
default -> true;
|
||||
};
|
||||
}
|
||||
|
||||
private COSArray createRedactedTJArray(
|
||||
COSArray originalArray, TextSegment segment, List<MatchRange> matches) {
|
||||
|
||||
@ -2403,30 +2693,45 @@ public class RedactionService {
|
||||
return totalMods;
|
||||
}
|
||||
|
||||
private static class WidthCalculationResult {
|
||||
private final float adjustment;
|
||||
private final int processedMatches;
|
||||
private final List<String> warnings;
|
||||
|
||||
public WidthCalculationResult(
|
||||
float adjustment, int processedMatches, List<String> warnings) {
|
||||
this.adjustment = adjustment;
|
||||
this.processedMatches = processedMatches;
|
||||
this.warnings = new ArrayList<>(warnings);
|
||||
private List<TextSegment> extractTextSegmentsFromXObject(
|
||||
PDResources resources, List<Object> tokens) {
|
||||
List<TextSegment> segments = new ArrayList<>();
|
||||
int currentTextPos = 0;
|
||||
GraphicsState gs = new GraphicsState();
|
||||
for (int i = 0; i < tokens.size(); i++) {
|
||||
Object currentToken = tokens.get(i);
|
||||
if (currentToken instanceof Operator op) {
|
||||
String opName = op.getName();
|
||||
if ("Tf".equals(opName) && i >= 2) {
|
||||
try {
|
||||
COSName fontName = (COSName) tokens.get(i - 2);
|
||||
COSBase fontSizeBase = (COSBase) tokens.get(i - 1);
|
||||
if (fontSizeBase instanceof COSNumber cosNumber) {
|
||||
gs.setFont(resources.getFont(fontName));
|
||||
gs.setFontSize(cosNumber.floatValue());
|
||||
}
|
||||
|
||||
public float getAdjustment() {
|
||||
return adjustment;
|
||||
} catch (ClassCastException | IOException ignored) {
|
||||
}
|
||||
|
||||
public int getProcessedMatches() {
|
||||
return processedMatches;
|
||||
}
|
||||
|
||||
public List<String> getWarnings() {
|
||||
return new ArrayList<>(warnings);
|
||||
if (isTextShowingOperator(opName) && i > 0) {
|
||||
String textContent = extractTextFromToken(tokens.get(i - 1), opName, gs.font);
|
||||
if (textContent != null && !textContent.trim().isEmpty()) {
|
||||
segments.add(
|
||||
new TextSegment(
|
||||
i - 1,
|
||||
opName,
|
||||
textContent,
|
||||
currentTextPos,
|
||||
currentTextPos + textContent.length(),
|
||||
gs.font,
|
||||
gs.fontSize));
|
||||
currentTextPos += textContent.length();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return segments;
|
||||
}
|
||||
|
||||
private int wipeAllTextInFormXObject(PDDocument document, PDFormXObject formXObject)
|
||||
throws IOException {
|
||||
@ -2485,25 +2790,20 @@ public class RedactionService {
|
||||
}
|
||||
}
|
||||
|
||||
private static class TokenModificationResult {
|
||||
private final boolean success;
|
||||
private final String errorMessage;
|
||||
private static class WidthCalculationResult {
|
||||
@Getter private final float adjustment;
|
||||
@Getter private final int processedMatches;
|
||||
private final List<String> warnings;
|
||||
|
||||
private TokenModificationResult(boolean success, String errorMessage) {
|
||||
this.success = success;
|
||||
this.errorMessage = errorMessage;
|
||||
public WidthCalculationResult(
|
||||
float adjustment, int processedMatches, List<String> warnings) {
|
||||
this.adjustment = adjustment;
|
||||
this.processedMatches = processedMatches;
|
||||
this.warnings = new ArrayList<>(warnings);
|
||||
}
|
||||
|
||||
public static TokenModificationResult success() {
|
||||
return new TokenModificationResult(true, null);
|
||||
}
|
||||
|
||||
public static TokenModificationResult failure(String errorMessage) {
|
||||
return new TokenModificationResult(false, errorMessage);
|
||||
}
|
||||
|
||||
public boolean isSuccess() {
|
||||
return success;
|
||||
public List<String> getWarnings() {
|
||||
return new ArrayList<>(warnings);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2556,44 +2856,22 @@ public class RedactionService {
|
||||
}
|
||||
}
|
||||
|
||||
private List<TextSegment> extractTextSegmentsFromXObject(
|
||||
PDResources resources, List<Object> tokens) {
|
||||
List<TextSegment> segments = new ArrayList<>();
|
||||
int currentTextPos = 0;
|
||||
GraphicsState gs = new GraphicsState();
|
||||
for (int i = 0; i < tokens.size(); i++) {
|
||||
Object currentToken = tokens.get(i);
|
||||
if (currentToken instanceof Operator op) {
|
||||
String opName = op.getName();
|
||||
if ("Tf".equals(opName) && i >= 2) {
|
||||
try {
|
||||
COSName fontName = (COSName) tokens.get(i - 2);
|
||||
COSBase fontSizeBase = (COSBase) tokens.get(i - 1);
|
||||
if (fontSizeBase instanceof COSNumber cosNumber) {
|
||||
gs.setFont(resources.getFont(fontName));
|
||||
gs.setFontSize(cosNumber.floatValue());
|
||||
private static class TokenModificationResult {
|
||||
@Getter private final boolean success;
|
||||
private final String errorMessage;
|
||||
|
||||
private TokenModificationResult(boolean success, String errorMessage) {
|
||||
this.success = success;
|
||||
this.errorMessage = errorMessage;
|
||||
}
|
||||
} catch (ClassCastException | IOException ignored) {
|
||||
|
||||
public static TokenModificationResult success() {
|
||||
return new TokenModificationResult(true, null);
|
||||
}
|
||||
|
||||
public static TokenModificationResult failure(String errorMessage) {
|
||||
return new TokenModificationResult(false, errorMessage);
|
||||
}
|
||||
if (isTextShowingOperator(opName) && i > 0) {
|
||||
String textContent = extractTextFromToken(tokens.get(i - 1), opName, gs.font);
|
||||
if (!textContent.isEmpty()) {
|
||||
segments.add(
|
||||
new TextSegment(
|
||||
i - 1,
|
||||
opName,
|
||||
textContent,
|
||||
currentTextPos,
|
||||
currentTextPos + textContent.length(),
|
||||
gs.font,
|
||||
gs.fontSize));
|
||||
currentTextPos += textContent.length();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return segments;
|
||||
}
|
||||
|
||||
@Data
|
||||
|
@ -1,7 +1,14 @@
|
||||
package stirling.software.SPDF.utils.text;
|
||||
|
||||
import java.text.Normalizer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType0Font;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@ -11,127 +18,572 @@ import lombok.extern.slf4j.Slf4j;
|
||||
public class WidthCalculator {
|
||||
|
||||
private final int FONT_SCALE_FACTOR = 1000;
|
||||
private final float CONSERVATIVE_CHAR_WIDTH_RATIO = 0.55f;
|
||||
private final float BBOX_CHAR_WIDTH_RATIO = 0.65f;
|
||||
|
||||
private final Map<String, Float> widthCache = new ConcurrentHashMap<>();
|
||||
private final Map<String, Boolean> reliabilityCache = new ConcurrentHashMap<>();
|
||||
|
||||
private String createCacheKey(PDFont font, String text, float fontSize) {
|
||||
return String.format("%s|%s|%.2f", font.getName(), text, fontSize);
|
||||
}
|
||||
|
||||
private String createReliabilityCacheKey(PDFont font) {
|
||||
return font.getName();
|
||||
}
|
||||
|
||||
public float calculateAccurateWidth(PDFont font, String text, float fontSize) {
|
||||
if (font == null || text == null || text.isEmpty() || fontSize <= 0) {
|
||||
return 0;
|
||||
return calculateAccurateWidth(font, text, fontSize, true);
|
||||
}
|
||||
|
||||
if (!TextEncodingHelper.canEncodeCharacters(font, text)) {
|
||||
log.debug(
|
||||
"Text cannot be encoded by font {}, using fallback width calculation",
|
||||
font.getName());
|
||||
return calculateFallbackWidth(font, text, fontSize);
|
||||
public float calculateAccurateWidth(
|
||||
PDFont font, String text, float fontSize, boolean useCache) {
|
||||
if (font == null || text == null || text.isEmpty() || fontSize <= 0) return 0;
|
||||
|
||||
if (useCache) {
|
||||
String cacheKey = createCacheKey(font, text, fontSize);
|
||||
Float cachedWidth = widthCache.get(cacheKey);
|
||||
if (cachedWidth != null) return cachedWidth;
|
||||
}
|
||||
|
||||
String normalizedText = normalizeText(text);
|
||||
|
||||
Float directWidth = calculateDirectWidth(font, normalizedText, fontSize);
|
||||
if (directWidth != null) {
|
||||
if (useCache) widthCache.put(createCacheKey(font, text, fontSize), directWidth);
|
||||
return directWidth;
|
||||
}
|
||||
|
||||
Float charByCharWidth = calculateCharacterByCharacterWidth(font, normalizedText, fontSize);
|
||||
if (charByCharWidth != null) {
|
||||
if (useCache) widthCache.put(createCacheKey(font, text, fontSize), charByCharWidth);
|
||||
return charByCharWidth;
|
||||
}
|
||||
|
||||
Float glyphWidth = calculateGlyphBasedWidth(font, normalizedText, fontSize);
|
||||
if (glyphWidth != null) {
|
||||
if (useCache) widthCache.put(createCacheKey(font, text, fontSize), glyphWidth);
|
||||
return glyphWidth;
|
||||
}
|
||||
|
||||
float fallbackWidth = calculateComprehensiveFallbackWidth(font, normalizedText, fontSize);
|
||||
if (useCache) widthCache.put(createCacheKey(font, text, fontSize), fallbackWidth);
|
||||
return fallbackWidth;
|
||||
}
|
||||
|
||||
private String normalizeText(String text) {
|
||||
return Normalizer.normalize(text, Normalizer.Form.NFC);
|
||||
}
|
||||
|
||||
private Float calculateDirectWidth(PDFont font, String text, float fontSize) {
|
||||
if (!TextEncodingHelper.canEncodeCharacters(font, text)) return null;
|
||||
|
||||
try {
|
||||
float rawWidth = font.getStringWidth(text);
|
||||
float scaledWidth = (rawWidth / FONT_SCALE_FACTOR) * fontSize;
|
||||
return rawWidth >= 0 && scaledWidth >= 0 ? scaledWidth : null;
|
||||
} catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
log.debug(
|
||||
"Direct width calculation successful for font {}: {} -> {}",
|
||||
font.getName(),
|
||||
rawWidth,
|
||||
scaledWidth);
|
||||
return scaledWidth;
|
||||
private Float calculateCharacterByCharacterWidth(PDFont font, String text, float fontSize) {
|
||||
try {
|
||||
List<Integer> codePoints = getCodePoints(text);
|
||||
float totalWidth = 0;
|
||||
int previousCodePoint = -1;
|
||||
|
||||
for (int codePoint : codePoints) {
|
||||
String character = new String(Character.toChars(codePoint));
|
||||
Float charWidth = calculateSingleCharacterWidth(font, character, fontSize);
|
||||
if (charWidth == null) return null;
|
||||
|
||||
totalWidth += charWidth;
|
||||
if (previousCodePoint != -1) {
|
||||
totalWidth += calculateKerning(font, previousCodePoint, codePoint, fontSize);
|
||||
}
|
||||
previousCodePoint = codePoint;
|
||||
}
|
||||
return totalWidth;
|
||||
} catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private List<Integer> getCodePoints(String text) {
|
||||
List<Integer> codePoints = new ArrayList<>();
|
||||
for (int i = 0; i < text.length(); ) {
|
||||
int codePoint = text.codePointAt(i);
|
||||
codePoints.add(codePoint);
|
||||
i += Character.charCount(codePoint);
|
||||
}
|
||||
return codePoints;
|
||||
}
|
||||
|
||||
private Float calculateSingleCharacterWidth(PDFont font, String character, float fontSize) {
|
||||
try {
|
||||
byte[] encoded = null;
|
||||
|
||||
try {
|
||||
encoded = font.encode(character);
|
||||
if (encoded.length == 0) encoded = null;
|
||||
} catch (Exception e) {
|
||||
log.debug("Direct encoding failed for '{}': {}", character, e.getMessage());
|
||||
}
|
||||
|
||||
if (encoded == null && font instanceof PDType0Font) {
|
||||
try {
|
||||
encoded = character.getBytes("UTF-8");
|
||||
} catch (Exception e) {
|
||||
log.debug("UTF-8 encoding failed for '{}': {}", character, e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
if (encoded != null && encoded.length > 0) {
|
||||
Float width = calculateGlyphWidth(font, encoded, fontSize);
|
||||
if (width != null && width >= 0) return width;
|
||||
}
|
||||
|
||||
return calculateAverageCharacterWidth(font, fontSize);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.debug(
|
||||
"Direct width calculation failed for font {}: {}",
|
||||
font.getName(),
|
||||
"Single character width calculation failed for '{}': {}",
|
||||
character,
|
||||
e.getMessage());
|
||||
return calculateWidthWithCharacterIteration(font, text, fontSize);
|
||||
return calculateAverageCharacterWidth(font, fontSize);
|
||||
}
|
||||
}
|
||||
|
||||
private float calculateWidthWithCharacterIteration(PDFont font, String text, float fontSize) {
|
||||
private Float calculateGlyphWidth(PDFont font, byte[] encoded, float fontSize) {
|
||||
for (byte b : encoded) {
|
||||
try {
|
||||
int glyphCode = b & 0xFF;
|
||||
float glyphWidth = font.getWidth(glyphCode);
|
||||
|
||||
if (glyphWidth > 0) {
|
||||
return (glyphWidth / FONT_SCALE_FACTOR) * fontSize;
|
||||
}
|
||||
|
||||
// Try alternative width methods
|
||||
try {
|
||||
glyphWidth = font.getWidthFromFont(glyphCode);
|
||||
if (glyphWidth > 0) {
|
||||
return (glyphWidth / FONT_SCALE_FACTOR) * fontSize;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.debug(
|
||||
"getWidthFromFont failed for glyph {}: {}", glyphCode, e.getMessage());
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
log.debug("Glyph width calculation failed for byte {}: {}", b, e.getMessage());
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private float calculateKerning(
|
||||
PDFont font, int leftCodePoint, int rightCodePoint, float fontSize) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
private Float calculateGlyphBasedWidth(PDFont font, String text, float fontSize) {
|
||||
try {
|
||||
float totalWidth = 0;
|
||||
|
||||
for (int i = 0; i < text.length(); i++) {
|
||||
String character = text.substring(i, i + 1);
|
||||
try {
|
||||
byte[] encoded = font.encode(character);
|
||||
if (encoded.length > 0) {
|
||||
int glyphCode = encoded[0] & 0xFF;
|
||||
float glyphWidth = font.getWidth(glyphCode);
|
||||
for (int i = 0; i < text.length(); ) {
|
||||
int codePoint = text.codePointAt(i);
|
||||
String character = new String(Character.toChars(codePoint));
|
||||
|
||||
if (glyphWidth == 0) {
|
||||
try {
|
||||
glyphWidth = font.getWidthFromFont(glyphCode);
|
||||
} catch (Exception e2) {
|
||||
glyphWidth = font.getAverageFontWidth();
|
||||
}
|
||||
// Try to get glyph information more comprehensively
|
||||
Float charWidth =
|
||||
calculateGlyphWidthComprehensively(font, character, codePoint, fontSize);
|
||||
if (charWidth == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
totalWidth += (glyphWidth / FONT_SCALE_FACTOR) * fontSize;
|
||||
} else {
|
||||
totalWidth += (font.getAverageFontWidth() / FONT_SCALE_FACTOR) * fontSize;
|
||||
}
|
||||
} catch (Exception e2) {
|
||||
totalWidth += (font.getAverageFontWidth() / FONT_SCALE_FACTOR) * fontSize;
|
||||
}
|
||||
totalWidth += charWidth;
|
||||
i += Character.charCount(codePoint);
|
||||
}
|
||||
|
||||
log.debug("Character iteration width calculation: {}", totalWidth);
|
||||
log.debug("Glyph-based width calculation: {}", totalWidth);
|
||||
return totalWidth;
|
||||
|
||||
} catch (Exception e) {
|
||||
log.debug("Character iteration failed: {}", e.getMessage());
|
||||
return calculateFallbackWidth(font, text, fontSize);
|
||||
log.debug("Glyph-based calculation failed: {}", e.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private float calculateFallbackWidth(PDFont font, String text, float fontSize) {
|
||||
private Float calculateGlyphWidthComprehensively(
|
||||
PDFont font, String character, int codePoint, float fontSize) {
|
||||
try {
|
||||
// Method 1: Try standard encoding
|
||||
try {
|
||||
byte[] encoded = font.encode(character);
|
||||
if (encoded.length > 0) {
|
||||
Float width = calculateWidthFromEncodedBytes(font, encoded, fontSize);
|
||||
if (width != null && width >= 0) {
|
||||
return width;
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.debug(
|
||||
"Standard encoding failed for U+{}: {}",
|
||||
Integer.toHexString(codePoint),
|
||||
e.getMessage());
|
||||
}
|
||||
|
||||
// Method 2: Try Unicode code point directly
|
||||
try {
|
||||
float glyphWidth = font.getWidth(codePoint);
|
||||
if (glyphWidth > 0) {
|
||||
return (glyphWidth / FONT_SCALE_FACTOR) * fontSize;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.debug(
|
||||
"Unicode code point width failed for U+{}: {}",
|
||||
Integer.toHexString(codePoint),
|
||||
e.getMessage());
|
||||
}
|
||||
|
||||
// Method 3: Character category based estimation
|
||||
return calculateCategoryBasedWidth(font, codePoint, fontSize);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.debug("Comprehensive glyph width calculation failed: {}", e.getMessage());
|
||||
return calculateAverageCharacterWidth(font, fontSize);
|
||||
}
|
||||
}
|
||||
|
||||
private Float calculateWidthFromEncodedBytes(PDFont font, byte[] encoded, float fontSize) {
|
||||
// Try each byte as a potential glyph code
|
||||
for (byte b : encoded) {
|
||||
try {
|
||||
int glyphCode = b & 0xFF;
|
||||
float width = font.getWidth(glyphCode);
|
||||
if (width > 0) {
|
||||
return (width / FONT_SCALE_FACTOR) * fontSize;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
// Continue trying other bytes
|
||||
}
|
||||
}
|
||||
|
||||
// Try multi-byte interpretation for Unicode fonts
|
||||
if (encoded.length >= 2 && font instanceof PDType0Font) {
|
||||
try {
|
||||
int glyphCode = ((encoded[0] & 0xFF) << 8) | (encoded[1] & 0xFF);
|
||||
float width = font.getWidth(glyphCode);
|
||||
if (width > 0) {
|
||||
return (width / FONT_SCALE_FACTOR) * fontSize;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.debug("Multi-byte glyph code interpretation failed: {}", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private Float calculateCategoryBasedWidth(PDFont font, int codePoint, float fontSize) {
|
||||
try {
|
||||
int category = Character.getType(codePoint);
|
||||
float baseWidth = calculateAverageCharacterWidth(font, fontSize);
|
||||
|
||||
// Adjust width based on character category
|
||||
float multiplier =
|
||||
switch (category) {
|
||||
case Character.UPPERCASE_LETTER -> 1.2f;
|
||||
case Character.LOWERCASE_LETTER -> 1.0f;
|
||||
case Character.DECIMAL_DIGIT_NUMBER -> 1.0f;
|
||||
case Character.SPACE_SEPARATOR -> 0.5f;
|
||||
case Character.DASH_PUNCTUATION -> 0.8f;
|
||||
case Character.OTHER_PUNCTUATION -> 0.6f;
|
||||
case Character.CURRENCY_SYMBOL -> 1.1f;
|
||||
case Character.MATH_SYMBOL -> 1.0f;
|
||||
case Character.MODIFIER_LETTER -> 0.7f;
|
||||
case Character.NON_SPACING_MARK -> 0.0f; // Combining characters
|
||||
case Character.ENCLOSING_MARK -> 0.0f;
|
||||
case Character.COMBINING_SPACING_MARK -> 0.3f;
|
||||
default -> 1.0f;
|
||||
};
|
||||
|
||||
return baseWidth * multiplier;
|
||||
} catch (Exception e) {
|
||||
log.debug("Category-based width calculation failed: {}", e.getMessage());
|
||||
return calculateAverageCharacterWidth(font, fontSize);
|
||||
}
|
||||
}
|
||||
|
||||
private float calculateAverageCharacterWidth(PDFont font, float fontSize) {
|
||||
try {
|
||||
float avgWidth = font.getAverageFontWidth();
|
||||
return (avgWidth / FONT_SCALE_FACTOR) * fontSize;
|
||||
} catch (Exception e) {
|
||||
log.debug("Average character width calculation failed: {}", e.getMessage());
|
||||
return CONSERVATIVE_CHAR_WIDTH_RATIO * fontSize;
|
||||
}
|
||||
}
|
||||
|
||||
private float calculateComprehensiveFallbackWidth(PDFont font, String text, float fontSize) {
|
||||
try {
|
||||
// Strategy 1: Use font bounding box with character analysis
|
||||
if (font.getFontDescriptor() != null
|
||||
&& font.getFontDescriptor().getFontBoundingBox() != null) {
|
||||
|
||||
PDRectangle bbox = font.getFontDescriptor().getFontBoundingBox();
|
||||
float avgCharWidth =
|
||||
bbox.getWidth() / FONT_SCALE_FACTOR * 0.6f; // Conservative estimate
|
||||
float fallbackWidth = text.length() * avgCharWidth * fontSize;
|
||||
float avgCharWidth = bbox.getWidth() / FONT_SCALE_FACTOR;
|
||||
|
||||
log.debug("Bounding box fallback width: {}", fallbackWidth);
|
||||
return fallbackWidth;
|
||||
// Analyze text composition for better estimation
|
||||
float adjustedWidth = analyzeTextComposition(text, avgCharWidth, fontSize);
|
||||
log.debug("Bounding box based fallback width: {}", adjustedWidth);
|
||||
return adjustedWidth;
|
||||
}
|
||||
|
||||
float avgWidth = font.getAverageFontWidth();
|
||||
float fallbackWidth = (text.length() * avgWidth / FONT_SCALE_FACTOR) * fontSize;
|
||||
|
||||
log.debug("Average width fallback: {}", fallbackWidth);
|
||||
return fallbackWidth;
|
||||
// Strategy 2: Enhanced average width calculation
|
||||
float enhancedAverage = calculateEnhancedAverageWidth(font, text, fontSize);
|
||||
log.debug("Enhanced average fallback width: {}", enhancedAverage);
|
||||
return enhancedAverage;
|
||||
|
||||
} catch (Exception e) {
|
||||
float conservativeWidth = text.length() * 0.5f * fontSize;
|
||||
log.debug(
|
||||
"Conservative fallback width for font {}: {}",
|
||||
font.getName(),
|
||||
conservativeWidth);
|
||||
// Ultimate fallback
|
||||
float conservativeWidth = text.length() * CONSERVATIVE_CHAR_WIDTH_RATIO * fontSize;
|
||||
log.debug("Conservative fallback width: {}", conservativeWidth);
|
||||
return conservativeWidth;
|
||||
}
|
||||
}
|
||||
|
||||
private float analyzeTextComposition(String text, float avgCharWidth, float fontSize) {
|
||||
float totalWidth = 0;
|
||||
int spaceCount = 0;
|
||||
int upperCount = 0;
|
||||
int lowerCount = 0;
|
||||
int digitCount = 0;
|
||||
int punctCount = 0;
|
||||
|
||||
for (int i = 0; i < text.length(); ) {
|
||||
int codePoint = text.codePointAt(i);
|
||||
int category = Character.getType(codePoint);
|
||||
|
||||
switch (category) {
|
||||
case Character.SPACE_SEPARATOR -> {
|
||||
spaceCount++;
|
||||
totalWidth += avgCharWidth * 0.5f * fontSize;
|
||||
}
|
||||
case Character.UPPERCASE_LETTER -> {
|
||||
upperCount++;
|
||||
totalWidth += avgCharWidth * 1.2f * fontSize;
|
||||
}
|
||||
case Character.LOWERCASE_LETTER -> {
|
||||
lowerCount++;
|
||||
totalWidth += avgCharWidth * 1.0f * fontSize;
|
||||
}
|
||||
case Character.DECIMAL_DIGIT_NUMBER -> {
|
||||
digitCount++;
|
||||
totalWidth += avgCharWidth * 1.0f * fontSize;
|
||||
}
|
||||
case Character.OTHER_PUNCTUATION, Character.DASH_PUNCTUATION -> {
|
||||
punctCount++;
|
||||
totalWidth += avgCharWidth * 0.7f * fontSize;
|
||||
}
|
||||
default -> totalWidth += avgCharWidth * BBOX_CHAR_WIDTH_RATIO * fontSize;
|
||||
}
|
||||
|
||||
i += Character.charCount(codePoint);
|
||||
}
|
||||
|
||||
// Log composition analysis for debugging
|
||||
log.debug(
|
||||
"Text composition analysis - Spaces: {}, Upper: {}, Lower: {}, Digits: {}, Punct: {}",
|
||||
spaceCount,
|
||||
upperCount,
|
||||
lowerCount,
|
||||
digitCount,
|
||||
punctCount);
|
||||
|
||||
return totalWidth;
|
||||
}
|
||||
|
||||
private float calculateEnhancedAverageWidth(PDFont font, String text, float fontSize) {
|
||||
try {
|
||||
float baseAverage = font.getAverageFontWidth();
|
||||
|
||||
// Try to get more specific metrics
|
||||
float capHeight = 0;
|
||||
float xHeight = 0;
|
||||
|
||||
if (font.getFontDescriptor() != null) {
|
||||
capHeight = font.getFontDescriptor().getCapHeight();
|
||||
xHeight = font.getFontDescriptor().getXHeight();
|
||||
}
|
||||
|
||||
// Use metrics to adjust the average width estimation
|
||||
float adjustmentFactor = 1.0f;
|
||||
if (capHeight > 0 && xHeight > 0) {
|
||||
adjustmentFactor = Math.max(0.8f, Math.min(1.2f, xHeight / capHeight));
|
||||
}
|
||||
|
||||
float adjustedAverage = (baseAverage * adjustmentFactor / FONT_SCALE_FACTOR) * fontSize;
|
||||
return text.length() * adjustedAverage;
|
||||
|
||||
} catch (Exception e) {
|
||||
log.debug("Enhanced average width calculation failed: {}", e.getMessage());
|
||||
return text.length() * CONSERVATIVE_CHAR_WIDTH_RATIO * fontSize;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isWidthCalculationReliable(PDFont font) {
|
||||
if (font == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check cache first
|
||||
String cacheKey = createReliabilityCacheKey(font);
|
||||
Boolean cachedResult = reliabilityCache.get(cacheKey);
|
||||
if (cachedResult != null) {
|
||||
log.debug(
|
||||
"Using cached reliability result for font {}: {}",
|
||||
font.getName(),
|
||||
cachedResult);
|
||||
return cachedResult;
|
||||
}
|
||||
|
||||
boolean result = performReliabilityCheck(font);
|
||||
|
||||
// Cache the result
|
||||
reliabilityCache.put(cacheKey, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
private boolean performReliabilityCheck(PDFont font) {
|
||||
try {
|
||||
// Check if font is damaged
|
||||
if (font.isDamaged()) {
|
||||
log.debug("Font {} is damaged", font.getName());
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check basic width calculation capability
|
||||
if (!TextEncodingHelper.canCalculateBasicWidths(font)) {
|
||||
log.debug("Font {} cannot perform basic width calculations", font.getName());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (TextEncodingHelper.hasCustomEncoding(font)) {
|
||||
log.debug("Font {} has custom encoding", font.getName());
|
||||
// Test with a simple character
|
||||
try {
|
||||
font.getStringWidth("A");
|
||||
return true;
|
||||
} catch (Exception e) {
|
||||
log.debug("Font {} failed basic width test: {}", font.getName(), e.getMessage());
|
||||
}
|
||||
|
||||
// Check if we can at least get average width
|
||||
try {
|
||||
float avgWidth = font.getAverageFontWidth();
|
||||
return avgWidth > 0;
|
||||
} catch (Exception e) {
|
||||
log.debug(
|
||||
"Font {} cannot provide average width: {}", font.getName(), e.getMessage());
|
||||
}
|
||||
|
||||
return false;
|
||||
|
||||
} catch (Exception e) {
|
||||
log.debug("Reliability check failed for font {}: {}", font.getName(), e.getMessage());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public float calculateCharacterWidth(PDFont font, String character, float fontSize) {
|
||||
if (font == null || character == null || character.isEmpty() || fontSize <= 0) return 0;
|
||||
|
||||
String cacheKey = createCacheKey(font, character, fontSize);
|
||||
Float cachedWidth = widthCache.get(cacheKey);
|
||||
if (cachedWidth != null) return cachedWidth;
|
||||
|
||||
Float width = calculateSingleCharacterWidth(font, character, fontSize);
|
||||
if (width == null) width = calculateAverageCharacterWidth(font, fontSize);
|
||||
|
||||
widthCache.put(cacheKey, width);
|
||||
return width;
|
||||
}
|
||||
|
||||
public String createWidthMatchingPlaceholder(
|
||||
String originalText,
|
||||
float targetWidth,
|
||||
PDFont font,
|
||||
float fontSize,
|
||||
String placeholderChar) {
|
||||
if (originalText == null || originalText.isEmpty() || targetWidth <= 0) return "";
|
||||
|
||||
if (placeholderChar == null || placeholderChar.isEmpty()) placeholderChar = " ";
|
||||
|
||||
try {
|
||||
float placeholderCharWidth = calculateCharacterWidth(font, placeholderChar, fontSize);
|
||||
if (placeholderCharWidth <= 0) {
|
||||
return " ".repeat(Math.max(1, originalText.length()));
|
||||
}
|
||||
|
||||
int placeholderCount = Math.max(1, Math.round(targetWidth / placeholderCharWidth));
|
||||
int originalLength = originalText.length();
|
||||
int maxReasonableLength = Math.max(originalLength * 3, Math.max(placeholderCount, 10));
|
||||
placeholderCount = Math.min(placeholderCount, maxReasonableLength);
|
||||
placeholderCount = Math.max(1, placeholderCount);
|
||||
|
||||
return placeholderChar.repeat(placeholderCount);
|
||||
|
||||
} catch (Exception e) {
|
||||
return " ".repeat(Math.max(1, originalText.length()));
|
||||
}
|
||||
}
|
||||
|
||||
public boolean canCalculateTextWidth(PDFont font, String text) {
|
||||
if (font == null || text == null || text.isEmpty()) return false;
|
||||
if (!isWidthCalculationReliable(font)) return false;
|
||||
|
||||
List<Integer> codePoints = getCodePoints(text);
|
||||
int testSampleSize = Math.min(5, codePoints.size());
|
||||
|
||||
for (int i = 0; i < testSampleSize; i++) {
|
||||
int codePoint = codePoints.get(i);
|
||||
String character = new String(Character.toChars(codePoint));
|
||||
|
||||
try {
|
||||
if (!TextEncodingHelper.canEncodeCharacters(font, character)) {
|
||||
log.debug(
|
||||
"Cannot encode character U+{} in text '{}'",
|
||||
Integer.toHexString(codePoint),
|
||||
text);
|
||||
return false;
|
||||
}
|
||||
|
||||
float width = calculateCharacterWidth(font, character, 12.0f);
|
||||
if (width <= 0) {
|
||||
log.debug(
|
||||
"Character U+{} has invalid width: {}",
|
||||
Integer.toHexString(codePoint),
|
||||
width);
|
||||
return false;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.debug(
|
||||
"Error testing character U+{}: {}",
|
||||
Integer.toHexString(codePoint),
|
||||
e.getMessage());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public void clearWidthCache() {
|
||||
widthCache.clear();
|
||||
}
|
||||
|
||||
public void clearReliabilityCache() {
|
||||
reliabilityCache.clear();
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user