diff --git a/app/core/src/main/java/stirling/software/SPDF/service/RedactionService.java b/app/core/src/main/java/stirling/software/SPDF/service/RedactionService.java index 05c2a9b68..dd5efca48 100644 --- a/app/core/src/main/java/stirling/software/SPDF/service/RedactionService.java +++ b/app/core/src/main/java/stirling/software/SPDF/service/RedactionService.java @@ -41,6 +41,7 @@ import org.springframework.web.multipart.MultipartFile; import lombok.AllArgsConstructor; import lombok.Data; +import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -704,55 +705,134 @@ public class RedactionService { } String createPlaceholderWithFont(String originalWord, PDFont font) { - if (originalWord == null || originalWord.isEmpty()) { - return originalWord; - } - if (font != null && TextEncodingHelper.isFontSubset(font.getName())) { - try { - float originalWidth = safeGetStringWidth(font, originalWord) / FONT_SCALE_FACTOR; - return createAlternativePlaceholder(originalWord, originalWidth, font, 1.0f); - } catch (Exception e) { - return ""; + try { + if (originalWord == null || originalWord.isEmpty()) { + log.debug( + "createPlaceholderWithFont: originalWord is null or empty, returning space"); + return " "; } + + if (font != null && TextEncodingHelper.isFontSubset(font.getName())) { + try { + float originalWidth = + safeGetStringWidth(font, originalWord) / FONT_SCALE_FACTOR; + String result = + createAlternativePlaceholder(originalWord, originalWidth, font, 1.0f); + if (result == null) { + log.warn("createAlternativePlaceholder returned null, using fallback"); + return " ".repeat(Math.max(1, originalWord.length())); + } + return result; + } catch (Exception e) { + log.debug( + "Error in createPlaceholderWithFont subset logic: {}", e.getMessage()); + return " ".repeat(Math.max(1, originalWord.length())); + } + } + + int length = Math.max(1, originalWord.length()); + String result = " ".repeat(length); + log.debug("createPlaceholderWithFont: returning '{}' for '{}'", result, originalWord); + return result; + } catch (Exception e) { + log.error("Unexpected error in createPlaceholderWithFont: {}", e.getMessage()); + return " "; } - return " ".repeat(originalWord.length()); } String createPlaceholderWithWidth( String originalWord, float targetWidth, PDFont font, float fontSize) { - if (originalWord == null || originalWord.isEmpty()) { - return originalWord; - } - if (font == null || fontSize <= 0) { - return " ".repeat(originalWord.length()); - } try { + if (originalWord == null || originalWord.isEmpty()) { + log.debug( + "createPlaceholderWithWidth: originalWord is null or empty, returning space"); + return " "; + } + if (font == null || fontSize <= 0) { + int length = Math.max(1, originalWord.length()); + String result = " ".repeat(length); + log.debug( + "createPlaceholderWithWidth: invalid font/size, returning '{}' for '{}'", + result, + originalWord); + return result; + } + if (!WidthCalculator.isWidthCalculationReliable(font)) { - return " ".repeat(originalWord.length()); + int length = Math.max(1, originalWord.length()); + String result = " ".repeat(length); + log.debug( + "createPlaceholderWithWidth: font not reliable, returning '{}' for '{}'", + result, + originalWord); + return result; } + if (TextEncodingHelper.isFontSubset(font.getName())) { - return createSubsetFontPlaceholder(originalWord, targetWidth, font, fontSize); + String result = + createSubsetFontPlaceholder(originalWord, targetWidth, font, fontSize); + if (result == null) { + log.warn("createSubsetFontPlaceholder returned null, using fallback"); + return " ".repeat(Math.max(1, originalWord.length())); + } + log.debug( + "createPlaceholderWithWidth: subset font, returning '{}' for '{}'", + result, + originalWord); + return result; } - float spaceWidth = WidthCalculator.calculateAccurateWidth(font, " ", fontSize); - if (spaceWidth <= 0) { - return createAlternativePlaceholder(originalWord, targetWidth, font, fontSize); + + try { + float spaceWidth = WidthCalculator.calculateAccurateWidth(font, " ", fontSize); + if (spaceWidth <= 0) { + log.debug( + "createPlaceholderWithWidth: invalid space width, using alternative placeholder"); + return createAlternativePlaceholder(originalWord, targetWidth, font, fontSize); + } + + int spaceCount = Math.max(1, Math.round(targetWidth / spaceWidth)); + int maxSpaces = + Math.max( + originalWord.length() * 2, + Math.round(targetWidth / spaceWidth * 1.5f)); + int finalSpaces = Math.min(spaceCount, maxSpaces); + String result = " ".repeat(finalSpaces); + + log.debug( + "createPlaceholderWithWidth: calculated {} spaces for '{}' (targetWidth: {}, spaceWidth: {})", + finalSpaces, + originalWord, + targetWidth, + spaceWidth); + return result; + } catch (Exception e) { + log.debug("Error calculating space width, using alternative: {}", e.getMessage()); + String result = + createAlternativePlaceholder(originalWord, targetWidth, font, fontSize); + if (result == null) { + return " ".repeat(Math.max(1, originalWord.length())); + } + return result; } - int spaceCount = Math.max(1, Math.round(targetWidth / spaceWidth)); - int maxSpaces = - Math.max( - originalWord.length() * 2, Math.round(targetWidth / spaceWidth * 1.5f)); - return " ".repeat(Math.min(spaceCount, maxSpaces)); } catch (Exception e) { - return createAlternativePlaceholder(originalWord, targetWidth, font, fontSize); + log.error("Unexpected error in createPlaceholderWithWidth: {}", e.getMessage()); + return " ".repeat(Math.max(1, originalWord.length())); } } private String createSubsetFontPlaceholder( String originalWord, float targetWidth, PDFont font, float fontSize) { try { - return createAlternativePlaceholder(originalWord, targetWidth, font, fontSize); + String result = createAlternativePlaceholder(originalWord, targetWidth, font, fontSize); + if (result == null) { + log.warn( + "createAlternativePlaceholder returned null in subset font, using fallback"); + return " ".repeat(Math.max(1, originalWord != null ? originalWord.length() : 1)); + } + return result; } catch (Exception e) { - return ""; + log.error("Error in createSubsetFontPlaceholder: {}", e.getMessage()); + return " ".repeat(Math.max(1, originalWord != null ? originalWord.length() : 1)); } } @@ -785,9 +865,12 @@ public class RedactionService { } catch (Exception ignored) { } } - return ""; + log.debug( + "createAlternativePlaceholder: no suitable alternative found, returning spaces"); + return " ".repeat(Math.max(1, originalWord != null ? originalWord.length() : 1)); } catch (Exception e) { - return ""; + log.error("Unexpected error in createAlternativePlaceholder: {}", e.getMessage()); + return " ".repeat(Math.max(1, originalWord != null ? originalWord.length() : 1)); } } @@ -971,7 +1054,7 @@ public class RedactionService { } if (isTextShowingOperator(opName) && i > 0) { String textContent = extractTextFromToken(tokens.get(i - 1), opName, gs.font); - if (!textContent.isEmpty()) { + if (textContent != null && !textContent.trim().isEmpty()) { if (aggressive && gs.font != null && tokens.get(i - 1) instanceof COSString cs) { @@ -1017,7 +1100,7 @@ public class RedactionService { } if (isTextShowingOperator(opName) && i > 0) { String textContent = extractTextFromToken(tokens.get(i - 1), opName, gs.font); - if (!textContent.isEmpty()) { + if (textContent != null && !textContent.trim().isEmpty()) { segments.add( new TextSegment( i - 1, @@ -1070,11 +1153,14 @@ public class RedactionService { } List textSegments = extractTextSegments(page, tokens, this.aggressiveMode); String completeText = buildCompleteText(textSegments); - List matches = - this.aggressiveMode - ? findAllMatchesAggressive( - textSegments, tokens, targetWords, useRegex, wholeWordSearch) - : findAllMatches(completeText, targetWords, useRegex, wholeWordSearch); + List matches; + if (this.aggressiveMode) { + matches = + findAllMatchesAggressive( + textSegments, tokens, targetWords, useRegex, wholeWordSearch); + } else { + matches = findMatchesInSegments(textSegments, targetWords, useRegex, wholeWordSearch); + } return applyRedactionsToTokens(tokens, textSegments, matches); } @@ -1329,49 +1415,65 @@ public class RedactionService { } private String applyRedactionsToSegmentText(TextSegment segment, List matches) { + if (segment == null || matches == null || matches.isEmpty()) { + return segment != null && segment.getText() != null ? segment.getText() : ""; + } + String text = segment.getText(); - if (!this.aggressiveMode + if (text == null) return ""; + + if (!aggressiveMode && segment.getFont() != null && !TextEncodingHelper.isTextSegmentRemovable(segment.getFont(), text)) { return text; } - StringBuilder result = new StringBuilder(text); - for (MatchRange match : matches) { - int segmentStart = Math.max(0, match.getStartPos() - segment.getStartPos()); - int segmentEnd = Math.min(text.length(), match.getEndPos() - segment.getStartPos()); - if (segmentStart < text.length() && segmentEnd > segmentStart) { - String originalPart = text.substring(segmentStart, segmentEnd); - if (!this.aggressiveMode - && segment.getFont() != null - && !TextEncodingHelper.isTextSegmentRemovable( - segment.getFont(), originalPart)) { - continue; - } + try { + StringBuilder result = new StringBuilder(text); + for (MatchRange match : matches) { + int segmentStart = Math.max(0, match.getStartPos() - segment.getStartPos()); + int segmentEnd = Math.min(text.length(), match.getEndPos() - segment.getStartPos()); - if (this.aggressiveMode) { - result.replace(segmentStart, segmentEnd, ""); - } else { - float originalWidth = 0; - if (segment.getFont() != null && segment.getFontSize() > 0) { - originalWidth = - safeGetStringWidth(segment.getFont(), originalPart) - / FONT_SCALE_FACTOR - * segment.getFontSize(); + if (segmentStart < text.length() && segmentEnd > segmentStart) { + String originalPart = text.substring(segmentStart, segmentEnd); + + if (!aggressiveMode + && segment.getFont() != null + && !TextEncodingHelper.isTextSegmentRemovable( + segment.getFont(), originalPart)) { + continue; + } + + if (aggressiveMode) { + result.replace(segmentStart, segmentEnd, ""); + } else { + float originalWidth = 0; + if (segment.getFont() != null && segment.getFontSize() > 0) { + originalWidth = + safeGetStringWidth(segment.getFont(), originalPart) + / FONT_SCALE_FACTOR + * segment.getFontSize(); + } + + String placeholder = + originalWidth > 0 + ? createPlaceholderWithWidth( + originalPart, + originalWidth, + segment.getFont(), + segment.getFontSize()) + : createPlaceholderWithFont( + originalPart, segment.getFont()); + + if (placeholder == null) placeholder = " "; + result.replace(segmentStart, segmentEnd, placeholder); } - String placeholder = - (originalWidth > 0) - ? createPlaceholderWithWidth( - originalPart, - originalWidth, - segment.getFont(), - segment.getFontSize()) - : createPlaceholderWithFont(originalPart, segment.getFont()); - result.replace(segmentStart, segmentEnd, placeholder); } } + return result.toString(); + } catch (Exception e) { + return text; } - return result.toString(); } private List findAllMatchesAggressive( @@ -1569,6 +1671,50 @@ public class RedactionService { return result; } + private List findMatchesInSegments( + List segments, + Set targetWords, + boolean useRegex, + boolean wholeWordSearch) { + List allMatches = new ArrayList<>(); + List patterns = + TextFinderUtils.createOptimizedSearchPatterns( + targetWords, useRegex, wholeWordSearch); + + for (TextSegment segment : segments) { + String segmentText = segment.getText(); + if (segmentText == null || segmentText.isEmpty()) continue; + + if (segment.getFont() != null + && !TextEncodingHelper.isTextSegmentRemovable(segment.getFont(), segmentText)) { + continue; + } + + for (Pattern pattern : patterns) { + try { + var matcher = pattern.matcher(segmentText); + while (matcher.find()) { + int matchStart = matcher.start(); + int matchEnd = matcher.end(); + + if (matchStart >= 0 + && matchEnd <= segmentText.length() + && matchStart < matchEnd) { + allMatches.add( + new MatchRange( + segment.getStartPos() + matchStart, + segment.getStartPos() + matchEnd)); + } + } + } catch (Exception e) { + } + } + } + + allMatches.sort(Comparator.comparingInt(MatchRange::getStartPos)); + return allMatches; + } + private List applyRedactionsToTokens( List tokens, List textSegments, List matches) { List newTokens = new ArrayList<>(tokens); @@ -1618,24 +1764,97 @@ public class RedactionService { for (Map.Entry> entry : matchesBySegment.entrySet()) { int segmentIndex = entry.getKey(); List segmentMatches = entry.getValue(); + + if (segmentIndex < 0 || segmentIndex >= textSegments.size()) { + log.warn( + "Invalid segment index: {} (textSegments size: {})", + segmentIndex, + textSegments.size()); + continue; + } + TextSegment segment = textSegments.get(segmentIndex); - if ("Tj".equals(segment.operatorName) || "'".equals(segment.operatorName)) { - String newText = applyRedactionsToSegmentText(segment, segmentMatches); - float adjustment = 0; - adjustment = calculateWidthAdjustment(segment, segmentMatches); - tasks.add(new ModificationTask(segment, newText, adjustment)); - } else if ("TJ".equals(segment.operatorName)) { - tasks.add(new ModificationTask(segment, null, 0)); + if (segment == null) { + log.warn("Segment is null at index: {}", segmentIndex); + continue; + } + + log.debug( + "Creating task for segment {} with operator '{}' and {} matches", + segmentIndex, + segment.operatorName, + segmentMatches.size()); + + try { + if ("Tj".equals(segment.operatorName) || "'".equals(segment.operatorName)) { + String newText = applyRedactionsToSegmentText(segment, segmentMatches); + if (newText == null) { + log.warn( + "applyRedactionsToSegmentText returned null for segment {}, using empty string", + segmentIndex); + newText = ""; // Ensure it's never null + } + float adjustment = calculateWidthAdjustment(segment, segmentMatches); + tasks.add(new ModificationTask(segment, newText, adjustment)); + log.debug( + "Created Tj/' task with newText: '{}' (length: {})", + newText, + newText.length()); + } else if ("TJ".equals(segment.operatorName)) { + tasks.add( + new ModificationTask( + segment, "", 0)); // Use empty string instead of null for TJ + log.debug("Created TJ task with empty newText (was null)"); + } else { + log.debug("Skipping segment with operator: {}", segment.operatorName); + } + } catch (Exception e) { + log.error("Error creating task for segment {}: {}", segmentIndex, e.getMessage()); } } tasks.sort((a, b) -> Integer.compare(b.segment.tokenIndex, a.segment.tokenIndex)); - for (ModificationTask task : tasks) { - List segmentMatches = - matchesBySegment.getOrDefault( - textSegments.indexOf(task.segment), Collections.emptyList()); - modifyTokenForRedaction( - newTokens, task.segment, task.newText, task.adjustment, segmentMatches); + + int processedCount = 0; + int maxTasksToProcess = Math.min(tasks.size(), 1000); // Safety limit + + for (int i = 0; i < maxTasksToProcess && i < tasks.size(); i++) { + ModificationTask task = tasks.get(i); + try { + List segmentMatches = + matchesBySegment.getOrDefault( + textSegments.indexOf(task.segment), Collections.emptyList()); + + if (task.segment.tokenIndex >= newTokens.size()) { + log.debug( + "Skipping segment with invalid token index {} (tokens size: {})", + task.segment.tokenIndex, + newTokens.size()); + continue; + } + + if (task.segment.getText() == null || task.segment.getText().isEmpty()) { + log.debug( + "Skipping segment with empty text at index {}", + task.segment.tokenIndex); + continue; + } + + modifyTokenForRedaction( + newTokens, task.segment, task.newText, task.adjustment, segmentMatches); + processedCount++; + + } catch (Exception e) { + log.warn( + "Failed to process modification task for segment at {}: {}", + task.segment.tokenIndex, + e.getMessage()); + } } + + log.debug( + "Successfully processed {} out of {} modification tasks", + processedCount, + tasks.size()); return newTokens; } @@ -1837,20 +2056,67 @@ public class RedactionService { String newText, float adjustment, List matches) { - if (tokens == null - || segment == null - || newText == null - || !isValidTokenIndex(tokens, segment.tokenIndex) - || segment.operatorName == null) { - log.warn("Invalid input to modifyTokenForRedaction"); + // Defensive null handling + if (tokens == null || segment == null) { + log.warn( + "Invalid input to modifyTokenForRedaction: tokens={}, segment={}", + tokens == null ? "null" : "valid", + segment == null ? "null" : "valid"); + return; + } + + // Handle null newText by providing a default + if (newText == null) { + log.warn("newText is null, providing default empty string"); + log.warn( + "Segment details: tokenIndex={}, operatorName={}, font={}, fontSize={}, text='{}'", + segment.tokenIndex, + segment.operatorName, + segment.getFont() != null ? segment.getFont().getName() : "null", + segment.getFontSize(), + segment.getText() != null ? segment.getText() : "null"); + log.warn("This should not happen with the new null safety measures!"); + newText = ""; // Default to empty string + } + if (!isValidTokenIndex(tokens, segment.tokenIndex)) { + log.warn( + "Invalid input to modifyTokenForRedaction: invalid token index {} (tokens size: {})", + segment.tokenIndex, + tokens.size()); + log.debug( + "Segment details: operator={}, font={}, fontSize={}, startPos={}, endPos={}", + segment.operatorName, + segment.getFont(), + segment.getFontSize(), + segment.getStartPos(), + segment.getEndPos()); + return; + } + if (segment.operatorName == null) { + log.warn("Invalid input to modifyTokenForRedaction: operatorName is null"); return; } try { + Object token = tokens.get(segment.tokenIndex); + + if (token == null) { + log.warn("Token at index {} is null, skipping modification", segment.tokenIndex); + return; + } + + if (!isValidTokenForOperator(token, segment.operatorName)) { + log.warn( + "Token at index {} is not valid for operator {}, skipping modification", + segment.tokenIndex, + segment.operatorName); + return; + } + TokenModificationResult result = performTokenModification( tokens, - tokens.get(segment.tokenIndex), + token, segment.operatorName, newText, adjustment, @@ -1860,12 +2126,24 @@ public class RedactionService { if (!result.isSuccess()) { performFallbackModification(tokens, segment.tokenIndex, newText); } + } catch (IndexOutOfBoundsException e) { + log.warn( + "Token index {} is out of bounds (tokens size: {}), skipping modification", + segment.tokenIndex, + tokens.size()); } catch (Exception e) { log.error( "Token modification failed at index {}: {}", segment.tokenIndex, e.getMessage()); - performEmergencyFallback(tokens, segment.tokenIndex); + try { + performEmergencyFallback(tokens, segment.tokenIndex); + } catch (Exception emergencyError) { + log.error( + "Emergency fallback also failed at index {}: {}", + segment.tokenIndex, + emergencyError.getMessage()); + } } } @@ -1873,6 +2151,18 @@ public class RedactionService { return index >= 0 && index < tokens.size(); } + private boolean isValidTokenForOperator(Object token, String operatorName) { + if (token == null || operatorName == null) { + return false; + } + + return switch (operatorName) { + case "Tj", "'", "\"" -> token instanceof COSString; + case "TJ" -> token instanceof COSArray; + default -> true; + }; + } + private COSArray createRedactedTJArray( COSArray originalArray, TextSegment segment, List matches) { @@ -2403,29 +2693,44 @@ public class RedactionService { return totalMods; } - private static class WidthCalculationResult { - private final float adjustment; - private final int processedMatches; - private final List warnings; - - public WidthCalculationResult( - float adjustment, int processedMatches, List warnings) { - this.adjustment = adjustment; - this.processedMatches = processedMatches; - this.warnings = new ArrayList<>(warnings); - } - - public float getAdjustment() { - return adjustment; - } - - public int getProcessedMatches() { - return processedMatches; - } - - public List getWarnings() { - return new ArrayList<>(warnings); + private List extractTextSegmentsFromXObject( + PDResources resources, List tokens) { + List segments = new ArrayList<>(); + int currentTextPos = 0; + GraphicsState gs = new GraphicsState(); + for (int i = 0; i < tokens.size(); i++) { + Object currentToken = tokens.get(i); + if (currentToken instanceof Operator op) { + String opName = op.getName(); + if ("Tf".equals(opName) && i >= 2) { + try { + COSName fontName = (COSName) tokens.get(i - 2); + COSBase fontSizeBase = (COSBase) tokens.get(i - 1); + if (fontSizeBase instanceof COSNumber cosNumber) { + gs.setFont(resources.getFont(fontName)); + gs.setFontSize(cosNumber.floatValue()); + } + } catch (ClassCastException | IOException ignored) { + } + } + if (isTextShowingOperator(opName) && i > 0) { + String textContent = extractTextFromToken(tokens.get(i - 1), opName, gs.font); + if (textContent != null && !textContent.trim().isEmpty()) { + segments.add( + new TextSegment( + i - 1, + opName, + textContent, + currentTextPos, + currentTextPos + textContent.length(), + gs.font, + gs.fontSize)); + currentTextPos += textContent.length(); + } + } + } } + return segments; } private int wipeAllTextInFormXObject(PDDocument document, PDFormXObject formXObject) @@ -2485,25 +2790,20 @@ public class RedactionService { } } - private static class TokenModificationResult { - private final boolean success; - private final String errorMessage; + private static class WidthCalculationResult { + @Getter private final float adjustment; + @Getter private final int processedMatches; + private final List warnings; - private TokenModificationResult(boolean success, String errorMessage) { - this.success = success; - this.errorMessage = errorMessage; + public WidthCalculationResult( + float adjustment, int processedMatches, List warnings) { + this.adjustment = adjustment; + this.processedMatches = processedMatches; + this.warnings = new ArrayList<>(warnings); } - public static TokenModificationResult success() { - return new TokenModificationResult(true, null); - } - - public static TokenModificationResult failure(String errorMessage) { - return new TokenModificationResult(false, errorMessage); - } - - public boolean isSuccess() { - return success; + public List getWarnings() { + return new ArrayList<>(warnings); } } @@ -2556,44 +2856,22 @@ public class RedactionService { } } - private List extractTextSegmentsFromXObject( - PDResources resources, List tokens) { - List segments = new ArrayList<>(); - int currentTextPos = 0; - GraphicsState gs = new GraphicsState(); - for (int i = 0; i < tokens.size(); i++) { - Object currentToken = tokens.get(i); - if (currentToken instanceof Operator op) { - String opName = op.getName(); - if ("Tf".equals(opName) && i >= 2) { - try { - COSName fontName = (COSName) tokens.get(i - 2); - COSBase fontSizeBase = (COSBase) tokens.get(i - 1); - if (fontSizeBase instanceof COSNumber cosNumber) { - gs.setFont(resources.getFont(fontName)); - gs.setFontSize(cosNumber.floatValue()); - } - } catch (ClassCastException | IOException ignored) { - } - } - if (isTextShowingOperator(opName) && i > 0) { - String textContent = extractTextFromToken(tokens.get(i - 1), opName, gs.font); - if (!textContent.isEmpty()) { - segments.add( - new TextSegment( - i - 1, - opName, - textContent, - currentTextPos, - currentTextPos + textContent.length(), - gs.font, - gs.fontSize)); - currentTextPos += textContent.length(); - } - } - } + private static class TokenModificationResult { + @Getter private final boolean success; + private final String errorMessage; + + private TokenModificationResult(boolean success, String errorMessage) { + this.success = success; + this.errorMessage = errorMessage; + } + + public static TokenModificationResult success() { + return new TokenModificationResult(true, null); + } + + public static TokenModificationResult failure(String errorMessage) { + return new TokenModificationResult(false, errorMessage); } - return segments; } @Data diff --git a/app/core/src/main/java/stirling/software/SPDF/utils/text/WidthCalculator.java b/app/core/src/main/java/stirling/software/SPDF/utils/text/WidthCalculator.java index 5c15277e2..369116648 100644 --- a/app/core/src/main/java/stirling/software/SPDF/utils/text/WidthCalculator.java +++ b/app/core/src/main/java/stirling/software/SPDF/utils/text/WidthCalculator.java @@ -1,7 +1,14 @@ package stirling.software.SPDF.utils.text; +import java.text.Normalizer; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDType0Font; import lombok.experimental.UtilityClass; import lombok.extern.slf4j.Slf4j; @@ -11,127 +18,572 @@ import lombok.extern.slf4j.Slf4j; public class WidthCalculator { private final int FONT_SCALE_FACTOR = 1000; + private final float CONSERVATIVE_CHAR_WIDTH_RATIO = 0.55f; + private final float BBOX_CHAR_WIDTH_RATIO = 0.65f; + + private final Map widthCache = new ConcurrentHashMap<>(); + private final Map reliabilityCache = new ConcurrentHashMap<>(); + + private String createCacheKey(PDFont font, String text, float fontSize) { + return String.format("%s|%s|%.2f", font.getName(), text, fontSize); + } + + private String createReliabilityCacheKey(PDFont font) { + return font.getName(); + } public float calculateAccurateWidth(PDFont font, String text, float fontSize) { - if (font == null || text == null || text.isEmpty() || fontSize <= 0) { - return 0; + return calculateAccurateWidth(font, text, fontSize, true); + } + + public float calculateAccurateWidth( + PDFont font, String text, float fontSize, boolean useCache) { + if (font == null || text == null || text.isEmpty() || fontSize <= 0) return 0; + + if (useCache) { + String cacheKey = createCacheKey(font, text, fontSize); + Float cachedWidth = widthCache.get(cacheKey); + if (cachedWidth != null) return cachedWidth; } - if (!TextEncodingHelper.canEncodeCharacters(font, text)) { - log.debug( - "Text cannot be encoded by font {}, using fallback width calculation", - font.getName()); - return calculateFallbackWidth(font, text, fontSize); + String normalizedText = normalizeText(text); + + Float directWidth = calculateDirectWidth(font, normalizedText, fontSize); + if (directWidth != null) { + if (useCache) widthCache.put(createCacheKey(font, text, fontSize), directWidth); + return directWidth; } + Float charByCharWidth = calculateCharacterByCharacterWidth(font, normalizedText, fontSize); + if (charByCharWidth != null) { + if (useCache) widthCache.put(createCacheKey(font, text, fontSize), charByCharWidth); + return charByCharWidth; + } + + Float glyphWidth = calculateGlyphBasedWidth(font, normalizedText, fontSize); + if (glyphWidth != null) { + if (useCache) widthCache.put(createCacheKey(font, text, fontSize), glyphWidth); + return glyphWidth; + } + + float fallbackWidth = calculateComprehensiveFallbackWidth(font, normalizedText, fontSize); + if (useCache) widthCache.put(createCacheKey(font, text, fontSize), fallbackWidth); + return fallbackWidth; + } + + private String normalizeText(String text) { + return Normalizer.normalize(text, Normalizer.Form.NFC); + } + + private Float calculateDirectWidth(PDFont font, String text, float fontSize) { + if (!TextEncodingHelper.canEncodeCharacters(font, text)) return null; + try { float rawWidth = font.getStringWidth(text); float scaledWidth = (rawWidth / FONT_SCALE_FACTOR) * fontSize; - - log.debug( - "Direct width calculation successful for font {}: {} -> {}", - font.getName(), - rawWidth, - scaledWidth); - return scaledWidth; - + return rawWidth >= 0 && scaledWidth >= 0 ? scaledWidth : null; } catch (Exception e) { - log.debug( - "Direct width calculation failed for font {}: {}", - font.getName(), - e.getMessage()); - return calculateWidthWithCharacterIteration(font, text, fontSize); + return null; } } - private float calculateWidthWithCharacterIteration(PDFont font, String text, float fontSize) { + private Float calculateCharacterByCharacterWidth(PDFont font, String text, float fontSize) { try { + List codePoints = getCodePoints(text); float totalWidth = 0; + int previousCodePoint = -1; - for (int i = 0; i < text.length(); i++) { - String character = text.substring(i, i + 1); + for (int codePoint : codePoints) { + String character = new String(Character.toChars(codePoint)); + Float charWidth = calculateSingleCharacterWidth(font, character, fontSize); + if (charWidth == null) return null; + + totalWidth += charWidth; + if (previousCodePoint != -1) { + totalWidth += calculateKerning(font, previousCodePoint, codePoint, fontSize); + } + previousCodePoint = codePoint; + } + return totalWidth; + } catch (Exception e) { + return null; + } + } + + private List getCodePoints(String text) { + List codePoints = new ArrayList<>(); + for (int i = 0; i < text.length(); ) { + int codePoint = text.codePointAt(i); + codePoints.add(codePoint); + i += Character.charCount(codePoint); + } + return codePoints; + } + + private Float calculateSingleCharacterWidth(PDFont font, String character, float fontSize) { + try { + byte[] encoded = null; + + try { + encoded = font.encode(character); + if (encoded.length == 0) encoded = null; + } catch (Exception e) { + log.debug("Direct encoding failed for '{}': {}", character, e.getMessage()); + } + + if (encoded == null && font instanceof PDType0Font) { try { - byte[] encoded = font.encode(character); - if (encoded.length > 0) { - int glyphCode = encoded[0] & 0xFF; - float glyphWidth = font.getWidth(glyphCode); - - if (glyphWidth == 0) { - try { - glyphWidth = font.getWidthFromFont(glyphCode); - } catch (Exception e2) { - glyphWidth = font.getAverageFontWidth(); - } - } - - totalWidth += (glyphWidth / FONT_SCALE_FACTOR) * fontSize; - } else { - totalWidth += (font.getAverageFontWidth() / FONT_SCALE_FACTOR) * fontSize; - } - } catch (Exception e2) { - totalWidth += (font.getAverageFontWidth() / FONT_SCALE_FACTOR) * fontSize; + encoded = character.getBytes("UTF-8"); + } catch (Exception e) { + log.debug("UTF-8 encoding failed for '{}': {}", character, e.getMessage()); } } - log.debug("Character iteration width calculation: {}", totalWidth); - return totalWidth; + if (encoded != null && encoded.length > 0) { + Float width = calculateGlyphWidth(font, encoded, fontSize); + if (width != null && width >= 0) return width; + } + + return calculateAverageCharacterWidth(font, fontSize); } catch (Exception e) { - log.debug("Character iteration failed: {}", e.getMessage()); - return calculateFallbackWidth(font, text, fontSize); + log.debug( + "Single character width calculation failed for '{}': {}", + character, + e.getMessage()); + return calculateAverageCharacterWidth(font, fontSize); } } - private float calculateFallbackWidth(PDFont font, String text, float fontSize) { + private Float calculateGlyphWidth(PDFont font, byte[] encoded, float fontSize) { + for (byte b : encoded) { + try { + int glyphCode = b & 0xFF; + float glyphWidth = font.getWidth(glyphCode); + + if (glyphWidth > 0) { + return (glyphWidth / FONT_SCALE_FACTOR) * fontSize; + } + + // Try alternative width methods + try { + glyphWidth = font.getWidthFromFont(glyphCode); + if (glyphWidth > 0) { + return (glyphWidth / FONT_SCALE_FACTOR) * fontSize; + } + } catch (Exception e) { + log.debug( + "getWidthFromFont failed for glyph {}: {}", glyphCode, e.getMessage()); + } + + } catch (Exception e) { + log.debug("Glyph width calculation failed for byte {}: {}", b, e.getMessage()); + } + } + return null; + } + + private float calculateKerning( + PDFont font, int leftCodePoint, int rightCodePoint, float fontSize) { + return 0; + } + + private Float calculateGlyphBasedWidth(PDFont font, String text, float fontSize) { try { + float totalWidth = 0; + + for (int i = 0; i < text.length(); ) { + int codePoint = text.codePointAt(i); + String character = new String(Character.toChars(codePoint)); + + // Try to get glyph information more comprehensively + Float charWidth = + calculateGlyphWidthComprehensively(font, character, codePoint, fontSize); + if (charWidth == null) { + return null; + } + + totalWidth += charWidth; + i += Character.charCount(codePoint); + } + + log.debug("Glyph-based width calculation: {}", totalWidth); + return totalWidth; + + } catch (Exception e) { + log.debug("Glyph-based calculation failed: {}", e.getMessage()); + return null; + } + } + + private Float calculateGlyphWidthComprehensively( + PDFont font, String character, int codePoint, float fontSize) { + try { + // Method 1: Try standard encoding + try { + byte[] encoded = font.encode(character); + if (encoded.length > 0) { + Float width = calculateWidthFromEncodedBytes(font, encoded, fontSize); + if (width != null && width >= 0) { + return width; + } + } + } catch (Exception e) { + log.debug( + "Standard encoding failed for U+{}: {}", + Integer.toHexString(codePoint), + e.getMessage()); + } + + // Method 2: Try Unicode code point directly + try { + float glyphWidth = font.getWidth(codePoint); + if (glyphWidth > 0) { + return (glyphWidth / FONT_SCALE_FACTOR) * fontSize; + } + } catch (Exception e) { + log.debug( + "Unicode code point width failed for U+{}: {}", + Integer.toHexString(codePoint), + e.getMessage()); + } + + // Method 3: Character category based estimation + return calculateCategoryBasedWidth(font, codePoint, fontSize); + + } catch (Exception e) { + log.debug("Comprehensive glyph width calculation failed: {}", e.getMessage()); + return calculateAverageCharacterWidth(font, fontSize); + } + } + + private Float calculateWidthFromEncodedBytes(PDFont font, byte[] encoded, float fontSize) { + // Try each byte as a potential glyph code + for (byte b : encoded) { + try { + int glyphCode = b & 0xFF; + float width = font.getWidth(glyphCode); + if (width > 0) { + return (width / FONT_SCALE_FACTOR) * fontSize; + } + } catch (Exception e) { + // Continue trying other bytes + } + } + + // Try multi-byte interpretation for Unicode fonts + if (encoded.length >= 2 && font instanceof PDType0Font) { + try { + int glyphCode = ((encoded[0] & 0xFF) << 8) | (encoded[1] & 0xFF); + float width = font.getWidth(glyphCode); + if (width > 0) { + return (width / FONT_SCALE_FACTOR) * fontSize; + } + } catch (Exception e) { + log.debug("Multi-byte glyph code interpretation failed: {}", e.getMessage()); + } + } + + return null; + } + + private Float calculateCategoryBasedWidth(PDFont font, int codePoint, float fontSize) { + try { + int category = Character.getType(codePoint); + float baseWidth = calculateAverageCharacterWidth(font, fontSize); + + // Adjust width based on character category + float multiplier = + switch (category) { + case Character.UPPERCASE_LETTER -> 1.2f; + case Character.LOWERCASE_LETTER -> 1.0f; + case Character.DECIMAL_DIGIT_NUMBER -> 1.0f; + case Character.SPACE_SEPARATOR -> 0.5f; + case Character.DASH_PUNCTUATION -> 0.8f; + case Character.OTHER_PUNCTUATION -> 0.6f; + case Character.CURRENCY_SYMBOL -> 1.1f; + case Character.MATH_SYMBOL -> 1.0f; + case Character.MODIFIER_LETTER -> 0.7f; + case Character.NON_SPACING_MARK -> 0.0f; // Combining characters + case Character.ENCLOSING_MARK -> 0.0f; + case Character.COMBINING_SPACING_MARK -> 0.3f; + default -> 1.0f; + }; + + return baseWidth * multiplier; + } catch (Exception e) { + log.debug("Category-based width calculation failed: {}", e.getMessage()); + return calculateAverageCharacterWidth(font, fontSize); + } + } + + private float calculateAverageCharacterWidth(PDFont font, float fontSize) { + try { + float avgWidth = font.getAverageFontWidth(); + return (avgWidth / FONT_SCALE_FACTOR) * fontSize; + } catch (Exception e) { + log.debug("Average character width calculation failed: {}", e.getMessage()); + return CONSERVATIVE_CHAR_WIDTH_RATIO * fontSize; + } + } + + private float calculateComprehensiveFallbackWidth(PDFont font, String text, float fontSize) { + try { + // Strategy 1: Use font bounding box with character analysis if (font.getFontDescriptor() != null && font.getFontDescriptor().getFontBoundingBox() != null) { PDRectangle bbox = font.getFontDescriptor().getFontBoundingBox(); - float avgCharWidth = - bbox.getWidth() / FONT_SCALE_FACTOR * 0.6f; // Conservative estimate - float fallbackWidth = text.length() * avgCharWidth * fontSize; + float avgCharWidth = bbox.getWidth() / FONT_SCALE_FACTOR; - log.debug("Bounding box fallback width: {}", fallbackWidth); - return fallbackWidth; + // Analyze text composition for better estimation + float adjustedWidth = analyzeTextComposition(text, avgCharWidth, fontSize); + log.debug("Bounding box based fallback width: {}", adjustedWidth); + return adjustedWidth; } - float avgWidth = font.getAverageFontWidth(); - float fallbackWidth = (text.length() * avgWidth / FONT_SCALE_FACTOR) * fontSize; - - log.debug("Average width fallback: {}", fallbackWidth); - return fallbackWidth; + // Strategy 2: Enhanced average width calculation + float enhancedAverage = calculateEnhancedAverageWidth(font, text, fontSize); + log.debug("Enhanced average fallback width: {}", enhancedAverage); + return enhancedAverage; } catch (Exception e) { - float conservativeWidth = text.length() * 0.5f * fontSize; - log.debug( - "Conservative fallback width for font {}: {}", - font.getName(), - conservativeWidth); + // Ultimate fallback + float conservativeWidth = text.length() * CONSERVATIVE_CHAR_WIDTH_RATIO * fontSize; + log.debug("Conservative fallback width: {}", conservativeWidth); return conservativeWidth; } } + private float analyzeTextComposition(String text, float avgCharWidth, float fontSize) { + float totalWidth = 0; + int spaceCount = 0; + int upperCount = 0; + int lowerCount = 0; + int digitCount = 0; + int punctCount = 0; + + for (int i = 0; i < text.length(); ) { + int codePoint = text.codePointAt(i); + int category = Character.getType(codePoint); + + switch (category) { + case Character.SPACE_SEPARATOR -> { + spaceCount++; + totalWidth += avgCharWidth * 0.5f * fontSize; + } + case Character.UPPERCASE_LETTER -> { + upperCount++; + totalWidth += avgCharWidth * 1.2f * fontSize; + } + case Character.LOWERCASE_LETTER -> { + lowerCount++; + totalWidth += avgCharWidth * 1.0f * fontSize; + } + case Character.DECIMAL_DIGIT_NUMBER -> { + digitCount++; + totalWidth += avgCharWidth * 1.0f * fontSize; + } + case Character.OTHER_PUNCTUATION, Character.DASH_PUNCTUATION -> { + punctCount++; + totalWidth += avgCharWidth * 0.7f * fontSize; + } + default -> totalWidth += avgCharWidth * BBOX_CHAR_WIDTH_RATIO * fontSize; + } + + i += Character.charCount(codePoint); + } + + // Log composition analysis for debugging + log.debug( + "Text composition analysis - Spaces: {}, Upper: {}, Lower: {}, Digits: {}, Punct: {}", + spaceCount, + upperCount, + lowerCount, + digitCount, + punctCount); + + return totalWidth; + } + + private float calculateEnhancedAverageWidth(PDFont font, String text, float fontSize) { + try { + float baseAverage = font.getAverageFontWidth(); + + // Try to get more specific metrics + float capHeight = 0; + float xHeight = 0; + + if (font.getFontDescriptor() != null) { + capHeight = font.getFontDescriptor().getCapHeight(); + xHeight = font.getFontDescriptor().getXHeight(); + } + + // Use metrics to adjust the average width estimation + float adjustmentFactor = 1.0f; + if (capHeight > 0 && xHeight > 0) { + adjustmentFactor = Math.max(0.8f, Math.min(1.2f, xHeight / capHeight)); + } + + float adjustedAverage = (baseAverage * adjustmentFactor / FONT_SCALE_FACTOR) * fontSize; + return text.length() * adjustedAverage; + + } catch (Exception e) { + log.debug("Enhanced average width calculation failed: {}", e.getMessage()); + return text.length() * CONSERVATIVE_CHAR_WIDTH_RATIO * fontSize; + } + } + public boolean isWidthCalculationReliable(PDFont font) { if (font == null) { return false; } - if (font.isDamaged()) { - log.debug("Font {} is damaged", font.getName()); - return false; + // Check cache first + String cacheKey = createReliabilityCacheKey(font); + Boolean cachedResult = reliabilityCache.get(cacheKey); + if (cachedResult != null) { + log.debug( + "Using cached reliability result for font {}: {}", + font.getName(), + cachedResult); + return cachedResult; } - if (!TextEncodingHelper.canCalculateBasicWidths(font)) { - log.debug("Font {} cannot perform basic width calculations", font.getName()); + boolean result = performReliabilityCheck(font); + + // Cache the result + reliabilityCache.put(cacheKey, result); + return result; + } + + private boolean performReliabilityCheck(PDFont font) { + try { + // Check if font is damaged + if (font.isDamaged()) { + log.debug("Font {} is damaged", font.getName()); + return false; + } + + // Check basic width calculation capability + if (!TextEncodingHelper.canCalculateBasicWidths(font)) { + log.debug("Font {} cannot perform basic width calculations", font.getName()); + return false; + } + + // Test with a simple character + try { + font.getStringWidth("A"); + return true; + } catch (Exception e) { + log.debug("Font {} failed basic width test: {}", font.getName(), e.getMessage()); + } + + // Check if we can at least get average width + try { + float avgWidth = font.getAverageFontWidth(); + return avgWidth > 0; + } catch (Exception e) { + log.debug( + "Font {} cannot provide average width: {}", font.getName(), e.getMessage()); + } + + return false; + + } catch (Exception e) { + log.debug("Reliability check failed for font {}: {}", font.getName(), e.getMessage()); return false; } + } - if (TextEncodingHelper.hasCustomEncoding(font)) { - log.debug("Font {} has custom encoding", font.getName()); - return false; + public float calculateCharacterWidth(PDFont font, String character, float fontSize) { + if (font == null || character == null || character.isEmpty() || fontSize <= 0) return 0; + + String cacheKey = createCacheKey(font, character, fontSize); + Float cachedWidth = widthCache.get(cacheKey); + if (cachedWidth != null) return cachedWidth; + + Float width = calculateSingleCharacterWidth(font, character, fontSize); + if (width == null) width = calculateAverageCharacterWidth(font, fontSize); + + widthCache.put(cacheKey, width); + return width; + } + + public String createWidthMatchingPlaceholder( + String originalText, + float targetWidth, + PDFont font, + float fontSize, + String placeholderChar) { + if (originalText == null || originalText.isEmpty() || targetWidth <= 0) return ""; + + if (placeholderChar == null || placeholderChar.isEmpty()) placeholderChar = " "; + + try { + float placeholderCharWidth = calculateCharacterWidth(font, placeholderChar, fontSize); + if (placeholderCharWidth <= 0) { + return " ".repeat(Math.max(1, originalText.length())); + } + + int placeholderCount = Math.max(1, Math.round(targetWidth / placeholderCharWidth)); + int originalLength = originalText.length(); + int maxReasonableLength = Math.max(originalLength * 3, Math.max(placeholderCount, 10)); + placeholderCount = Math.min(placeholderCount, maxReasonableLength); + placeholderCount = Math.max(1, placeholderCount); + + return placeholderChar.repeat(placeholderCount); + + } catch (Exception e) { + return " ".repeat(Math.max(1, originalText.length())); + } + } + + public boolean canCalculateTextWidth(PDFont font, String text) { + if (font == null || text == null || text.isEmpty()) return false; + if (!isWidthCalculationReliable(font)) return false; + + List codePoints = getCodePoints(text); + int testSampleSize = Math.min(5, codePoints.size()); + + for (int i = 0; i < testSampleSize; i++) { + int codePoint = codePoints.get(i); + String character = new String(Character.toChars(codePoint)); + + try { + if (!TextEncodingHelper.canEncodeCharacters(font, character)) { + log.debug( + "Cannot encode character U+{} in text '{}'", + Integer.toHexString(codePoint), + text); + return false; + } + + float width = calculateCharacterWidth(font, character, 12.0f); + if (width <= 0) { + log.debug( + "Character U+{} has invalid width: {}", + Integer.toHexString(codePoint), + width); + return false; + } + } catch (Exception e) { + log.debug( + "Error testing character U+{}: {}", + Integer.toHexString(codePoint), + e.getMessage()); + return false; + } } return true; } + + public void clearWidthCache() { + widthCache.clear(); + } + + public void clearReliabilityCache() { + reliabilityCache.clear(); + } }