paragraphs? :o

This commit is contained in:
Anthony Stirling 2025-11-10 22:55:16 +00:00
parent 5fadb92f51
commit 3ed62c8dbf
7 changed files with 443 additions and 93 deletions

View File

@ -1,5 +1,6 @@
multipart.enabled=true
logging.level.org.springframework=WARN
logging.level.org.springframework.security=WARN
logging.level.org.hibernate=WARN
logging.level.org.eclipse.jetty=WARN
#logging.level.org.springframework.security.oauth2=DEBUG

View File

@ -4498,8 +4498,17 @@
"forceSingleElement": {
"title": "Lock edited text to a single PDF element",
"description": "When enabled, the editor exports each edited text box as one PDF text element to avoid overlapping glyphs or mixed fonts."
},
"textGroupingMode": {
"title": "Text grouping mode",
"description": "Paragraph mode merges aligned lines into one textbox; single-line mode keeps every PDF line separate. Auto picks the best option per page."
}
},
"grouping": {
"auto": "Auto",
"paragraph": "Paragraph",
"single": "Single Line"
},
"disclaimer": {
"heading": "Preview limitations",
"textFocus": "This workspace focuses on editing text and repositioning embedded images. Complex page artwork, form widgets, and layered graphics are preserved for export but are not fully editable here.",

View File

@ -36,7 +36,7 @@ function persistRedirectPath(path: string): void {
try {
document.cookie = `${OAUTH_REDIRECT_COOKIE}=${encodeURIComponent(path)}; path=/; max-age=${OAUTH_REDIRECT_COOKIE_MAX_AGE}; SameSite=Lax`;
} catch (error) {
console.warn('[SpringAuth] Failed to persist OAuth redirect path', error);
// console.warn('[SpringAuth] Failed to persist OAuth redirect path', error);
}
}
@ -113,21 +113,21 @@ class SpringAuthClient {
const token = localStorage.getItem('stirling_jwt');
if (!token) {
console.debug('[SpringAuth] getSession: No JWT in localStorage');
// console.debug('[SpringAuth] getSession: No JWT in localStorage');
return { data: { session: null }, error: null };
}
// Verify with backend
console.debug('[SpringAuth] getSession: Verifying JWT with /api/v1/auth/me');
// console.debug('[SpringAuth] getSession: Verifying JWT with /api/v1/auth/me');
const response = await fetch('/api/v1/auth/me', {
headers: {
'Authorization': `Bearer ${token}`,
},
});
console.debug('[SpringAuth] /me response status:', response.status);
// console.debug('[SpringAuth] /me response status:', response.status);
const contentType = response.headers.get('content-type');
console.debug('[SpringAuth] /me content-type:', contentType);
// console.debug('[SpringAuth] /me content-type:', contentType);
if (!response.ok) {
// Log the error response for debugging
@ -140,7 +140,7 @@ class SpringAuthClient {
// Token invalid or expired - clear it
localStorage.removeItem('stirling_jwt');
console.warn('[SpringAuth] getSession: Cleared invalid JWT from localStorage');
// console.warn('[SpringAuth] getSession: Cleared invalid JWT from localStorage');
return { data: { session: null }, error: { message: `Auth failed: ${response.status}` } };
}
@ -155,7 +155,7 @@ class SpringAuthClient {
}
const data = await response.json();
console.debug('[SpringAuth] /me response data:', data);
// console.debug('[SpringAuth] /me response data:', data);
// Create session object
const session: Session = {
@ -165,7 +165,7 @@ class SpringAuthClient {
expires_at: Date.now() + 3600 * 1000,
};
console.debug('[SpringAuth] getSession: Session retrieved successfully');
// console.debug('[SpringAuth] getSession: Session retrieved successfully');
return { data: { session }, error: null };
} catch (error) {
console.error('[SpringAuth] getSession error:', error);
@ -206,7 +206,7 @@ class SpringAuthClient {
// Store JWT in localStorage
localStorage.setItem('stirling_jwt', token);
console.log('[SpringAuth] JWT stored in localStorage');
// console.log('[SpringAuth] JWT stored in localStorage');
// Dispatch custom event for other components to react to JWT availability
window.dispatchEvent(new CustomEvent('jwt-available'));
@ -285,7 +285,7 @@ class SpringAuthClient {
// Redirect to Spring OAuth2 endpoint (Vite will proxy to backend)
const redirectUrl = `/oauth2/authorization/${params.provider}`;
console.log('[SpringAuth] Redirecting to OAuth:', redirectUrl);
// console.log('[SpringAuth] Redirecting to OAuth:', redirectUrl);
// Use window.location.assign for full page navigation
window.location.assign(redirectUrl);
return { error: null };
@ -303,7 +303,7 @@ class SpringAuthClient {
try {
// Clear JWT from localStorage immediately
localStorage.removeItem('stirling_jwt');
console.log('[SpringAuth] JWT removed from localStorage');
// console.log('[SpringAuth] JWT removed from localStorage');
const csrfToken = this.getCsrfToken();
const headers: HeadersInit = {};
@ -446,7 +446,7 @@ class SpringAuthClient {
// Refresh if token expires soon
if (timeUntilExpiry > 0 && timeUntilExpiry < this.TOKEN_REFRESH_THRESHOLD) {
console.log('[SpringAuth] Proactively refreshing token');
// console.log('[SpringAuth] Proactively refreshing token');
await this.refreshSession();
}
}

View File

@ -245,6 +245,26 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
onForceSingleTextElementChange,
} = data;
/**
 * Pushes the current contents of a contentEditable editor to the parent and
 * schedules a caret restore for the next animation frame.
 *
 * Non-breaking spaces in the element's `innerText` are converted to plain
 * spaces before the value is handed to `onGroupEdit`. The caret offset is
 * stored in `caretOffsetsRef` first so the frame callback can read it back.
 */
const syncEditorValue = useCallback(
  (element: HTMLElement, pageIndex: number, groupId: string) => {
    const text = element.innerText.replace(/\u00A0/g, ' ');
    const caret = getCaretOffset(element);
    caretOffsetsRef.current.set(groupId, caret);
    onGroupEdit(pageIndex, groupId, text);

    const restoreCaret = (): void => {
      // Only touch the caret while this group is the one being edited.
      if (editingGroupId !== groupId) {
        return;
      }
      const activeEditor = editorRefs.current.get(groupId);
      if (!activeEditor) {
        return;
      }
      const targetOffset = caretOffsetsRef.current.get(groupId) ?? activeEditor.innerText.length;
      setCaretOffset(activeEditor, targetOffset);
    };
    requestAnimationFrame(restoreCaret);
  },
  [editingGroupId, onGroupEdit],
);
const resolveFont = (fontId: string | null | undefined, pageIndex: number | null | undefined): PdfJsonFont | null => {
if (!fontId || !pdfDocument?.fonts) {
return null;
@ -646,7 +666,14 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
// Measure text widths once per page/configuration and apply static scaling
useLayoutEffect(() => {
if (!autoScaleText || visibleGroups.length === 0) {
if (!autoScaleText) {
// Clear all scales when auto-scale is disabled
setTextScales(new Map());
measurementKeyRef.current = '';
return;
}
if (visibleGroups.length === 0) {
return;
}
@ -667,6 +694,13 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
return;
}
// Skip multi-line paragraphs - auto-scaling doesn't work well with wrapped text
const lineCount = (group.text || '').split('\n').length;
if (lineCount > 1) {
newScales.set(group.id, 1);
return;
}
const element = document.querySelector<HTMLElement>(`[data-text-group="${group.id}"]`);
if (!element) {
return;
@ -705,7 +739,16 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
// Delay measurement to ensure fonts and layout are ready
const timer = setTimeout(measureTextScales, 150);
return () => clearTimeout(timer);
}, [autoScaleText, visibleGroups, editingGroupId, currentPage, pageHeight, scale, fontFamilies.size, selectedPage]);
}, [
autoScaleText,
visibleGroups,
editingGroupId,
currentPage,
pageHeight,
scale,
fontFamilies.size,
selectedPage,
]);
useLayoutEffect(() => {
// Only restore caret position during re-renders while already editing
@ -792,7 +835,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
}}
>
{content}
{activeGroupId === groupId && editingGroupId !== groupId && (
{activeGroupId === groupId && (
<ActionIcon
size="xs"
variant="filled"
@ -956,6 +999,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
onChange={(event) => onForceSingleTextElementChange(event.currentTarget.checked)}
/>
</Group>
</Stack>
</Card>
@ -1325,11 +1369,24 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
if (fontSizePx > 0) {
lineHeightRatio = Math.max(lineHeightRatio, lineHeightPx / fontSizePx);
}
const detectedSpacingPx =
group.lineSpacing && group.lineSpacing > 0 ? group.lineSpacing * scale : undefined;
if (detectedSpacingPx && detectedSpacingPx > 0) {
lineHeightPx = Math.max(lineHeightPx, detectedSpacingPx);
if (fontSizePx > 0) {
lineHeightRatio = Math.max(lineHeightRatio, detectedSpacingPx / fontSizePx);
}
}
const lineCount = Math.max(group.text.split('\n').length, 1);
const paragraphHeightPx =
lineCount > 1
? lineHeightPx + (lineCount - 1) * (detectedSpacingPx ?? lineHeightPx)
: lineHeightPx;
let containerLeft = bounds.left;
let containerTop = bounds.top;
let containerWidth = Math.max(bounds.width, fontSizePx);
let containerHeight = Math.max(bounds.height, lineHeightPx);
let containerHeight = Math.max(bounds.height, paragraphHeightPx);
let transform: string | undefined;
let transformOrigin: React.CSSProperties['transformOrigin'];
@ -1349,7 +1406,13 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
containerTop = anchorTop - containerHeight;
}
if (!hasRotation && group.baseline !== null && group.baseline !== undefined && geometry) {
if (
lineCount === 1 &&
!hasRotation &&
group.baseline !== null &&
group.baseline !== undefined &&
geometry
) {
const cssBaselineTop = (pageHeight - group.baseline) * scale;
containerTop = Math.max(cssBaselineTop - ascentPx, 0);
containerHeight = Math.max(containerHeight, ascentPx + descentPx);
@ -1364,7 +1427,8 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
left: `${containerLeft}px`,
top: `${containerTop}px`,
width: `${containerWidth}px`,
height: `${containerHeight}px`,
height: isEditing ? 'auto' : `${containerHeight}px`,
minHeight: `${containerHeight}px`,
display: 'flex',
alignItems: 'flex-start',
justifyContent: 'flex-start',
@ -1423,23 +1487,12 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
setEditingGroupId(null);
}}
onInput={(event) => {
const value = event.currentTarget.innerText.replace(/\u00A0/g, ' ');
const offset = getCaretOffset(event.currentTarget);
caretOffsetsRef.current.set(group.id, offset);
onGroupEdit(group.pageIndex, group.id, value);
requestAnimationFrame(() => {
if (editingGroupId !== group.id) {
return;
}
const editor = editorRefs.current.get(group.id);
if (editor) {
setCaretOffset(editor, caretOffsetsRef.current.get(group.id) ?? editor.innerText.length);
}
});
syncEditorValue(event.currentTarget, group.pageIndex, group.id);
}}
style={{
width: '100%',
height: '100%',
minHeight: '100%',
height: 'auto',
padding: 0,
backgroundColor: 'rgba(255,255,255,0.95)',
color: textColor,
@ -1486,7 +1539,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
color: textColor,
display: 'block',
cursor: 'text',
overflow: 'visible',
overflow: 'hidden',
}}
>
<span
@ -1496,6 +1549,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
display: 'inline-block',
transform: shouldScale ? `scaleX(${textScale})` : undefined,
transformOrigin: 'left center',
whiteSpace: 'pre',
}}
>
{group.text || '\u00A0'}
@ -1503,57 +1557,43 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
</div>,
undefined,
(event: React.MouseEvent) => {
// Double-click to edit
if (event.detail === 2) {
// Capture click position BEFORE switching to edit mode
const clickX = event.clientX;
const clickY = event.clientY;
const clickX = event.clientX;
const clickY = event.clientY;
setEditingGroupId(group.id);
setActiveGroupId(group.id);
setActiveGroupId(group.id);
setEditingGroupId(group.id);
caretOffsetsRef.current.delete(group.id);
// Clear any stored offset to prevent interference
caretOffsetsRef.current.delete(group.id);
requestAnimationFrame(() => {
const editor = document.querySelector<HTMLElement>(`[data-editor-group="${group.id}"]`);
if (!editor) return;
editor.focus();
// Wait for editor to render, then position cursor at click location
requestAnimationFrame(() => {
const editor = document.querySelector<HTMLElement>(`[data-editor-group="${group.id}"]`);
if (!editor) return;
// Focus the editor first
editor.focus();
// Use caretRangeFromPoint to position cursor at click coordinates
setTimeout(() => {
if (document.caretRangeFromPoint) {
const range = document.caretRangeFromPoint(clickX, clickY);
if (range) {
const selection = window.getSelection();
if (selection) {
selection.removeAllRanges();
selection.addRange(range);
}
}
} else if ((document as any).caretPositionFromPoint) {
// Firefox fallback
const pos = (document as any).caretPositionFromPoint(clickX, clickY);
if (pos) {
const range = document.createRange();
range.setStart(pos.offsetNode, pos.offset);
range.collapse(true);
const selection = window.getSelection();
if (selection) {
selection.removeAllRanges();
selection.addRange(range);
}
setTimeout(() => {
if (document.caretRangeFromPoint) {
const range = document.caretRangeFromPoint(clickX, clickY);
if (range) {
const selection = window.getSelection();
if (selection) {
selection.removeAllRanges();
selection.addRange(range);
}
}
}, 10);
});
} else {
// Single click just selects
setActiveGroupId(group.id);
}
} else if ((document as any).caretPositionFromPoint) {
const pos = (document as any).caretPositionFromPoint(clickX, clickY);
if (pos) {
const range = document.createRange();
range.setStart(pos.offsetNode, pos.offset);
range.collapse(true);
const selection = window.getSelection();
if (selection) {
selection.removeAllRanges();
selection.addRange(range);
}
}
}
}, 10);
});
},
)}
</Box>

View File

@ -1028,6 +1028,7 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
selectedPage,
forceSingleTextElement,
requestPagePreview,
setForceSingleTextElement,
]);
const latestViewDataRef = useRef<PdfJsonEditorViewData>(viewData);

View File

@ -155,6 +155,8 @@ export interface TextGroup {
fontId?: string | null;
fontSize?: number | null;
fontMatrixSize?: number | null;
lineSpacing?: number | null;
lineElementCounts?: number[] | null;
color?: string | null;
fontWeight?: number | 'normal' | 'bold' | null;
rotation?: number | null;

View File

@ -24,6 +24,54 @@ type FontMetrics = {
type FontMetricsMap = Map<string, FontMetrics>;
/**
 * Removes every line-break character from paragraph text so it can be stored
 * in a single text element.
 *
 * Handles `\n`, `\r\n` and a lone `\r`; line breaks are dropped, not replaced
 * with spaces.
 *
 * @param text Raw text, possibly spanning several lines.
 * @returns The text with all CR/LF characters removed, or `''` for empty,
 *          `null` or `undefined` input.
 */
const sanitizeParagraphText = (text: string | undefined | null): string => {
  if (!text) {
    return '';
  }
  // A character class catches stray carriage returns that `\r?\n` would leave behind.
  return text.replace(/[\r\n]/g, '');
};
/**
 * Breaks paragraph text into its individual lines.
 *
 * Carriage returns are discarded before splitting on `\n`, so `\r\n` input
 * yields clean lines. Missing text produces a single empty line.
 *
 * @param text Paragraph text, or `null`/`undefined` when absent.
 * @returns At least one line; never an empty array.
 */
const splitParagraphIntoLines = (text: string | undefined | null): string[] => {
  const source = text ?? '';
  const withoutCarriageReturns = source.replace(/\r/g, '');
  return withoutCarriageReturns.split('\n');
};
/**
 * Reads the vertical baseline position of a text element.
 *
 * A text matrix with at least six entries takes precedence: its sixth entry is
 * returned when numeric, and `null` otherwise (the `y` field is not consulted
 * in that case). Without such a matrix the element's numeric `y` is used.
 *
 * @param element Text element to inspect.
 * @returns The baseline value, or `null` when none can be determined.
 */
const extractElementBaseline = (element: PdfJsonTextElement): number | null => {
  if (!element) {
    return null;
  }

  const matrix = element.textMatrix;
  if (matrix && matrix.length >= 6) {
    const translateY = matrix[5];
    return typeof translateY === 'number' ? translateY : null;
  }

  return typeof element.y === 'number' ? element.y : null;
};
/**
 * Returns clones of the given elements moved vertically by `delta`.
 *
 * The input elements are never mutated. For a non-zero delta the sixth entry
 * of a six-or-more entry text matrix is offset (a missing entry counts as 0),
 * a numeric `y` is offset, and a `null`/`undefined` `y` is set to `delta`.
 *
 * @param elements Elements to copy.
 * @param delta Offset added to each clone's vertical position.
 * @returns Freshly cloned elements with the offset applied.
 */
const shiftElementsBy = (elements: PdfJsonTextElement[], delta: number): PdfJsonTextElement[] => {
  if (delta === 0) {
    // Nothing to move: plain clones are enough.
    return elements.map(cloneTextElement);
  }

  return elements.map((source) => {
    const shifted = cloneTextElement(source);

    const currentMatrix = shifted.textMatrix;
    if (currentMatrix && currentMatrix.length >= 6) {
      // Copy before writing so the clone never shares the array being edited.
      const movedMatrix = [...currentMatrix];
      movedMatrix[5] = (movedMatrix[5] ?? 0) + delta;
      shifted.textMatrix = movedMatrix;
    }

    if (typeof shifted.y === 'number') {
      shifted.y += delta;
    } else if (shifted.y == null) {
      shifted.y = delta;
    }

    return shifted;
  });
};
const countGraphemes = (text: string): number => {
if (!text) {
return 0;
@ -472,6 +520,123 @@ const createGroup = (
};
};
/**
 * Merges consecutive line groups into paragraph groups.
 *
 * Two neighbouring lines are joined when all of the following hold:
 *  1. their left edges differ by at most 30% of their average font size,
 *  2. they share the same `fontId`,
 *  3. their vertical distance is at most 3x their average font size
 *     (a larger gap is treated as a paragraph break).
 *
 * A line's vertical position is its `baseline`, falling back to
 * `bounds.bottom` when the baseline is missing. The same fallback is used for
 * the merge decision and for the averaged `lineSpacing`, so lines without a
 * baseline are not treated as if they all sat at position 0.
 *
 * @param lineGroups Line-level groups, compared pairwise in the given order.
 * @param metrics Optional font metrics forwarded to `computeBaselineLength`.
 * @returns One group per paragraph. A line that was not merged is returned as
 *          the same object; merged paragraphs keep the first line's id, join
 *          line texts with `\n`, and carry `lineElementCounts` (elements per
 *          line) plus `lineSpacing` (average positive line distance, or `null`).
 */
const groupLinesIntoParagraphs = (
  lineGroups: TextGroup[],
  metrics?: FontMetricsMap,
): TextGroup[] => {
  if (lineGroups.length === 0) {
    return [];
  }

  // Single source of truth for "where is this line vertically".
  const verticalPosition = (line: TextGroup): number => line.baseline ?? line.bounds.bottom;

  const paragraphs: TextGroup[][] = [];
  let currentParagraph: TextGroup[] = [lineGroups[0]];

  for (let i = 1; i < lineGroups.length; i += 1) {
    const prevLine = lineGroups[i - 1];
    const currentLine = lineGroups[i];

    const lineSpacing = Math.abs(verticalPosition(prevLine) - verticalPosition(currentLine));
    const avgFontSize = ((prevLine.fontSize ?? 12) + (currentLine.fontSize ?? 12)) / 2;

    // Left edges must line up within a tolerance relative to the font size.
    const leftAlignmentTolerance = avgFontSize * 0.3;
    const isLeftAligned =
      Math.abs(prevLine.bounds.left - currentLine.bounds.left) <= leftAlignmentTolerance;

    const sameFont = prevLine.fontId === currentLine.fontId;

    // Line spacing in PDFs can range from 1.0x to 3.0x font size; anything
    // beyond that is taken as a paragraph break.
    const maxReasonableSpacing = avgFontSize * 3.0;
    const hasReasonableSpacing = lineSpacing <= maxReasonableSpacing;

    if (isLeftAligned && sameFont && hasReasonableSpacing) {
      currentParagraph.push(currentLine);
    } else {
      paragraphs.push(currentParagraph);
      currentParagraph = [currentLine];
    }
  }

  // The paragraph under construction always holds at least one line.
  paragraphs.push(currentParagraph);

  return paragraphs.map((lines) => {
    if (lines.length === 1) {
      return lines[0];
    }

    // Combine all elements from all lines, remembering how many each line had
    // so the paragraph can be split back into lines later.
    const allElements = lines.flatMap((line) => line.originalElements);
    const lineElementCounts = lines.map((line) => line.originalElements.length);
    const pageIndex = lines[0].pageIndex;

    const paragraphText = lines.map((line) => line.text).join('\n');
    const mergedBounds = mergeBounds(lines.map((line) => line.bounds));

    // Average only strictly positive distances; coincident lines carry no spacing information.
    const spacingValues: number[] = [];
    for (let i = 1; i < lines.length; i += 1) {
      const spacing = Math.abs(verticalPosition(lines[i - 1]) - verticalPosition(lines[i]));
      if (spacing > 0) {
        spacingValues.push(spacing);
      }
    }
    const averageSpacing =
      spacingValues.length > 0
        ? spacingValues.reduce((sum, value) => sum + value, 0) / spacingValues.length
        : null;

    const firstElement = allElements[0];
    const rotation = computeGroupRotation(allElements);
    const anchor = rotation !== null ? getAnchorPoint(firstElement) : null;
    const baselineLength = computeBaselineLength(allElements, metrics);
    const baseline = computeAverageBaseline(allElements);

    return {
      id: lines[0].id, // Keep the first line's ID
      pageIndex,
      fontId: firstElement?.fontId,
      fontSize: firstElement?.fontSize,
      fontMatrixSize: firstElement?.fontMatrixSize,
      lineSpacing: averageSpacing,
      lineElementCounts,
      color: firstElement ? extractColor(firstElement) : null,
      fontWeight: null,
      rotation,
      anchor,
      baselineLength,
      baseline,
      elements: allElements.map(cloneTextElement),
      originalElements: allElements.map(cloneTextElement),
      text: paragraphText,
      originalText: paragraphText,
      bounds: mergedBounds,
    };
  });
};
export const groupPageTextElements = (
page: PdfJsonPage | null | undefined,
pageIndex: number,
@ -508,7 +673,7 @@ export const groupPageTextElements = (
});
let groupCounter = 0;
const groups: TextGroup[] = [];
const lineGroups: TextGroup[] = [];
lines.forEach((line) => {
let currentBucket: PdfJsonTextElement[] = [];
@ -527,6 +692,19 @@ export const groupPageTextElements = (
const sameFont = previous.fontId === element.fontId;
let shouldSplit = gap > splitThreshold * (sameFont ? 1.4 : 1.0);
if (shouldSplit) {
const prevBaseline = getBaseline(previous);
const currentBaseline = getBaseline(element);
const baselineDelta = Math.abs(prevBaseline - currentBaseline);
const prevEndX = getX(previous) + getWidth(previous, metrics);
const prevEndY = prevBaseline;
const diagonalGap = Math.hypot(Math.max(0, getX(element) - prevEndX), baselineDelta);
const diagonalThreshold = Math.max(avgFontSize * 0.8, splitThreshold);
if (diagonalGap <= diagonalThreshold) {
shouldSplit = false;
}
}
const previousRotation = extractElementRotation(previous);
const currentRotation = extractElementRotation(element);
if (
@ -539,7 +717,7 @@ export const groupPageTextElements = (
}
if (shouldSplit) {
groups.push(createGroup(pageIndex, groupCounter, currentBucket, metrics));
lineGroups.push(createGroup(pageIndex, groupCounter, currentBucket, metrics));
groupCounter += 1;
currentBucket = [element];
} else {
@ -548,15 +726,17 @@ export const groupPageTextElements = (
});
if (currentBucket.length > 0) {
groups.push(createGroup(pageIndex, groupCounter, currentBucket, metrics));
lineGroups.push(createGroup(pageIndex, groupCounter, currentBucket, metrics));
groupCounter += 1;
}
});
return groups;
return groupLinesIntoParagraphs(lineGroups, metrics);
};
export const groupDocumentText = (document: PdfJsonDocument | null | undefined): TextGroup[][] => {
export const groupDocumentText = (
document: PdfJsonDocument | null | undefined,
): TextGroup[][] => {
const pages = document?.pages ?? [];
const metrics = buildFontMetrics(document);
return pages.map((page, index) => groupPageTextElements(page, index, metrics));
@ -600,7 +780,7 @@ export const pageDimensions = (page: PdfJsonPage | null | undefined): { width: n
export const createMergedElement = (group: TextGroup): PdfJsonTextElement => {
const reference = group.originalElements[0];
const merged = cloneTextElement(reference);
merged.text = group.text;
merged.text = sanitizeParagraphText(group.text);
clearGlyphHints(merged);
if (reference.textMatrix && reference.textMatrix.length === 6) {
merged.textMatrix = [...reference.textMatrix];
@ -613,7 +793,8 @@ const distributeTextAcrossElements = (text: string | undefined, elements: PdfJso
return true;
}
const targetChars = Array.from(text ?? '');
const normalizedText = sanitizeParagraphText(text);
const targetChars = Array.from(normalizedText);
if (targetChars.length === 0) {
elements.forEach((element) => {
element.text = '';
@ -627,10 +808,6 @@ const distributeTextAcrossElements = (text: string | undefined, elements: PdfJso
const graphemeCount = Array.from(originalText).length;
return graphemeCount > 0 ? graphemeCount : 1;
});
const totalCapacity = capacities.reduce((sum, value) => sum + value, 0);
if (targetChars.length > totalCapacity) {
return false;
}
let cursor = 0;
elements.forEach((element, index) => {
@ -640,7 +817,9 @@ const distributeTextAcrossElements = (text: string | undefined, elements: PdfJso
if (index === elements.length - 1) {
sliceLength = remaining;
} else {
sliceLength = Math.min(capacities[index], remaining);
const capacity = Math.max(capacities[index], 1);
const minRemainingForRest = Math.max(elements.length - index - 1, 0);
sliceLength = Math.min(capacity, Math.max(remaining - minRemainingForRest, 1));
}
}
@ -658,6 +837,118 @@ const distributeTextAcrossElements = (text: string | undefined, elements: PdfJso
return true;
};
/**
 * Partitions a group's original elements into per-line chunks using the
 * group's `lineElementCounts`.
 *
 * Without counts, all original elements form one chunk (or no chunk when
 * there are none). Non-positive counts are skipped, and a count that runs
 * past the end of the element list yields a shorter (or omitted) chunk.
 *
 * @param group Group whose `originalElements` should be split.
 * @returns One array of elements per line, in order.
 */
const sliceElementsByLineCounts = (group: TextGroup): PdfJsonTextElement[][] => {
  const perLineCounts = group.lineElementCounts;
  if (!perLineCounts || perLineCounts.length === 0) {
    return group.originalElements.length ? [group.originalElements] : [];
  }

  const chunks: PdfJsonTextElement[][] = [];
  let start = 0;
  for (const count of perLineCounts) {
    if (count <= 0) {
      continue;
    }
    const end = start + count;
    const chunk = group.originalElements.slice(start, end);
    if (chunk.length > 0) {
      chunks.push(chunk);
    }
    start = end;
  }
  return chunks;
};
/**
 * Rebuilds the text elements of a multi-line paragraph group, one set of
 * elements per line of the edited text.
 *
 * Each text line reuses the original elements of the line at the same index
 * as a template. Lines beyond the last original line reuse the last template,
 * shifted by a multiple of the line spacing in the direction the original
 * lines progress (defaulting to -1 when no direction can be detected).
 *
 * Spacing is taken from `group.lineSpacing` when positive, otherwise from the
 * first positive baseline difference between template lines, otherwise from
 * 1.2x the font size (`fontMatrixSize`, then `fontSize`, then 12; at least 6).
 *
 * @param group Paragraph group with edited `text`.
 * @returns The rebuilt elements, or `null` when the text has no line break or
 *          no usable template elements exist.
 */
const rebuildParagraphLineElements = (group: TextGroup): PdfJsonTextElement[] | null => {
  const paragraphText = group.text;
  if (!paragraphText || !paragraphText.includes('\n')) {
    return null;
  }

  const lineTexts = splitParagraphIntoLines(paragraphText);
  if (lineTexts.length === 0) {
    return [];
  }

  const templates = sliceElementsByLineCounts(group);
  if (templates.length === 0) {
    return null;
  }

  // One baseline per template line: first element that reports one, else the group's.
  const baselines = templates.map((lineElements) => {
    for (const candidate of lineElements) {
      const value = extractElementBaseline(candidate);
      if (value !== null) {
        return value;
      }
    }
    return group.baseline ?? null;
  });

  // First strictly positive distance between neighbouring template baselines.
  let measuredSpacing: number | null = null;
  for (let i = 1; i < baselines.length && measuredSpacing === null; i += 1) {
    const above = baselines[i - 1];
    const below = baselines[i];
    if (above !== null && below !== null) {
      const gap = Math.abs(above - below);
      if (gap > 0) {
        measuredSpacing = gap;
      }
    }
  }

  const preferredSpacing =
    group.lineSpacing && group.lineSpacing > 0 ? group.lineSpacing : measuredSpacing;
  const spacing =
    preferredSpacing ?? Math.max(group.fontMatrixSize ?? group.fontSize ?? 12, 6) * 1.2;

  // Sign of the step from one line to the next, from the first clearly distinct pair.
  let direction = -1;
  for (let i = 1; i < baselines.length; i += 1) {
    const above = baselines[i - 1];
    const below = baselines[i];
    if (above === null || below === null || !(Math.abs(above - below) > 0.05)) {
      continue;
    }
    direction = below < above ? -1 : 1;
    break;
  }

  const lastTemplateIndex = Math.max(templates.length - 1, 0);
  const rebuilt: PdfJsonTextElement[] = [];

  for (let lineIndex = 0; lineIndex < lineTexts.length; lineIndex += 1) {
    const templateIndex = Math.min(lineIndex, lastTemplateIndex);
    const template = templates[templateIndex];
    if (!template || template.length === 0) {
      return null;
    }

    // Extra lines past the last template are stacked one spacing step apart.
    const extraSteps = lineIndex - templateIndex;
    const offset = extraSteps * spacing * direction;
    const lineElements = shiftElementsBy(template, offset);

    const lineText = sanitizeParagraphText(lineTexts[lineIndex]);
    if (!distributeTextAcrossElements(lineText, lineElements)) {
      // Fallback: put the whole line in the first element and blank the rest.
      const [primary, ...others] = lineElements;
      primary.text = lineText;
      clearGlyphHints(primary);
      for (const other of others) {
        other.text = '';
        clearGlyphHints(other);
      }
    }

    rebuilt.push(...lineElements);
  }

  return rebuilt;
};
export const buildUpdatedDocument = (
source: PdfJsonDocument,
groupsByPage: TextGroup[][],
@ -724,11 +1015,17 @@ export const restoreGlyphElements = (
rebuiltElements.push(createMergedElement(group));
return;
}
const paragraphElements = rebuildParagraphLineElements(group);
if (paragraphElements && paragraphElements.length > 0) {
rebuiltElements.push(...paragraphElements);
return;
}
const originalGlyphCount = group.originalElements.reduce(
(sum, element) => sum + countGraphemes(element.text ?? ''),
0,
);
const targetGlyphCount = countGraphemes(group.text);
const normalizedText = sanitizeParagraphText(group.text);
const targetGlyphCount = countGraphemes(normalizedText);
if (targetGlyphCount !== originalGlyphCount) {
rebuiltElements.push(createMergedElement(group));
@ -736,7 +1033,7 @@ export const restoreGlyphElements = (
}
const originals = group.originalElements.map(cloneTextElement);
const distributed = distributeTextAcrossElements(group.text, originals);
const distributed = distributeTextAcrossElements(normalizedText, originals);
if (distributed) {
rebuiltElements.push(...originals);
} else {