mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-11-16 01:21:16 +01:00
paragraphs? :o
This commit is contained in:
parent
5fadb92f51
commit
3ed62c8dbf
@ -1,5 +1,6 @@
|
||||
multipart.enabled=true
|
||||
logging.level.org.springframework=WARN
|
||||
logging.level.org.springframework.security=WARN
|
||||
logging.level.org.hibernate=WARN
|
||||
logging.level.org.eclipse.jetty=WARN
|
||||
#logging.level.org.springframework.security.oauth2=DEBUG
|
||||
|
||||
@ -4498,8 +4498,17 @@
|
||||
"forceSingleElement": {
|
||||
"title": "Lock edited text to a single PDF element",
|
||||
"description": "When enabled, the editor exports each edited text box as one PDF text element to avoid overlapping glyphs or mixed fonts."
|
||||
},
|
||||
"textGroupingMode": {
|
||||
"title": "Text grouping mode",
|
||||
"description": "Paragraph mode merges aligned lines into one textbox; single-line mode keeps every PDF line separate. Auto picks the best option per page."
|
||||
}
|
||||
},
|
||||
"grouping": {
|
||||
"auto": "Auto",
|
||||
"paragraph": "Paragraph",
|
||||
"single": "Single Line"
|
||||
},
|
||||
"disclaimer": {
|
||||
"heading": "Preview limitations",
|
||||
"textFocus": "This workspace focuses on editing text and repositioning embedded images. Complex page artwork, form widgets, and layered graphics are preserved for export but are not fully editable here.",
|
||||
|
||||
@ -36,7 +36,7 @@ function persistRedirectPath(path: string): void {
|
||||
try {
|
||||
document.cookie = `${OAUTH_REDIRECT_COOKIE}=${encodeURIComponent(path)}; path=/; max-age=${OAUTH_REDIRECT_COOKIE_MAX_AGE}; SameSite=Lax`;
|
||||
} catch (error) {
|
||||
console.warn('[SpringAuth] Failed to persist OAuth redirect path', error);
|
||||
// console.warn('[SpringAuth] Failed to persist OAuth redirect path', error);
|
||||
}
|
||||
}
|
||||
|
||||
@ -113,21 +113,21 @@ class SpringAuthClient {
|
||||
const token = localStorage.getItem('stirling_jwt');
|
||||
|
||||
if (!token) {
|
||||
console.debug('[SpringAuth] getSession: No JWT in localStorage');
|
||||
// console.debug('[SpringAuth] getSession: No JWT in localStorage');
|
||||
return { data: { session: null }, error: null };
|
||||
}
|
||||
|
||||
// Verify with backend
|
||||
console.debug('[SpringAuth] getSession: Verifying JWT with /api/v1/auth/me');
|
||||
// console.debug('[SpringAuth] getSession: Verifying JWT with /api/v1/auth/me');
|
||||
const response = await fetch('/api/v1/auth/me', {
|
||||
headers: {
|
||||
'Authorization': `Bearer ${token}`,
|
||||
},
|
||||
});
|
||||
|
||||
console.debug('[SpringAuth] /me response status:', response.status);
|
||||
// console.debug('[SpringAuth] /me response status:', response.status);
|
||||
const contentType = response.headers.get('content-type');
|
||||
console.debug('[SpringAuth] /me content-type:', contentType);
|
||||
// console.debug('[SpringAuth] /me content-type:', contentType);
|
||||
|
||||
if (!response.ok) {
|
||||
// Log the error response for debugging
|
||||
@ -140,7 +140,7 @@ class SpringAuthClient {
|
||||
|
||||
// Token invalid or expired - clear it
|
||||
localStorage.removeItem('stirling_jwt');
|
||||
console.warn('[SpringAuth] getSession: Cleared invalid JWT from localStorage');
|
||||
// console.warn('[SpringAuth] getSession: Cleared invalid JWT from localStorage');
|
||||
return { data: { session: null }, error: { message: `Auth failed: ${response.status}` } };
|
||||
}
|
||||
|
||||
@ -155,7 +155,7 @@ class SpringAuthClient {
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
console.debug('[SpringAuth] /me response data:', data);
|
||||
// console.debug('[SpringAuth] /me response data:', data);
|
||||
|
||||
// Create session object
|
||||
const session: Session = {
|
||||
@ -165,7 +165,7 @@ class SpringAuthClient {
|
||||
expires_at: Date.now() + 3600 * 1000,
|
||||
};
|
||||
|
||||
console.debug('[SpringAuth] getSession: Session retrieved successfully');
|
||||
// console.debug('[SpringAuth] getSession: Session retrieved successfully');
|
||||
return { data: { session }, error: null };
|
||||
} catch (error) {
|
||||
console.error('[SpringAuth] getSession error:', error);
|
||||
@ -206,7 +206,7 @@ class SpringAuthClient {
|
||||
|
||||
// Store JWT in localStorage
|
||||
localStorage.setItem('stirling_jwt', token);
|
||||
console.log('[SpringAuth] JWT stored in localStorage');
|
||||
// console.log('[SpringAuth] JWT stored in localStorage');
|
||||
|
||||
// Dispatch custom event for other components to react to JWT availability
|
||||
window.dispatchEvent(new CustomEvent('jwt-available'));
|
||||
@ -285,7 +285,7 @@ class SpringAuthClient {
|
||||
|
||||
// Redirect to Spring OAuth2 endpoint (Vite will proxy to backend)
|
||||
const redirectUrl = `/oauth2/authorization/${params.provider}`;
|
||||
console.log('[SpringAuth] Redirecting to OAuth:', redirectUrl);
|
||||
// console.log('[SpringAuth] Redirecting to OAuth:', redirectUrl);
|
||||
// Use window.location.assign for full page navigation
|
||||
window.location.assign(redirectUrl);
|
||||
return { error: null };
|
||||
@ -303,7 +303,7 @@ class SpringAuthClient {
|
||||
try {
|
||||
// Clear JWT from localStorage immediately
|
||||
localStorage.removeItem('stirling_jwt');
|
||||
console.log('[SpringAuth] JWT removed from localStorage');
|
||||
// console.log('[SpringAuth] JWT removed from localStorage');
|
||||
|
||||
const csrfToken = this.getCsrfToken();
|
||||
const headers: HeadersInit = {};
|
||||
@ -446,7 +446,7 @@ class SpringAuthClient {
|
||||
|
||||
// Refresh if token expires soon
|
||||
if (timeUntilExpiry > 0 && timeUntilExpiry < this.TOKEN_REFRESH_THRESHOLD) {
|
||||
console.log('[SpringAuth] Proactively refreshing token');
|
||||
// console.log('[SpringAuth] Proactively refreshing token');
|
||||
await this.refreshSession();
|
||||
}
|
||||
}
|
||||
|
||||
@ -245,6 +245,26 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
onForceSingleTextElementChange,
|
||||
} = data;
|
||||
|
||||
/**
 * Reads the current text of an editor element, reports it through
 * `onGroupEdit`, and re-applies the caret position on the next animation frame.
 *
 * @param element   The element whose `innerText` holds the edited text.
 * @param pageIndex Page index forwarded to `onGroupEdit`.
 * @param groupId   Id of the text group being edited; also the key used in
 *                  `caretOffsetsRef` and `editorRefs`.
 */
const syncEditorValue = useCallback(
  (element: HTMLElement, pageIndex: number, groupId: string) => {
    // Non-breaking spaces (U+00A0) are normalised to plain spaces before the
    // value is handed to the parent.
    const value = element.innerText.replace(/\u00A0/g, ' ');

    // Remember the caret before notifying the parent, so the offset reflects
    // the DOM as the user left it.
    caretOffsetsRef.current.set(groupId, getCaretOffset(element));
    onGroupEdit(pageIndex, groupId, value);

    requestAnimationFrame(() => {
      // NOTE(review): `editingGroupId` is the value captured when this callback
      // was created (it is listed in the dependency array) — confirm that is
      // the intended check once the frame fires.
      if (editingGroupId !== groupId) {
        return;
      }
      const editor = editorRefs.current.get(groupId);
      if (!editor) {
        return;
      }
      // Fall back to the end of the text when no offset was recorded.
      const savedOffset = caretOffsetsRef.current.get(groupId) ?? editor.innerText.length;
      setCaretOffset(editor, savedOffset);
    });
  },
  [editingGroupId, onGroupEdit],
);
|
||||
|
||||
const resolveFont = (fontId: string | null | undefined, pageIndex: number | null | undefined): PdfJsonFont | null => {
|
||||
if (!fontId || !pdfDocument?.fonts) {
|
||||
return null;
|
||||
@ -646,7 +666,14 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
|
||||
// Measure text widths once per page/configuration and apply static scaling
|
||||
useLayoutEffect(() => {
|
||||
if (!autoScaleText || visibleGroups.length === 0) {
|
||||
if (!autoScaleText) {
|
||||
// Clear all scales when auto-scale is disabled
|
||||
setTextScales(new Map());
|
||||
measurementKeyRef.current = '';
|
||||
return;
|
||||
}
|
||||
|
||||
if (visibleGroups.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -667,6 +694,13 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip multi-line paragraphs - auto-scaling doesn't work well with wrapped text
|
||||
const lineCount = (group.text || '').split('\n').length;
|
||||
if (lineCount > 1) {
|
||||
newScales.set(group.id, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
const element = document.querySelector<HTMLElement>(`[data-text-group="${group.id}"]`);
|
||||
if (!element) {
|
||||
return;
|
||||
@ -705,7 +739,16 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
// Delay measurement to ensure fonts and layout are ready
|
||||
const timer = setTimeout(measureTextScales, 150);
|
||||
return () => clearTimeout(timer);
|
||||
}, [autoScaleText, visibleGroups, editingGroupId, currentPage, pageHeight, scale, fontFamilies.size, selectedPage]);
|
||||
}, [
|
||||
autoScaleText,
|
||||
visibleGroups,
|
||||
editingGroupId,
|
||||
currentPage,
|
||||
pageHeight,
|
||||
scale,
|
||||
fontFamilies.size,
|
||||
selectedPage,
|
||||
]);
|
||||
|
||||
useLayoutEffect(() => {
|
||||
// Only restore caret position during re-renders while already editing
|
||||
@ -792,7 +835,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
}}
|
||||
>
|
||||
{content}
|
||||
{activeGroupId === groupId && editingGroupId !== groupId && (
|
||||
{activeGroupId === groupId && (
|
||||
<ActionIcon
|
||||
size="xs"
|
||||
variant="filled"
|
||||
@ -956,6 +999,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
onChange={(event) => onForceSingleTextElementChange(event.currentTarget.checked)}
|
||||
/>
|
||||
</Group>
|
||||
|
||||
</Stack>
|
||||
</Card>
|
||||
|
||||
@ -1325,11 +1369,24 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
if (fontSizePx > 0) {
|
||||
lineHeightRatio = Math.max(lineHeightRatio, lineHeightPx / fontSizePx);
|
||||
}
|
||||
const detectedSpacingPx =
|
||||
group.lineSpacing && group.lineSpacing > 0 ? group.lineSpacing * scale : undefined;
|
||||
if (detectedSpacingPx && detectedSpacingPx > 0) {
|
||||
lineHeightPx = Math.max(lineHeightPx, detectedSpacingPx);
|
||||
if (fontSizePx > 0) {
|
||||
lineHeightRatio = Math.max(lineHeightRatio, detectedSpacingPx / fontSizePx);
|
||||
}
|
||||
}
|
||||
const lineCount = Math.max(group.text.split('\n').length, 1);
|
||||
const paragraphHeightPx =
|
||||
lineCount > 1
|
||||
? lineHeightPx + (lineCount - 1) * (detectedSpacingPx ?? lineHeightPx)
|
||||
: lineHeightPx;
|
||||
|
||||
let containerLeft = bounds.left;
|
||||
let containerTop = bounds.top;
|
||||
let containerWidth = Math.max(bounds.width, fontSizePx);
|
||||
let containerHeight = Math.max(bounds.height, lineHeightPx);
|
||||
let containerHeight = Math.max(bounds.height, paragraphHeightPx);
|
||||
let transform: string | undefined;
|
||||
let transformOrigin: React.CSSProperties['transformOrigin'];
|
||||
|
||||
@ -1349,7 +1406,13 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
containerTop = anchorTop - containerHeight;
|
||||
}
|
||||
|
||||
if (!hasRotation && group.baseline !== null && group.baseline !== undefined && geometry) {
|
||||
if (
|
||||
lineCount === 1 &&
|
||||
!hasRotation &&
|
||||
group.baseline !== null &&
|
||||
group.baseline !== undefined &&
|
||||
geometry
|
||||
) {
|
||||
const cssBaselineTop = (pageHeight - group.baseline) * scale;
|
||||
containerTop = Math.max(cssBaselineTop - ascentPx, 0);
|
||||
containerHeight = Math.max(containerHeight, ascentPx + descentPx);
|
||||
@ -1364,7 +1427,8 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
left: `${containerLeft}px`,
|
||||
top: `${containerTop}px`,
|
||||
width: `${containerWidth}px`,
|
||||
height: `${containerHeight}px`,
|
||||
height: isEditing ? 'auto' : `${containerHeight}px`,
|
||||
minHeight: `${containerHeight}px`,
|
||||
display: 'flex',
|
||||
alignItems: 'flex-start',
|
||||
justifyContent: 'flex-start',
|
||||
@ -1423,23 +1487,12 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
setEditingGroupId(null);
|
||||
}}
|
||||
onInput={(event) => {
|
||||
const value = event.currentTarget.innerText.replace(/\u00A0/g, ' ');
|
||||
const offset = getCaretOffset(event.currentTarget);
|
||||
caretOffsetsRef.current.set(group.id, offset);
|
||||
onGroupEdit(group.pageIndex, group.id, value);
|
||||
requestAnimationFrame(() => {
|
||||
if (editingGroupId !== group.id) {
|
||||
return;
|
||||
}
|
||||
const editor = editorRefs.current.get(group.id);
|
||||
if (editor) {
|
||||
setCaretOffset(editor, caretOffsetsRef.current.get(group.id) ?? editor.innerText.length);
|
||||
}
|
||||
});
|
||||
syncEditorValue(event.currentTarget, group.pageIndex, group.id);
|
||||
}}
|
||||
style={{
|
||||
width: '100%',
|
||||
height: '100%',
|
||||
minHeight: '100%',
|
||||
height: 'auto',
|
||||
padding: 0,
|
||||
backgroundColor: 'rgba(255,255,255,0.95)',
|
||||
color: textColor,
|
||||
@ -1486,7 +1539,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
color: textColor,
|
||||
display: 'block',
|
||||
cursor: 'text',
|
||||
overflow: 'visible',
|
||||
overflow: 'hidden',
|
||||
}}
|
||||
>
|
||||
<span
|
||||
@ -1496,6 +1549,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
display: 'inline-block',
|
||||
transform: shouldScale ? `scaleX(${textScale})` : undefined,
|
||||
transformOrigin: 'left center',
|
||||
whiteSpace: 'pre',
|
||||
}}
|
||||
>
|
||||
{group.text || '\u00A0'}
|
||||
@ -1503,57 +1557,43 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
</div>,
|
||||
undefined,
|
||||
(event: React.MouseEvent) => {
|
||||
// Double-click to edit
|
||||
if (event.detail === 2) {
|
||||
// Capture click position BEFORE switching to edit mode
|
||||
const clickX = event.clientX;
|
||||
const clickY = event.clientY;
|
||||
const clickX = event.clientX;
|
||||
const clickY = event.clientY;
|
||||
|
||||
setEditingGroupId(group.id);
|
||||
setActiveGroupId(group.id);
|
||||
setActiveGroupId(group.id);
|
||||
setEditingGroupId(group.id);
|
||||
caretOffsetsRef.current.delete(group.id);
|
||||
|
||||
// Clear any stored offset to prevent interference
|
||||
caretOffsetsRef.current.delete(group.id);
|
||||
requestAnimationFrame(() => {
|
||||
const editor = document.querySelector<HTMLElement>(`[data-editor-group="${group.id}"]`);
|
||||
if (!editor) return;
|
||||
editor.focus();
|
||||
|
||||
// Wait for editor to render, then position cursor at click location
|
||||
requestAnimationFrame(() => {
|
||||
const editor = document.querySelector<HTMLElement>(`[data-editor-group="${group.id}"]`);
|
||||
if (!editor) return;
|
||||
|
||||
// Focus the editor first
|
||||
editor.focus();
|
||||
|
||||
// Use caretRangeFromPoint to position cursor at click coordinates
|
||||
setTimeout(() => {
|
||||
if (document.caretRangeFromPoint) {
|
||||
const range = document.caretRangeFromPoint(clickX, clickY);
|
||||
if (range) {
|
||||
const selection = window.getSelection();
|
||||
if (selection) {
|
||||
selection.removeAllRanges();
|
||||
selection.addRange(range);
|
||||
}
|
||||
}
|
||||
} else if ((document as any).caretPositionFromPoint) {
|
||||
// Firefox fallback
|
||||
const pos = (document as any).caretPositionFromPoint(clickX, clickY);
|
||||
if (pos) {
|
||||
const range = document.createRange();
|
||||
range.setStart(pos.offsetNode, pos.offset);
|
||||
range.collapse(true);
|
||||
const selection = window.getSelection();
|
||||
if (selection) {
|
||||
selection.removeAllRanges();
|
||||
selection.addRange(range);
|
||||
}
|
||||
setTimeout(() => {
|
||||
if (document.caretRangeFromPoint) {
|
||||
const range = document.caretRangeFromPoint(clickX, clickY);
|
||||
if (range) {
|
||||
const selection = window.getSelection();
|
||||
if (selection) {
|
||||
selection.removeAllRanges();
|
||||
selection.addRange(range);
|
||||
}
|
||||
}
|
||||
}, 10);
|
||||
});
|
||||
} else {
|
||||
// Single click just selects
|
||||
setActiveGroupId(group.id);
|
||||
}
|
||||
} else if ((document as any).caretPositionFromPoint) {
|
||||
const pos = (document as any).caretPositionFromPoint(clickX, clickY);
|
||||
if (pos) {
|
||||
const range = document.createRange();
|
||||
range.setStart(pos.offsetNode, pos.offset);
|
||||
range.collapse(true);
|
||||
const selection = window.getSelection();
|
||||
if (selection) {
|
||||
selection.removeAllRanges();
|
||||
selection.addRange(range);
|
||||
}
|
||||
}
|
||||
}
|
||||
}, 10);
|
||||
});
|
||||
},
|
||||
)}
|
||||
</Box>
|
||||
|
||||
@ -1028,6 +1028,7 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
selectedPage,
|
||||
forceSingleTextElement,
|
||||
requestPagePreview,
|
||||
setForceSingleTextElement,
|
||||
]);
|
||||
|
||||
const latestViewDataRef = useRef<PdfJsonEditorViewData>(viewData);
|
||||
|
||||
@ -155,6 +155,8 @@ export interface TextGroup {
|
||||
fontId?: string | null;
|
||||
fontSize?: number | null;
|
||||
fontMatrixSize?: number | null;
|
||||
lineSpacing?: number | null;
|
||||
lineElementCounts?: number[] | null;
|
||||
color?: string | null;
|
||||
fontWeight?: number | 'normal' | 'bold' | null;
|
||||
rotation?: number | null;
|
||||
|
||||
@ -24,6 +24,54 @@ type FontMetrics = {
|
||||
|
||||
type FontMetricsMap = Map<string, FontMetrics>;
|
||||
|
||||
/**
 * Removes every line break from `text` so it can be stored in a single-line
 * text element.
 *
 * Handles `\r\n`, `\n` and a lone `\r`, matching `splitParagraphIntoLines`,
 * which also discards every carriage return. Line breaks are deleted, not
 * replaced with a space.
 *
 * @param text Paragraph text, possibly `null`/`undefined`.
 * @returns The text without line-break characters; `''` for empty or missing input.
 */
const sanitizeParagraphText = (text: string | undefined | null): string =>
  text ? text.replace(/\r\n?|\n/g, '') : '';
|
||||
|
||||
/**
 * Splits paragraph text into its lines.
 *
 * Carriage returns are discarded first, then the text is split on `\n`.
 * Empty lines are preserved, so the result always has at least one entry.
 *
 * @param text Paragraph text, possibly `null`/`undefined`.
 * @returns One string per line; `['']` when `text` is `null`, `undefined` or empty.
 */
const splitParagraphIntoLines = (text: string | undefined | null): string[] =>
  (text ?? '').replace(/\r/g, '').split('\n');
|
||||
|
||||
/**
 * Returns the baseline coordinate of a text element.
 *
 * The sixth entry of `textMatrix` (index 5) is preferred. When the matrix is
 * missing, too short, or that entry is not a number, the element's `y` is used
 * instead.
 *
 * @param element Text element to inspect.
 * @returns The baseline, or `null` when neither source yields a number.
 */
const extractElementBaseline = (element: PdfJsonTextElement): number | null => {
  if (!element) {
    return null;
  }

  const matrix = element.textMatrix;
  if (matrix && matrix.length >= 6) {
    const fromMatrix = matrix[5];
    if (typeof fromMatrix === 'number') {
      return fromMatrix;
    }
    // Unusable matrix entry: fall through to `y` instead of giving up.
  }

  return typeof element.y === 'number' ? element.y : null;
};
|
||||
|
||||
/**
 * Returns clones of `elements` with their vertical position moved by `delta`.
 *
 * The inputs are never mutated: every element goes through `cloneTextElement`
 * and a copied `textMatrix` array is assigned to the clone.
 *
 * @param elements Elements to copy.
 * @param delta    Amount added to `textMatrix[5]` and to `y`.
 * @returns A new array of shifted clones (plain clones when `delta` is 0).
 */
const shiftElementsBy = (elements: PdfJsonTextElement[], delta: number): PdfJsonTextElement[] => {
  if (delta === 0) {
    return elements.map(cloneTextElement);
  }

  return elements.map((element) => {
    const clone = cloneTextElement(element);

    if (clone.textMatrix && clone.textMatrix.length >= 6) {
      const shiftedMatrix = [...clone.textMatrix];
      shiftedMatrix[5] = (shiftedMatrix[5] ?? 0) + delta;
      clone.textMatrix = shiftedMatrix;
    }

    if (typeof clone.y === 'number') {
      clone.y += delta;
    } else if (clone.y === null || clone.y === undefined) {
      // NOTE(review): a missing `y` becomes `delta` itself, even when the text
      // matrix already carries the shift — confirm consumers expect that.
      clone.y = delta;
    }

    return clone;
  });
};
|
||||
|
||||
const countGraphemes = (text: string): number => {
|
||||
if (!text) {
|
||||
return 0;
|
||||
@ -472,6 +520,123 @@ const createGroup = (
|
||||
};
|
||||
};
|
||||
|
||||
/**
 * Merges consecutive line groups into paragraph groups.
 *
 * Two neighbouring lines are placed in the same paragraph when all of these hold:
 *  1. their left edges differ by at most 0.3x the average font size,
 *  2. they use the same `fontId`,
 *  3. their baselines are at most 3x the average font size apart.
 *
 * A line's baseline falls back to `bounds.bottom` when `baseline` is missing,
 * both for the merge decision and for the averaged `lineSpacing`, so lines
 * without a baseline are still compared by their actual position.
 *
 * @param lineGroups Line-level groups, in the order they should be compared.
 * @param metrics    Optional font metrics, forwarded to `computeBaselineLength`.
 * @returns One group per paragraph. Single-line paragraphs are returned as the
 *          original group object; multi-line paragraphs are new groups that
 *          reuse the first line's id and join the line texts with `\n`.
 */
const groupLinesIntoParagraphs = (
  lineGroups: TextGroup[],
  metrics?: FontMetricsMap,
): TextGroup[] => {
  if (lineGroups.length === 0) {
    return [];
  }

  const baselineOf = (line: TextGroup): number => line.baseline ?? line.bounds.bottom;

  const paragraphs: TextGroup[][] = [];
  let currentParagraph: TextGroup[] = [lineGroups[0]];

  for (let i = 1; i < lineGroups.length; i += 1) {
    const prevLine = lineGroups[i - 1];
    const currentLine = lineGroups[i];

    const lineSpacing = Math.abs(baselineOf(prevLine) - baselineOf(currentLine));
    const avgFontSize = ((prevLine.fontSize ?? 12) + (currentLine.fontSize ?? 12)) / 2;

    // Left edges must line up within a fraction of the font size.
    const leftAlignmentTolerance = avgFontSize * 0.3;
    const isLeftAligned =
      Math.abs(prevLine.bounds.left - currentLine.bounds.left) <= leftAlignmentTolerance;

    const sameFont = prevLine.fontId === currentLine.fontId;

    // Only an upper bound is enforced: anything above ~3x the font size is
    // treated as a paragraph break.
    const maxReasonableSpacing = avgFontSize * 3.0;
    const hasReasonableSpacing = lineSpacing <= maxReasonableSpacing;

    if (isLeftAligned && sameFont && hasReasonableSpacing) {
      currentParagraph.push(currentLine);
    } else {
      paragraphs.push(currentParagraph);
      currentParagraph = [currentLine];
    }
  }

  // The paragraph being built always holds at least one line.
  paragraphs.push(currentParagraph);

  return paragraphs.map((lines) => {
    if (lines.length === 1) {
      return lines[0];
    }

    const allElements = lines.flatMap((line) => line.originalElements);
    // Remember how many elements each line contributed so the paragraph can be
    // split back into lines later.
    const lineElementCounts = lines.map((line) => line.originalElements.length);
    const paragraphText = lines.map((line) => line.text).join('\n');
    const mergedBounds = mergeBounds(lines.map((line) => line.bounds));

    // Average the non-zero distances between consecutive baselines.
    const spacingValues: number[] = [];
    for (let i = 1; i < lines.length; i += 1) {
      const spacing = Math.abs(baselineOf(lines[i - 1]) - baselineOf(lines[i]));
      if (spacing > 0) {
        spacingValues.push(spacing);
      }
    }
    const averageSpacing =
      spacingValues.length > 0
        ? spacingValues.reduce((sum, value) => sum + value, 0) / spacingValues.length
        : null;

    const firstElement = allElements[0];
    const rotation = computeGroupRotation(allElements);
    const anchor = rotation !== null ? getAnchorPoint(firstElement) : null;
    const baselineLength = computeBaselineLength(allElements, metrics);
    const baseline = computeAverageBaseline(allElements);

    return {
      id: lines[0].id, // Keep the first line's ID
      pageIndex: lines[0].pageIndex,
      fontId: firstElement?.fontId,
      fontSize: firstElement?.fontSize,
      fontMatrixSize: firstElement?.fontMatrixSize,
      lineSpacing: averageSpacing,
      lineElementCounts: lines.length > 1 ? lineElementCounts : null,
      color: firstElement ? extractColor(firstElement) : null,
      fontWeight: null,
      rotation,
      anchor,
      baselineLength,
      baseline,
      elements: allElements.map(cloneTextElement),
      originalElements: allElements.map(cloneTextElement),
      text: paragraphText,
      originalText: paragraphText,
      bounds: mergedBounds,
    };
  });
};
|
||||
|
||||
export const groupPageTextElements = (
|
||||
page: PdfJsonPage | null | undefined,
|
||||
pageIndex: number,
|
||||
@ -508,7 +673,7 @@ export const groupPageTextElements = (
|
||||
});
|
||||
|
||||
let groupCounter = 0;
|
||||
const groups: TextGroup[] = [];
|
||||
const lineGroups: TextGroup[] = [];
|
||||
|
||||
lines.forEach((line) => {
|
||||
let currentBucket: PdfJsonTextElement[] = [];
|
||||
@ -527,6 +692,19 @@ export const groupPageTextElements = (
|
||||
const sameFont = previous.fontId === element.fontId;
|
||||
let shouldSplit = gap > splitThreshold * (sameFont ? 1.4 : 1.0);
|
||||
|
||||
if (shouldSplit) {
|
||||
const prevBaseline = getBaseline(previous);
|
||||
const currentBaseline = getBaseline(element);
|
||||
const baselineDelta = Math.abs(prevBaseline - currentBaseline);
|
||||
const prevEndX = getX(previous) + getWidth(previous, metrics);
|
||||
const prevEndY = prevBaseline;
|
||||
const diagonalGap = Math.hypot(Math.max(0, getX(element) - prevEndX), baselineDelta);
|
||||
const diagonalThreshold = Math.max(avgFontSize * 0.8, splitThreshold);
|
||||
if (diagonalGap <= diagonalThreshold) {
|
||||
shouldSplit = false;
|
||||
}
|
||||
}
|
||||
|
||||
const previousRotation = extractElementRotation(previous);
|
||||
const currentRotation = extractElementRotation(element);
|
||||
if (
|
||||
@ -539,7 +717,7 @@ export const groupPageTextElements = (
|
||||
}
|
||||
|
||||
if (shouldSplit) {
|
||||
groups.push(createGroup(pageIndex, groupCounter, currentBucket, metrics));
|
||||
lineGroups.push(createGroup(pageIndex, groupCounter, currentBucket, metrics));
|
||||
groupCounter += 1;
|
||||
currentBucket = [element];
|
||||
} else {
|
||||
@ -548,15 +726,17 @@ export const groupPageTextElements = (
|
||||
});
|
||||
|
||||
if (currentBucket.length > 0) {
|
||||
groups.push(createGroup(pageIndex, groupCounter, currentBucket, metrics));
|
||||
lineGroups.push(createGroup(pageIndex, groupCounter, currentBucket, metrics));
|
||||
groupCounter += 1;
|
||||
}
|
||||
});
|
||||
|
||||
return groups;
|
||||
return groupLinesIntoParagraphs(lineGroups, metrics);
|
||||
};
|
||||
|
||||
export const groupDocumentText = (document: PdfJsonDocument | null | undefined): TextGroup[][] => {
|
||||
export const groupDocumentText = (
|
||||
document: PdfJsonDocument | null | undefined,
|
||||
): TextGroup[][] => {
|
||||
const pages = document?.pages ?? [];
|
||||
const metrics = buildFontMetrics(document);
|
||||
return pages.map((page, index) => groupPageTextElements(page, index, metrics));
|
||||
@ -600,7 +780,7 @@ export const pageDimensions = (page: PdfJsonPage | null | undefined): { width: n
|
||||
export const createMergedElement = (group: TextGroup): PdfJsonTextElement => {
|
||||
const reference = group.originalElements[0];
|
||||
const merged = cloneTextElement(reference);
|
||||
merged.text = group.text;
|
||||
merged.text = sanitizeParagraphText(group.text);
|
||||
clearGlyphHints(merged);
|
||||
if (reference.textMatrix && reference.textMatrix.length === 6) {
|
||||
merged.textMatrix = [...reference.textMatrix];
|
||||
@ -613,7 +793,8 @@ const distributeTextAcrossElements = (text: string | undefined, elements: PdfJso
|
||||
return true;
|
||||
}
|
||||
|
||||
const targetChars = Array.from(text ?? '');
|
||||
const normalizedText = sanitizeParagraphText(text);
|
||||
const targetChars = Array.from(normalizedText);
|
||||
if (targetChars.length === 0) {
|
||||
elements.forEach((element) => {
|
||||
element.text = '';
|
||||
@ -627,10 +808,6 @@ const distributeTextAcrossElements = (text: string | undefined, elements: PdfJso
|
||||
const graphemeCount = Array.from(originalText).length;
|
||||
return graphemeCount > 0 ? graphemeCount : 1;
|
||||
});
|
||||
const totalCapacity = capacities.reduce((sum, value) => sum + value, 0);
|
||||
if (targetChars.length > totalCapacity) {
|
||||
return false;
|
||||
}
|
||||
|
||||
let cursor = 0;
|
||||
elements.forEach((element, index) => {
|
||||
@ -640,7 +817,9 @@ const distributeTextAcrossElements = (text: string | undefined, elements: PdfJso
|
||||
if (index === elements.length - 1) {
|
||||
sliceLength = remaining;
|
||||
} else {
|
||||
sliceLength = Math.min(capacities[index], remaining);
|
||||
const capacity = Math.max(capacities[index], 1);
|
||||
const minRemainingForRest = Math.max(elements.length - index - 1, 0);
|
||||
sliceLength = Math.min(capacity, Math.max(remaining - minRemainingForRest, 1));
|
||||
}
|
||||
}
|
||||
|
||||
@ -658,6 +837,118 @@ const distributeTextAcrossElements = (text: string | undefined, elements: PdfJso
|
||||
return true;
|
||||
};
|
||||
|
||||
/**
 * Splits a group's `originalElements` back into per-line buckets using
 * `lineElementCounts`.
 *
 * - Without counts, all elements form a single line (or no line when there are
 *   no elements).
 * - Non-positive counts are skipped.
 * - Elements not covered by the counts are appended to the last line so that
 *   no element is lost when the counts are short.
 *
 * @param group Group carrying `originalElements` and optional `lineElementCounts`.
 * @returns One non-empty array of elements per line.
 */
const sliceElementsByLineCounts = (group: TextGroup): PdfJsonTextElement[][] => {
  const elements = group.originalElements;
  const counts = group.lineElementCounts;

  if (!counts || counts.length === 0) {
    return elements.length > 0 ? [elements] : [];
  }

  const result: PdfJsonTextElement[][] = [];
  let cursor = 0;
  for (const count of counts) {
    if (count <= 0) {
      continue;
    }
    const slice = elements.slice(cursor, cursor + count);
    if (slice.length > 0) {
      result.push(slice);
    }
    cursor += count;
  }

  // Counts that add up to less than the element total would otherwise drop the
  // trailing elements silently.
  if (cursor < elements.length) {
    const leftover = elements.slice(cursor);
    const lastLine = result[result.length - 1];
    if (lastLine) {
      lastLine.push(...leftover);
    } else {
      result.push(leftover);
    }
  }

  return result;
};
|
||||
|
||||
/**
 * Rebuilds the text elements of a multi-line paragraph group, one set of
 * elements per line of `group.text`.
 *
 * Each text line reuses the original elements of the line at the same index as
 * a template. Text lines beyond the last original line reuse the last template,
 * shifted by one line spacing per extra line.
 *
 * @param group Paragraph group to rebuild.
 * @returns The rebuilt elements, or `null` when the text has no line break or
 *          no template elements are available.
 */
const rebuildParagraphLineElements = (group: TextGroup): PdfJsonTextElement[] | null => {
  if (!group.text || !group.text.includes('\n')) {
    return null;
  }

  // Always yields at least one entry, so no emptiness check is needed.
  const lineTexts = splitParagraphIntoLines(group.text);

  const lineElementGroups = sliceElementsByLineCounts(group);
  if (lineElementGroups.length === 0) {
    return null;
  }

  // First usable baseline of every original line, else the group's baseline.
  const lineBaselines = lineElementGroups.map((elements) => {
    for (const element of elements) {
      const baseline = extractElementBaseline(element);
      if (baseline !== null) {
        return baseline;
      }
    }
    return group.baseline ?? null;
  });

  // First non-zero distance between two consecutive known baselines.
  let spacingFromBaselines: number | null = null;
  for (let i = 1; i < lineBaselines.length; i += 1) {
    const prev = lineBaselines[i - 1];
    const current = lineBaselines[i];
    if (prev !== null && current !== null) {
      const diff = Math.abs(prev - current);
      if (diff > 0) {
        spacingFromBaselines = diff;
        break;
      }
    }
  }

  // Preference order: recorded line spacing, measured spacing, 1.2x font size.
  const spacing =
    (group.lineSpacing && group.lineSpacing > 0 ? group.lineSpacing : spacingFromBaselines) ??
    Math.max(group.fontMatrixSize ?? group.fontSize ?? 12, 6) * 1.2;

  // Sign of the baseline change from one line to the next; -1 unless the
  // original lines show a clear (> 0.05) difference.
  let direction = -1;
  for (let i = 1; i < lineBaselines.length; i += 1) {
    const prev = lineBaselines[i - 1];
    const current = lineBaselines[i];
    if (prev !== null && current !== null && Math.abs(prev - current) > 0.05) {
      direction = current < prev ? -1 : 1;
      break;
    }
  }

  const lastTemplateIndex = Math.max(lineElementGroups.length - 1, 0);
  const rebuilt: PdfJsonTextElement[] = [];

  for (let index = 0; index < lineTexts.length; index += 1) {
    const templateIndex = Math.min(index, lastTemplateIndex);
    const templateElements = lineElementGroups[templateIndex];
    if (!templateElements || templateElements.length === 0) {
      return null;
    }

    // Lines that have their own template are not shifted (zero steps).
    const shiftSteps = index - templateIndex;
    const clones = shiftElementsBy(templateElements, shiftSteps * spacing * direction);

    const normalizedLine = sanitizeParagraphText(lineTexts[index]);
    if (!distributeTextAcrossElements(normalizedLine, clones)) {
      // Distribution refused: put the whole line into the first clone and
      // blank out the rest.
      const [primary, ...rest] = clones;
      primary.text = normalizedLine;
      clearGlyphHints(primary);
      for (const extra of rest) {
        extra.text = '';
        clearGlyphHints(extra);
      }
    }

    rebuilt.push(...clones);
  }

  return rebuilt;
};
|
||||
|
||||
export const buildUpdatedDocument = (
|
||||
source: PdfJsonDocument,
|
||||
groupsByPage: TextGroup[][],
|
||||
@ -724,11 +1015,17 @@ export const restoreGlyphElements = (
|
||||
rebuiltElements.push(createMergedElement(group));
|
||||
return;
|
||||
}
|
||||
const paragraphElements = rebuildParagraphLineElements(group);
|
||||
if (paragraphElements && paragraphElements.length > 0) {
|
||||
rebuiltElements.push(...paragraphElements);
|
||||
return;
|
||||
}
|
||||
const originalGlyphCount = group.originalElements.reduce(
|
||||
(sum, element) => sum + countGraphemes(element.text ?? ''),
|
||||
0,
|
||||
);
|
||||
const targetGlyphCount = countGraphemes(group.text);
|
||||
const normalizedText = sanitizeParagraphText(group.text);
|
||||
const targetGlyphCount = countGraphemes(normalizedText);
|
||||
|
||||
if (targetGlyphCount !== originalGlyphCount) {
|
||||
rebuiltElements.push(createMergedElement(group));
|
||||
@ -736,7 +1033,7 @@ export const restoreGlyphElements = (
|
||||
}
|
||||
|
||||
const originals = group.originalElements.map(cloneTextElement);
|
||||
const distributed = distributeTextAcrossElements(group.text, originals);
|
||||
const distributed = distributeTextAcrossElements(normalizedText, originals);
|
||||
if (distributed) {
|
||||
rebuiltElements.push(...originals);
|
||||
} else {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user