mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-11-16 01:21:16 +01:00
init cool stuff
This commit is contained in:
parent
a58c69016b
commit
8247156f96
@ -34,6 +34,7 @@ import {
|
||||
PdfJsonFont,
|
||||
PdfJsonPage,
|
||||
ConversionProgress,
|
||||
TextGroup,
|
||||
} from '@app/tools/pdfTextEditor/pdfTextEditorTypes';
|
||||
import { getImageBounds, pageDimensions } from '@app/tools/pdfTextEditor/pdfTextEditorUtils';
|
||||
import FontStatusPanel from '@app/components/tools/pdfTextEditor/FontStatusPanel';
|
||||
@ -301,13 +302,12 @@ const analyzePageContentType = (groups: TextGroup[], pageWidth: number): boolean
|
||||
const stdDev = Math.sqrt(variance);
|
||||
const coefficientOfVariation = avgWordsPerGroup > 0 ? stdDev / avgWordsPerGroup : 0;
|
||||
|
||||
// All 4 criteria must pass for paragraph mode
|
||||
const criterion1 = multiLineGroups >= 2 && avgWordsPerGroup > 8;
|
||||
const criterion2 = avgWordsPerGroup > 5;
|
||||
const criterion3 = longTextRatio > 0.4;
|
||||
const criterion4 = coefficientOfVariation > 0.5 || fullWidthRatio > 0.6;
|
||||
// All 3 criteria must pass for paragraph mode
|
||||
const criterion1 = avgWordsPerGroup > 5;
|
||||
const criterion2 = longTextRatio > 0.4;
|
||||
const criterion3 = coefficientOfVariation > 0.5 || fullWidthRatio > 0.6;
|
||||
|
||||
const isParagraphPage = criterion1 && criterion2 && criterion3 && criterion4;
|
||||
const isParagraphPage = criterion1 && criterion2 && criterion3;
|
||||
|
||||
return isParagraphPage;
|
||||
};
|
||||
@ -319,6 +319,8 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
const [activeGroupId, setActiveGroupId] = useState<string | null>(null);
|
||||
const [editingGroupId, setEditingGroupId] = useState<string | null>(null);
|
||||
const [activeImageId, setActiveImageId] = useState<string | null>(null);
|
||||
const [selectedGroupIds, setSelectedGroupIds] = useState<Set<string>>(new Set());
|
||||
const [widthOverrides, setWidthOverrides] = useState<Map<string, number>>(new Map());
|
||||
const draggingImageRef = useRef<string | null>(null);
|
||||
const rndRefs = useRef<Map<string, any>>(new Map());
|
||||
const pendingDragUpdateRef = useRef<number | null>(null);
|
||||
@ -330,6 +332,15 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
const containerRef = useRef<HTMLDivElement | null>(null);
|
||||
const editorRefs = useRef<Map<string, HTMLDivElement>>(new Map());
|
||||
const caretOffsetsRef = useRef<Map<string, number>>(new Map());
|
||||
const lastSelectedGroupIdRef = useRef<string | null>(null);
|
||||
const widthOverridesRef = useRef<Map<string, number>>(widthOverrides);
|
||||
const resizingRef = useRef<{
|
||||
groupId: string;
|
||||
startX: number;
|
||||
startWidth: number;
|
||||
baseWidth: number;
|
||||
maxWidth: number;
|
||||
} | null>(null);
|
||||
|
||||
const {
|
||||
document: pdfDocument,
|
||||
@ -359,6 +370,8 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
onGeneratePdf,
|
||||
onForceSingleTextElementChange,
|
||||
onGroupingModeChange,
|
||||
onMergeGroups,
|
||||
onUngroupGroup,
|
||||
} = data;
|
||||
|
||||
const handleModeChangeRequest = useCallback((newMode: GroupingMode) => {
|
||||
@ -382,6 +395,15 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
setPendingModeChange(null);
|
||||
}, []);
|
||||
|
||||
// Empties the multi-selection and forgets the anchor used for shift-click ranges.
const clearSelection = useCallback((): void => {
  lastSelectedGroupIdRef.current = null;
  setSelectedGroupIds(new Set<string>());
}, []);

// Mirror the latest width overrides into a ref after every change to them.
useEffect(() => {
  widthOverridesRef.current = widthOverrides;
}, [widthOverrides]);
|
||||
|
||||
const resolveFont = (fontId: string | null | undefined, pageIndex: number | null | undefined): PdfJsonFont | null => {
|
||||
if (!fontId || !pdfDocument?.fonts) {
|
||||
return null;
|
||||
@ -548,11 +570,78 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
const pagePreview = pagePreviews.get(selectedPage);
|
||||
const { width: pageWidth, height: pageHeight } = pageDimensions(currentPage);
|
||||
|
||||
// A selection never carries over to a different page.
useEffect(() => {
  clearSelection();
}, [clearSelection, selectedPage]);

// Switching the grouping mode also clears the selection.
useEffect(() => {
  clearSelection();
}, [clearSelection, externalGroupingMode]);

// A different document starts with no manual width overrides.
useEffect(() => {
  setWidthOverrides(new Map());
}, [pdfDocument]);

// Whenever the page's groups change, drop selected ids and width overrides
// that no longer match any group. The previous state object is returned
// unchanged when nothing was dropped, so no needless re-render is triggered.
useEffect(() => {
  const liveIds = new Set(pageGroups.map((group) => group.id));

  setSelectedGroupIds((current) => {
    const surviving = Array.from(current).filter((id) => liveIds.has(id));
    return surviving.length === current.size ? current : new Set(surviving);
  });

  setWidthOverrides((current) => {
    const surviving = new Map<string, number>();
    for (const { id } of pageGroups) {
      if (!current.has(id)) {
        continue;
      }
      surviving.set(id, current.get(id) ?? 0);
    }
    return surviving.size === current.size ? current : surviving;
  });
}, [pageGroups]);
|
||||
|
||||
// Detect if current page contains paragraph-heavy content
|
||||
// Memoised classification of the current page: re-evaluated only when the
// page's text groups or its width change. Declared once, and free of
// debug logging so nothing is written to the console during render.
const isParagraphPage = useMemo(
  () => analyzePageContentType(pageGroups, pageWidth),
  [pageGroups, pageWidth],
);
|
||||
const isParagraphLayout =
|
||||
externalGroupingMode === 'paragraph' || (externalGroupingMode === 'auto' && isParagraphPage);
|
||||
|
||||
// Computes the width to use for a text group.
//   base  - the group's own bounds width, never below 1
//   max   - distance from the group's left edge to the page width, never below base
//   width - the stored override clamped into [base, max], or base when there is no override
const resolveGroupWidth = useCallback(
  (group: TextGroup): { width: number; base: number; max: number } => {
    const { left, right } = group.bounds;
    const base = Math.max(right - left, 1);
    const max = Math.max(pageWidth - left, base);

    const requested = widthOverrides.get(group.id);
    if (!requested) {
      return { width: base, base, max };
    }

    const width = Math.min(Math.max(requested, base), max);
    return { width, base, max };
  },
  [pageWidth, widthOverrides],
);
|
||||
|
||||
// Selected ids as an array, in the Set's insertion order.
const selectedGroupIdsArray = useMemo(() => Array.from(selectedGroupIds), [selectedGroupIds]);

// Ascending positions of the selected groups within the current page;
// ids that are not on the page are ignored.
const selectionIndices = useMemo(() => {
  const positions: number[] = [];
  for (const id of selectedGroupIdsArray) {
    const position = pageGroups.findIndex((group) => group.id === id);
    if (position >= 0) {
      positions.push(position);
    }
  }
  positions.sort((a, b) => a - b);
  return positions;
}, [pageGroups, selectedGroupIdsArray]);

// Merging needs at least two groups that sit next to each other on the page.
const hasGapInSelection = selectionIndices.some(
  (value, idx, array) => idx > 0 && value !== array[idx - 1] + 1,
);
const canMergeSelection = selectionIndices.length >= 2 && !hasGapInSelection;

// Selected groups that carry more than one child line group, i.e. can be split.
const paragraphSelectionIds = useMemo(
  () =>
    selectedGroupIdsArray.filter((id) => {
      const target = pageGroups.find((group) => group.id === id);
      if (!target) {
        return false;
      }
      return (target.childLineGroups?.length ?? 0) > 1;
    }),
  [pageGroups, selectedGroupIdsArray],
);
const canUngroupSelection = paragraphSelectionIds.length > 0;

const hasWidthOverrides = selectedGroupIdsArray.some((id) => widthOverrides.has(id));
const hasSelection = selectedGroupIdsArray.length > 0;
|
||||
|
||||
const syncEditorValue = useCallback(
|
||||
(
|
||||
element: HTMLElement,
|
||||
@ -581,6 +670,69 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
[editingGroupId, onGroupEdit],
|
||||
);
|
||||
|
||||
// Asks the parent to merge the selected groups (in page order) and clears
// the selection when the parent reports that a merge happened.
const handleMergeSelection = useCallback(() => {
  if (!canMergeSelection) {
    return;
  }

  const orderedIds: string[] = [];
  for (const position of selectionIndices) {
    const id = pageGroups[position]?.id;
    if (id) {
      orderedIds.push(id);
    }
  }

  if (orderedIds.length >= 2 && onMergeGroups(selectedPage, orderedIds)) {
    clearSelection();
  }
}, [canMergeSelection, selectionIndices, pageGroups, onMergeGroups, selectedPage, clearSelection]);
|
||||
|
||||
// Asks the parent to ungroup every selected multi-line group. Each id is
// always attempted; the selection is cleared if at least one call reports success.
const handleUngroupSelection = useCallback(() => {
  if (!canUngroupSelection) {
    return;
  }

  const outcomes = paragraphSelectionIds.map((id) => onUngroupGroup(selectedPage, id));
  if (outcomes.some(Boolean)) {
    clearSelection();
  }
}, [canUngroupSelection, paragraphSelectionIds, onUngroupGroup, selectedPage, clearSelection]);
|
||||
|
||||
// Applies a width action to every selected group on the current page.
//   'expand' - store an override equal to the widest width the group may take
//   'reset'  - remove the group's override
const handleWidthAdjustment = useCallback(
  (mode: 'expand' | 'reset') => {
    const nothingToDo =
      (mode === 'expand' && !hasSelection) || (mode === 'reset' && !hasWidthOverrides);
    if (nothingToDo) {
      return;
    }

    const targets: TextGroup[] = [];
    for (const id of selectedGroupIdsArray) {
      const match = pageGroups.find((group) => group.id === id);
      if (match) {
        targets.push(match);
      }
    }
    if (targets.length === 0) {
      return;
    }

    setWidthOverrides((current) => {
      const updated = new Map(current);
      for (const { id, bounds } of targets) {
        if (mode !== 'expand') {
          updated.delete(id);
          continue;
        }
        const naturalWidth = Math.max(bounds.right - bounds.left, 1);
        updated.set(id, Math.max(pageWidth - bounds.left, naturalWidth));
      }
      return updated;
    });
  },
  [hasSelection, hasWidthOverrides, selectedGroupIdsArray, pageGroups, pageWidth],
);
|
||||
|
||||
const extractPreferredFontId = useCallback((target?: TextGroup | null) => {
|
||||
if (!target) {
|
||||
return undefined;
|
||||
@ -874,7 +1026,8 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
textSpan.style.transform = 'none';
|
||||
|
||||
const bounds = toCssBounds(currentPage, pageHeight, scale, group.bounds);
|
||||
const containerWidth = bounds.width;
|
||||
const { width: resolvedWidth } = resolveGroupWidth(group);
|
||||
const containerWidth = resolvedWidth * scale;
|
||||
const textWidth = textSpan.getBoundingClientRect().width;
|
||||
|
||||
// Restore original transform
|
||||
@ -907,6 +1060,7 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
fontFamilies.size,
|
||||
selectedPage,
|
||||
isParagraphLayout,
|
||||
resolveGroupWidth,
|
||||
]);
|
||||
|
||||
useLayoutEffect(() => {
|
||||
@ -977,6 +1131,7 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
const handlePageChange = (pageNumber: number) => {
|
||||
setActiveGroupId(null);
|
||||
setEditingGroupId(null);
|
||||
clearSelection();
|
||||
onSelectPage(pageNumber - 1);
|
||||
};
|
||||
|
||||
@ -984,8 +1139,97 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
setEditingGroupId(null);
|
||||
setActiveGroupId(null);
|
||||
setActiveImageId(null);
|
||||
clearSelection();
|
||||
};
|
||||
|
||||
// Updates the multi-selection in response to a click on a group.
//   Ctrl/Cmd click - toggles the clicked group in the selection
//   Shift click    - selects the run between the remembered anchor and the clicked
//                    group (only when an anchor exists; otherwise a plain click)
//   plain click    - selects only the clicked group
// The anchor is updated on every click that is not a range selection.
// Returns true only for a plain click, i.e. when no modifier-driven selection happened.
const handleSelectionInteraction = useCallback(
  (groupId: string, groupIndex: number, event: React.MouseEvent): boolean => {
    const isToggle = event.metaKey || event.ctrlKey;
    const isRange = event.shiftKey && lastSelectedGroupIdRef.current !== null;

    setSelectedGroupIds((current) => {
      if (isToggle) {
        const toggled = new Set(current);
        // delete() reports whether the id was present; add it back only if it was not.
        if (!toggled.delete(groupId)) {
          toggled.add(groupId);
        }
        return toggled;
      }

      if (!isRange) {
        return new Set([groupId]);
      }

      const anchorId = lastSelectedGroupIdRef.current;
      const anchorIndex = anchorId ? pageGroups.findIndex((group) => group.id === anchorId) : -1;
      if (anchorIndex === -1) {
        // The anchor is no longer on this page: fall back to a single selection.
        return new Set([groupId]);
      }

      const lower = Math.min(anchorIndex, groupIndex);
      const upper = Math.max(anchorIndex, groupIndex);
      const ranged = new Set<string>();
      for (let position = lower; position <= upper; position += 1) {
        const member = pageGroups[position];
        if (member) {
          ranged.add(member.id);
        }
      }
      return ranged;
    });

    if (!isRange) {
      lastSelectedGroupIdRef.current = groupId;
    }

    return !isToggle && !isRange;
  },
  [pageGroups],
);
|
||||
|
||||
// Starts a horizontal resize drag for a text group.
// The drag context is kept in resizingRef; window-level mousemove/mouseup
// listeners follow the pointer and are removed again on mouseup.
// Pointer movement is divided by `scale` before being added to the starting
// width, and the result is clamped between the group's own width and the
// space available up to the page width.
const handleResizeStart = useCallback(
  (event: React.MouseEvent, group: TextGroup, currentWidth: number) => {
    event.stopPropagation();
    event.preventDefault();

    const naturalWidth = Math.max(group.bounds.right - group.bounds.left, 1);
    resizingRef.current = {
      groupId: group.id,
      startX: event.clientX,
      startWidth: currentWidth,
      baseWidth: naturalWidth,
      maxWidth: Math.max(pageWidth - group.bounds.left, naturalWidth),
    };

    const onPointerMove = (moveEvent: MouseEvent) => {
      const drag = resizingRef.current;
      if (!drag) {
        return;
      }
      moveEvent.preventDefault();

      const travelled = moveEvent.clientX - drag.startX;
      const unclamped = drag.startWidth + travelled / scale;
      const clamped = Math.min(Math.max(unclamped, drag.baseWidth), drag.maxWidth);
      // Within half a unit of the natural width the override is dropped entirely.
      const backAtNaturalWidth = Math.abs(clamped - drag.baseWidth) <= 0.5;

      setWidthOverrides((current) => {
        const updated = new Map(current);
        if (backAtNaturalWidth) {
          updated.delete(drag.groupId);
        } else {
          updated.set(drag.groupId, clamped);
        }
        return updated;
      });
    };

    const onPointerUp = () => {
      resizingRef.current = null;
      window.removeEventListener('mousemove', onPointerMove);
      window.removeEventListener('mouseup', onPointerUp);
    };

    window.addEventListener('mousemove', onPointerMove);
    window.addEventListener('mouseup', onPointerUp);
  },
  [pageWidth, scale],
);
|
||||
|
||||
const renderGroupContainer = (
|
||||
groupId: string,
|
||||
pageIndex: number,
|
||||
@ -994,6 +1238,8 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
content: React.ReactNode,
|
||||
onActivate?: (event: React.MouseEvent) => void,
|
||||
onClick?: (event: React.MouseEvent) => void,
|
||||
isSelected = false,
|
||||
resizeHandle?: React.ReactNode,
|
||||
) => (
|
||||
<Box
|
||||
component="div"
|
||||
@ -1004,12 +1250,20 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
marginTop: '-3px',
|
||||
outline: isActive
|
||||
? '2px solid var(--mantine-color-blue-5)'
|
||||
: isChanged
|
||||
? '1px solid var(--mantine-color-yellow-5)'
|
||||
: 'none',
|
||||
: isSelected
|
||||
? '1px solid var(--mantine-color-violet-5)'
|
||||
: isChanged
|
||||
? '1px solid var(--mantine-color-yellow-5)'
|
||||
: 'none',
|
||||
outlineOffset: '-1px',
|
||||
borderRadius: 6,
|
||||
backgroundColor: isChanged || isActive ? 'rgba(250,255,189,0.28)' : 'transparent',
|
||||
backgroundColor: isActive
|
||||
? 'rgba(184,212,255,0.35)'
|
||||
: isSelected
|
||||
? 'rgba(206,190,255,0.32)'
|
||||
: isChanged
|
||||
? 'rgba(250,255,189,0.28)'
|
||||
: 'transparent',
|
||||
transition: 'outline 120ms ease, background-color 120ms ease',
|
||||
pointerEvents: 'auto',
|
||||
overflow: 'visible',
|
||||
@ -1029,6 +1283,7 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
}}
|
||||
>
|
||||
{content}
|
||||
{resizeHandle}
|
||||
{activeGroupId === groupId && (
|
||||
<ActionIcon
|
||||
size="xs"
|
||||
@ -1201,12 +1456,12 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
{t('pdfTextEditor.options.groupingMode.title', 'Text Grouping Mode')}
|
||||
</Text>
|
||||
{externalGroupingMode === 'auto' && isParagraphPage && (
|
||||
<Badge size="xs" color="blue" variant="light">
|
||||
<Badge size="xs" color="blue" variant="light" key={`para-${selectedPage}`}>
|
||||
{t('pdfTextEditor.pageType.paragraph', 'Paragraph page')}
|
||||
</Badge>
|
||||
)}
|
||||
{externalGroupingMode === 'auto' && !isParagraphPage && hasDocument && (
|
||||
<Badge size="xs" color="gray" variant="light">
|
||||
<Badge size="xs" color="gray" variant="light" key={`sparse-${selectedPage}`}>
|
||||
{t('pdfTextEditor.pageType.sparse', 'Sparse text')}
|
||||
</Badge>
|
||||
)}
|
||||
@ -1239,6 +1494,59 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
/>
|
||||
</Stack>
|
||||
|
||||
<Stack gap="xs">
|
||||
<Group gap={4} align="center">
|
||||
<Text fw={500} size="sm">
|
||||
{t('pdfTextEditor.options.manualGrouping.title', 'Manual Text Grouping')}
|
||||
</Text>
|
||||
<Badge size="xs" color="violet" variant="light">
|
||||
{t('pdfTextEditor.badges.beta', 'Beta')}
|
||||
</Badge>
|
||||
</Group>
|
||||
<Text size="xs" c="dimmed">
|
||||
{t(
|
||||
'pdfTextEditor.options.manualGrouping.description',
|
||||
'Hold Ctrl (Cmd) or Shift while clicking to multi-select text boxes, then merge or ungroup them manually.',
|
||||
)}
|
||||
</Text>
|
||||
<Group grow>
|
||||
<Button
|
||||
size="xs"
|
||||
variant="subtle"
|
||||
disabled={!canMergeSelection}
|
||||
onClick={handleMergeSelection}
|
||||
>
|
||||
{t('pdfTextEditor.manual.merge', 'Merge selection')}
|
||||
</Button>
|
||||
<Button
|
||||
size="xs"
|
||||
variant="subtle"
|
||||
disabled={!canUngroupSelection}
|
||||
onClick={handleUngroupSelection}
|
||||
>
|
||||
{t('pdfTextEditor.manual.ungroup', 'Ungroup selection')}
|
||||
</Button>
|
||||
</Group>
|
||||
<Group grow>
|
||||
<Button
|
||||
size="xs"
|
||||
variant="light"
|
||||
disabled={!hasSelection}
|
||||
onClick={() => handleWidthAdjustment('expand')}
|
||||
>
|
||||
{t('pdfTextEditor.manual.expandWidth', 'Expand to page edge')}
|
||||
</Button>
|
||||
<Button
|
||||
size="xs"
|
||||
variant="light"
|
||||
disabled={!hasWidthOverrides}
|
||||
onClick={() => handleWidthAdjustment('reset')}
|
||||
>
|
||||
{t('pdfTextEditor.manual.resetWidth', 'Reset width')}
|
||||
</Button>
|
||||
</Group>
|
||||
</Stack>
|
||||
|
||||
<Group justify="space-between" align="center">
|
||||
<div>
|
||||
<Text fw={500} size="sm">
|
||||
@ -1615,7 +1923,8 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
|
||||
let containerLeft = bounds.left;
|
||||
let containerTop = bounds.top;
|
||||
let containerWidth = Math.max(bounds.width, fontSizePx);
|
||||
const { width: resolvedWidth, base: baseWidth, max: maxWidth } = resolveGroupWidth(group);
|
||||
let containerWidth = Math.max(resolvedWidth * scale, fontSizePx);
|
||||
let containerHeight = Math.max(bounds.height, paragraphHeightPx);
|
||||
let transform: string | undefined;
|
||||
let transformOrigin: React.CSSProperties['transformOrigin'];
|
||||
@ -1654,14 +1963,15 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
|
||||
// Determine text wrapping behavior based on whether text has been changed
|
||||
const hasChanges = changed;
|
||||
const shouldWrap = hasChanges && isParagraphLayout;
|
||||
const whiteSpace = shouldWrap ? 'pre-wrap' : 'pre';
|
||||
const wordBreak = shouldWrap ? 'break-word' : 'normal';
|
||||
const overflowWrap = shouldWrap ? 'break-word' : 'normal';
|
||||
const widthExtended = resolvedWidth - baseWidth > 0.5;
|
||||
const enableWrap = isParagraphLayout || widthExtended || isEditing || hasChanges;
|
||||
const whiteSpace = enableWrap ? 'pre-wrap' : 'pre';
|
||||
const wordBreak = enableWrap ? 'break-word' : 'normal';
|
||||
const overflowWrap = enableWrap ? 'break-word' : 'normal';
|
||||
|
||||
// For paragraph mode, allow height to grow to accommodate lines without wrapping
|
||||
// For single-line mode, maintain fixed height based on PDF bounds
|
||||
const useFlexibleHeight = isEditing || shouldWrap || (isParagraphLayout && lineCount > 1);
|
||||
const useFlexibleHeight = isEditing || enableWrap || (isParagraphLayout && lineCount > 1);
|
||||
|
||||
// The renderGroupContainer wrapper adds 4px horizontal padding (2px left + 2px right)
|
||||
// We need to add this to the container width to compensate, so the inner content
|
||||
@ -1685,6 +1995,35 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
transformOrigin,
|
||||
};
|
||||
|
||||
const showResizeHandle = !hasRotation && (selectedGroupIds.has(group.id) || activeGroupId === group.id);
|
||||
const resizeHandle = showResizeHandle ? (
|
||||
<Box
|
||||
role="button"
|
||||
aria-label={t('pdfTextEditor.manual.resizeHandle', 'Adjust text width')}
|
||||
onMouseDown={(event) => handleResizeStart(event, group, resolvedWidth)}
|
||||
style={{
|
||||
position: 'absolute',
|
||||
top: '50%',
|
||||
right: -6,
|
||||
width: 12,
|
||||
height: 32,
|
||||
marginTop: -16,
|
||||
cursor: 'ew-resize',
|
||||
borderRadius: 6,
|
||||
backgroundColor: 'rgba(76, 110, 245, 0.35)',
|
||||
border: '1px solid rgba(76, 110, 245, 0.8)',
|
||||
display: 'flex',
|
||||
alignItems: 'center',
|
||||
justifyContent: 'center',
|
||||
color: 'white',
|
||||
fontSize: 9,
|
||||
userSelect: 'none',
|
||||
}}
|
||||
>
|
||||
||
|
||||
</Box>
|
||||
) : null;
|
||||
|
||||
if (isEditing) {
|
||||
return (
|
||||
<Box key={group.id} style={containerStyle}>
|
||||
@ -1741,7 +2080,7 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
minHeight: '100%',
|
||||
height: 'auto',
|
||||
padding: 0,
|
||||
backgroundColor: 'rgba(255,255,255,0.95)',
|
||||
backgroundColor: 'rgba(255,255,255,0.95)',
|
||||
color: textColor,
|
||||
fontSize: `${fontSizePx}px`,
|
||||
fontFamily,
|
||||
@ -1750,15 +2089,19 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
outline: 'none',
|
||||
border: 'none',
|
||||
display: 'block',
|
||||
whiteSpace: isParagraphLayout ? 'pre-wrap' : 'pre',
|
||||
wordBreak: isParagraphLayout ? 'break-word' : 'normal',
|
||||
overflowWrap: isParagraphLayout ? 'break-word' : 'normal',
|
||||
whiteSpace,
|
||||
wordBreak,
|
||||
overflowWrap,
|
||||
cursor: 'text',
|
||||
overflow: 'visible',
|
||||
}}
|
||||
>
|
||||
{group.text || '\u00A0'}
|
||||
</div>,
|
||||
undefined,
|
||||
undefined,
|
||||
selectedGroupIds.has(group.id),
|
||||
resizeHandle,
|
||||
)}
|
||||
</Box>
|
||||
);
|
||||
@ -1790,14 +2133,14 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
color: textColor,
|
||||
display: 'block',
|
||||
cursor: 'text',
|
||||
overflow: shouldWrap ? 'visible' : 'hidden',
|
||||
overflow: enableWrap ? 'visible' : 'hidden',
|
||||
}}
|
||||
>
|
||||
<span
|
||||
data-text-content
|
||||
style={{
|
||||
pointerEvents: 'none',
|
||||
display: shouldWrap ? 'inline' : 'inline-block',
|
||||
display: enableWrap ? 'inline' : 'inline-block',
|
||||
transform: shouldScale ? `scaleX(${textScale})` : 'none',
|
||||
transformOrigin: 'left center',
|
||||
whiteSpace,
|
||||
@ -1808,6 +2151,13 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
</div>,
|
||||
undefined,
|
||||
(event: React.MouseEvent) => {
|
||||
const shouldActivate = handleSelectionInteraction(group.id, pageGroupIndex, event);
|
||||
if (!shouldActivate) {
|
||||
setActiveGroupId(null);
|
||||
setEditingGroupId(null);
|
||||
return;
|
||||
}
|
||||
|
||||
const clickX = event.clientX;
|
||||
const clickY = event.clientY;
|
||||
|
||||
@ -1815,6 +2165,22 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
setEditingGroupId(group.id);
|
||||
caretOffsetsRef.current.delete(group.id);
|
||||
|
||||
// Log group stats when selected
|
||||
const lines = (group.text ?? '').split('\n');
|
||||
const words = (group.text ?? '').split(/\s+/).filter(w => w.length > 0).length;
|
||||
const chars = (group.text ?? '').length;
|
||||
const width = group.bounds.right - group.bounds.left;
|
||||
const height = group.bounds.bottom - group.bounds.top;
|
||||
const isMultiLine = lines.length > 1;
|
||||
console.log(`📝 Selected Text Group "${group.id}":`);
|
||||
console.log(` Lines: ${lines.length}, Words: ${words}, Chars: ${chars}`);
|
||||
console.log(` Dimensions: ${width.toFixed(1)}pt × ${height.toFixed(1)}pt`);
|
||||
console.log(` Type: ${isMultiLine ? 'MULTI-LINE (paragraph)' : 'SINGLE-LINE'}`);
|
||||
console.log(` Text preview: "${(group.text ?? '').substring(0, 80)}${(group.text ?? '').length > 80 ? '...' : ''}"`);
|
||||
if (isMultiLine) {
|
||||
console.log(` Line spacing: ${group.lineSpacing?.toFixed(1) ?? 'unknown'}pt`);
|
||||
}
|
||||
|
||||
requestAnimationFrame(() => {
|
||||
const editor = document.querySelector<HTMLElement>(`[data-editor-group="${group.id}"]`);
|
||||
if (!editor) return;
|
||||
@ -1846,6 +2212,8 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => {
|
||||
}, 10);
|
||||
});
|
||||
},
|
||||
selectedGroupIds.has(group.id),
|
||||
resizeHandle,
|
||||
)}
|
||||
</Box>
|
||||
);
|
||||
|
||||
@ -18,6 +18,7 @@ import {
|
||||
PdfJsonPage,
|
||||
TextGroup,
|
||||
PdfTextEditorViewData,
|
||||
BoundingBox,
|
||||
} from './pdfTextEditorTypes';
|
||||
import {
|
||||
deepCloneDocument,
|
||||
@ -26,6 +27,7 @@ import {
|
||||
restoreGlyphElements,
|
||||
extractDocumentImages,
|
||||
cloneImageElement,
|
||||
cloneTextElement,
|
||||
valueOr,
|
||||
} from './pdfTextEditorUtils';
|
||||
import PdfTextEditorView from '@app/components/tools/pdfTextEditor/PdfTextEditorView';
|
||||
@ -52,6 +54,148 @@ const getAutoLoadKey = (file: File): string => {
|
||||
return `${file.name}|${file.size}|${file.lastModified}`;
|
||||
};
|
||||
|
||||
/**
 * Splits `value` into exactly `expected` lines.
 *
 * Carriage returns are stripped first. Too few lines are padded with empty
 * strings; surplus lines are folded into the final entry, joined by `\n`.
 * When `expected` is zero or negative the whole text is returned as one entry.
 *
 * @param value    Text to split; null/undefined is treated as an empty string.
 * @param expected Number of lines the result should contain.
 * @returns The text as an array of lines.
 */
const normalizeLineArray = (value: string | undefined | null, expected: number): string[] => {
  const text = (value ?? '').replace(/\r/g, '');
  if (expected <= 0) {
    return [text];
  }

  const lines = text.split('\n');
  const surplus = lines.length - expected;

  if (surplus === 0) {
    return lines;
  }

  if (surplus < 0) {
    const padding = new Array<string>(-surplus).fill('');
    return [...lines, ...padding];
  }

  // Keep the first (expected - 1) lines and fold everything else into the last slot.
  const keep = Math.max(expected - 1, 0);
  return [...lines.slice(0, keep), lines.slice(keep).join('\n')];
};
|
||||
|
||||
/**
 * Copies a text group as a standalone single-line group.
 *
 * The copy carries no paragraph metadata (child lines, per-line element
 * counts and line spacing are nulled) and its element arrays are rebuilt
 * via `cloneTextElement`.
 *
 * @param line         Group to copy.
 * @param text         Replacement text; falls back to `line.text` when null/undefined.
 * @param originalText Replacement original text; falls back to `line.originalText`.
 * @returns The new group.
 */
const cloneLineTemplate = (line: TextGroup, text?: string, originalText?: string): TextGroup => {
  const resolvedText = text ?? line.text;
  const resolvedOriginalText = originalText ?? line.originalText;
  const elements = line.elements.map(cloneTextElement);
  const originalElements = line.originalElements.map(cloneTextElement);

  return {
    ...line,
    text: resolvedText,
    originalText: resolvedOriginalText,
    childLineGroups: null,
    lineElementCounts: null,
    lineSpacing: null,
    elements,
    originalElements,
  };
};
|
||||
|
||||
/**
 * Flattens a group into single-line groups.
 *
 * A group with child line groups yields one copy per child, each taking its
 * text and original text from the matching line of the parent's text. A
 * group without children yields a single copy of itself.
 *
 * @param group Group to flatten.
 * @returns One line group per line.
 */
const expandGroupToLines = (group: TextGroup): TextGroup[] => {
  const children = group.childLineGroups;
  if (!children || children.length === 0) {
    return [cloneLineTemplate(group)];
  }

  const lineCount = children.length;
  const currentLines = normalizeLineArray(group.text, lineCount);
  const sourceLines = normalizeLineArray(group.originalText, lineCount);

  return children.map((child, position) =>
    cloneLineTemplate(child, currentLines[position], sourceLines[position]),
  );
};
|
||||
|
||||
/**
 * Computes the smallest box that contains every given box: minimum of
 * `left`/`top`, maximum of `right`/`bottom`.
 *
 * @param boxes Boxes to combine.
 * @returns The enclosing box, or an all-zero box when `boxes` is empty.
 */
const mergeBoundingBoxes = (boxes: BoundingBox[]): BoundingBox => {
  if (boxes.length === 0) {
    return { left: 0, right: 0, top: 0, bottom: 0 };
  }

  let { left, right, top, bottom } = boxes[0];
  for (const box of boxes) {
    left = Math.min(left, box.left);
    right = Math.max(right, box.right);
    top = Math.min(top, box.top);
    bottom = Math.max(bottom, box.bottom);
  }

  return { left, right, top, bottom };
};
|
||||
|
||||
/**
 * Builds one multi-line group out of several groups.
 *
 * Every input group is flattened to its lines. The result inherits its
 * remaining fields from the first input group; its text is the lines joined
 * by `\n`, its bounds enclose all lines, and its line spacing is the mean of
 * the non-zero distances between consecutive baselines (a line's bottom
 * bound stands in for a missing baseline).
 *
 * @param groups Groups to merge, in the order their lines should appear.
 * @returns The merged group, or null when the input is empty or flattens to at most one line.
 */
const buildMergedGroupFromSelection = (groups: TextGroup[]): TextGroup | null => {
  if (groups.length === 0) {
    return null;
  }

  const lines = groups.flatMap((group) => expandGroupToLines(group));
  if (lines.length <= 1) {
    return null;
  }

  const texts: string[] = [];
  const originalTexts: string[] = [];
  const elementCounts: number[] = [];
  for (const line of lines) {
    texts.push(line.text ?? '');
    originalTexts.push(line.originalText ?? '');
    elementCounts.push(Math.max(line.originalElements.length, 1));
  }

  // The merged element list is rebuilt from the lines' original elements.
  const originalElements = lines.flatMap((line) => line.originalElements.map(cloneTextElement));
  const elements = originalElements.map(cloneTextElement);
  const bounds = mergeBoundingBoxes(lines.map((line) => line.bounds));

  const baselineOf = (line: TextGroup): number => line.baseline ?? line.bounds.bottom;
  let spacingTotal = 0;
  let spacingSamples = 0;
  let previousBaseline = baselineOf(lines[0]);
  for (const line of lines.slice(1)) {
    const currentBaseline = baselineOf(line);
    const gap = Math.abs(previousBaseline - currentBaseline);
    if (gap > 0) {
      spacingTotal += gap;
      spacingSamples += 1;
    }
    previousBaseline = currentBaseline;
  }
  const lineSpacing = spacingSamples > 0 ? spacingTotal / spacingSamples : null;

  const childLineGroups = lines.map((line, position) =>
    cloneLineTemplate(line, texts[position], originalTexts[position]),
  );

  const merged: TextGroup = {
    ...groups[0],
    text: texts.join('\n'),
    originalText: originalTexts.join('\n'),
    elements,
    originalElements,
    bounds,
    lineSpacing,
    lineElementCounts: elementCounts.length > 1 ? elementCounts : null,
    childLineGroups,
  };
  return merged;
};
|
||||
|
||||
/**
 * Splits a multi-line group back into one group per child line.
 *
 * Each resulting line is based on the matching child line group, receives
 * its text from the corresponding line of the parent's text, and takes a
 * consecutive slice of the parent's original elements. Slice sizes come from
 * `lineElementCounts` when its length matches the line count, otherwise from
 * each child's own original element count (at least 1). Elements left
 * unassigned are added to the last line. Every line gets a fresh id built
 * from the parent id, the line number and the current timestamp.
 *
 * @param group Group to split.
 * @returns The line groups, or an empty array when the group has at most one child line.
 */
const splitParagraphGroup = (group: TextGroup): TextGroup[] => {
  const children = group.childLineGroups;
  if (!children || children.length <= 1) {
    return [];
  }

  const templates = children.map((child) => cloneLineTemplate(child));
  const lineCount = templates.length;
  const currentLines = normalizeLineArray(group.text, lineCount);
  const sourceLines = normalizeLineArray(group.originalText, lineCount);

  const recordedCounts = group.lineElementCounts;
  const counts =
    recordedCounts && recordedCounts.length === lineCount
      ? Array.from(recordedCounts)
      : templates.map((line) => Math.max(line.originalElements.length, 1));

  let assigned = 0;
  for (const count of counts) {
    assigned += count;
  }
  const unassigned = group.originalElements.length - assigned;
  if (unassigned > 0 && counts.length > 0) {
    counts[counts.length - 1] += unassigned;
  }

  const result: TextGroup[] = [];
  let cursor = 0;
  templates.forEach((template, index) => {
    const take = Math.max(1, counts[index] ?? 1);
    const originals = group.originalElements.slice(cursor, cursor + take).map(cloneTextElement);
    cursor += take;

    result.push({
      ...template,
      id: `${group.id}-line-${index + 1}-${Date.now()}-${index}`,
      text: currentLines[index] ?? '',
      originalText: sourceLines[index] ?? '',
      elements: originals.map(cloneTextElement),
      originalElements: originals,
      lineElementCounts: null,
      lineSpacing: null,
      childLineGroups: null,
    });
  });
  return result;
};
|
||||
|
||||
const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
const { t } = useTranslation();
|
||||
const {
|
||||
@ -609,6 +753,73 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
});
|
||||
}, []);
|
||||
|
||||
// Replaces a run of adjacent groups on one page with a single merged group.
// The page is left untouched when any id is unknown, the groups are not
// adjacent, or no merged group could be built.
// Returns the value of `updated`, which is only set inside the state updater.
// NOTE(review): the return value therefore depends on the updater having run
// before this function returns — confirm this holds wherever the result is used.
const handleMergeGroups = useCallback((pageIndex: number, groupIds: string[]): boolean => {
  if (groupIds.length < 2) {
    return false;
  }

  let updated = false;
  setGroupsByPage((previous) =>
    previous.map((groups, idx) => {
      if (idx !== pageIndex) {
        return groups;
      }

      const positions: number[] = [];
      for (const id of groupIds) {
        const position = groups.findIndex((group) => group.id === id);
        if (position < 0) {
          return groups;
        }
        positions.push(position);
      }
      positions.sort((a, b) => a - b);

      const isAdjacentRun = positions.every(
        (position, i) => i === 0 || position === positions[i - 1] + 1,
      );
      if (!isAdjacentRun) {
        return groups;
      }

      const merged = buildMergedGroupFromSelection(positions.map((position) => groups[position]));
      if (!merged) {
        return groups;
      }

      const firstPosition = positions[0];
      const lastPosition = positions[positions.length - 1];
      const next = groups.slice();
      next.splice(firstPosition, lastPosition - firstPosition + 1, merged);
      updated = true;
      return next;
    }),
  );
  return updated;
}, []);
|
||||
|
||||
/**
 * Splits one text group on a page back into the groups produced by
 * `splitParagraphGroup`, inserting them at the original group's position.
 *
 * Nothing changes when the group id is not found on the page or when the
 * split produces one group or fewer.
 *
 * @param pageIndex - Index of the page containing the group.
 * @param groupId - Id of the group to split.
 * @returns The value of the `updated` flag at the time of return.
 *
 * NOTE(review): `updated` is flipped inside the updater callback handed to
 * `setGroupsByPage`, so the returned value is only accurate if that callback
 * has already run by the time this function returns — confirm against the
 * setter's scheduling behaviour.
 */
const handleUngroupGroup = useCallback((pageIndex: number, groupId: string): boolean => {
  let updated = false;

  setGroupsByPage((previous) =>
    previous.map((pageGroups, currentPage) => {
      // Pages other than the target pass through unchanged.
      if (currentPage !== pageIndex) {
        return pageGroups;
      }

      const position = pageGroups.findIndex((candidate) => candidate.id === groupId);
      if (position < 0) {
        return pageGroups;
      }

      // A split that yields a single piece (or none) is a no-op.
      const pieces = splitParagraphGroup(pageGroups[position]);
      if (pieces.length <= 1) {
        return pageGroups;
      }

      // Swap the original group for its pieces, keeping neighbours in place.
      const replaced = [
        ...pageGroups.slice(0, position),
        ...pieces,
        ...pageGroups.slice(position + 1),
      ];
      updated = true;
      return replaced;
    }),
  );

  return updated;
}, []);
|
||||
|
||||
const handleImageTransform = useCallback(
|
||||
(
|
||||
pageIndex: number,
|
||||
@ -1064,7 +1275,11 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
onGeneratePdf: handleGeneratePdf,
|
||||
onForceSingleTextElementChange: setForceSingleTextElement,
|
||||
onGroupingModeChange: setGroupingMode,
|
||||
onMergeGroups: handleMergeGroups,
|
||||
onUngroupGroup: handleUngroupGroup,
|
||||
}), [
|
||||
handleMergeGroups,
|
||||
handleUngroupGroup,
|
||||
handleImageTransform,
|
||||
imagesByPage,
|
||||
pagePreviews,
|
||||
|
||||
@ -168,6 +168,7 @@ export interface TextGroup {
|
||||
text: string;
|
||||
originalText: string;
|
||||
bounds: BoundingBox;
|
||||
childLineGroups?: TextGroup[] | null;
|
||||
}
|
||||
|
||||
export const DEFAULT_PAGE_WIDTH = 612;
|
||||
@ -219,4 +220,6 @@ export interface PdfTextEditorViewData {
|
||||
onGeneratePdf: () => void;
|
||||
onForceSingleTextElementChange: (value: boolean) => void;
|
||||
onGroupingModeChange: (value: 'auto' | 'paragraph' | 'singleLine') => void;
|
||||
onMergeGroups: (pageIndex: number, groupIds: string[]) => boolean;
|
||||
onUngroupGroup: (pageIndex: number, groupId: string) => boolean;
|
||||
}
|
||||
|
||||
@ -520,8 +520,18 @@ const createGroup = (
|
||||
};
|
||||
};
|
||||
|
||||
/**
 * Builds a copy of a line group for use as a line template.
 *
 * All fields are copied from `line` via spread; `elements` and
 * `originalElements` are re-created by passing each entry through
 * `cloneTextElement`, and `childLineGroups`, `lineElementCounts` and
 * `lineSpacing` are reset to `null`.
 *
 * @param line - The line group to copy.
 * @returns A new group object; the input is not modified.
 */
const cloneLineTemplate = (line: TextGroup): TextGroup => {
  const clonedElements = line.elements.map(cloneTextElement);
  const clonedOriginals = line.originalElements.map(cloneTextElement);

  return {
    ...line,
    childLineGroups: null,
    lineElementCounts: null,
    lineSpacing: null,
    elements: clonedElements,
    originalElements: clonedOriginals,
  };
};
|
||||
|
||||
const groupLinesIntoParagraphs = (
|
||||
lineGroups: TextGroup[],
|
||||
pageWidth: number,
|
||||
metrics?: FontMetricsMap,
|
||||
): TextGroup[] => {
|
||||
if (lineGroups.length === 0) {
|
||||
@ -530,6 +540,8 @@ const groupLinesIntoParagraphs = (
|
||||
|
||||
const paragraphs: TextGroup[][] = [];
|
||||
let currentParagraph: TextGroup[] = [lineGroups[0]];
|
||||
const bulletFlags = new Map<string, boolean>();
|
||||
bulletFlags.set(lineGroups[0].id, false);
|
||||
|
||||
for (let i = 1; i < lineGroups.length; i++) {
|
||||
const prevLine = lineGroups[i - 1];
|
||||
@ -561,11 +573,85 @@ const groupLinesIntoParagraphs = (
|
||||
const maxReasonableSpacing = avgFontSize * 3.0; // Max ~3x font size for normal line spacing
|
||||
const hasReasonableSpacing = lineSpacing <= maxReasonableSpacing;
|
||||
|
||||
// Check if current line looks like a bullet/list item
|
||||
const prevRight = prevLine.bounds.right;
|
||||
const currentRight = currentLine.bounds.right;
|
||||
const prevWidth = prevRight - prevLeft;
|
||||
const currentWidth = currentRight - currentLeft;
|
||||
|
||||
// Count word count to help identify bullets (typically short)
|
||||
const prevWords = (prevLine.text ?? '').split(/\s+/).filter(w => w.length > 0).length;
|
||||
const currentWords = (currentLine.text ?? '').split(/\s+/).filter(w => w.length > 0).length;
|
||||
const prevText = (prevLine.text ?? '').trim();
|
||||
const currentText = (currentLine.text ?? '').trim();
|
||||
|
||||
// Bullet detection - look for bullet markers or very short lines
|
||||
const bulletMarkerRegex = /^[\u2022\u2023\u25E6\u2043\u2219•·◦‣⁃\-\*]\s|^\d+[\.\)]\s|^[a-z][\.\)]\s/i;
|
||||
const prevHasBulletMarker = bulletMarkerRegex.test(prevText);
|
||||
const currentHasBulletMarker = bulletMarkerRegex.test(currentText);
|
||||
|
||||
// True bullets are:
|
||||
// 1. Have bullet markers/numbers OR
|
||||
// 2. Very short (< 10 words) AND much narrower than average (< 60% of page width)
|
||||
const headingKeywords = ['action items', 'next steps', 'notes', 'logistics', 'tasks'];
|
||||
const normalizedPageWidth = pageWidth > 0 ? pageWidth : avgFontSize * 70;
|
||||
const maxReferenceWidth = normalizedPageWidth > 0 ? normalizedPageWidth : avgFontSize * 70;
|
||||
const indentDelta = currentLeft - prevLeft;
|
||||
const indentThreshold = Math.max(avgFontSize * 0.6, 8);
|
||||
const hasIndent = indentDelta > indentThreshold;
|
||||
const currentWidthRatio = maxReferenceWidth > 0 ? currentWidth / maxReferenceWidth : 0;
|
||||
const prevWidthRatio = maxReferenceWidth > 0 ? prevWidth / maxReferenceWidth : 0;
|
||||
const prevLooksLikeHeading =
|
||||
prevText.endsWith(':') ||
|
||||
(prevWords <= 4 && prevWidthRatio < 0.4) ||
|
||||
headingKeywords.some((keyword) => prevText.toLowerCase().includes(keyword));
|
||||
|
||||
const wrapCandidate =
|
||||
!currentHasBulletMarker &&
|
||||
!hasIndent &&
|
||||
!prevLooksLikeHeading &&
|
||||
currentWords <= 12 &&
|
||||
currentWidthRatio < 0.45 &&
|
||||
Math.abs(prevLeft - currentLeft) <= leftAlignmentTolerance &&
|
||||
currentWidth < prevWidth * 0.85;
|
||||
|
||||
const currentIsBullet = wrapCandidate
|
||||
? false
|
||||
: currentHasBulletMarker ||
|
||||
(hasIndent && (currentWords <= 14 || currentWidthRatio <= 0.65)) ||
|
||||
(prevLooksLikeHeading && (currentWords <= 16 || currentWidthRatio <= 0.8 || prevWidthRatio < 0.35)) ||
|
||||
(currentWords <= 8 && currentWidthRatio <= 0.45 && prevWidth - currentWidth > avgFontSize * 4);
|
||||
|
||||
const prevIsBullet = bulletFlags.get(prevLine.id) ?? prevHasBulletMarker;
|
||||
bulletFlags.set(currentLine.id, currentIsBullet);
|
||||
|
||||
// Detect paragraph→bullet transition
|
||||
const likelyBulletStart = !prevIsBullet && currentIsBullet;
|
||||
|
||||
// Don't merge two consecutive bullets
|
||||
const bothAreBullets = prevIsBullet && currentIsBullet;
|
||||
|
||||
// Merge into paragraph if:
|
||||
// 1. Left aligned
|
||||
// 2. Same font
|
||||
// 3. Reasonable line spacing (not a large gap indicating paragraph break)
|
||||
const shouldMerge = isLeftAligned && sameFont && hasReasonableSpacing;
|
||||
// 3. Reasonable line spacing
|
||||
// 4. NOT transitioning to bullets
|
||||
// 5. NOT both are bullets
|
||||
const shouldMerge =
|
||||
isLeftAligned &&
|
||||
sameFont &&
|
||||
hasReasonableSpacing &&
|
||||
!likelyBulletStart &&
|
||||
!bothAreBullets &&
|
||||
!currentIsBullet;
|
||||
|
||||
if (i < 10 || likelyBulletStart || bothAreBullets || !shouldMerge) {
|
||||
console.log(` Line ${i}:`);
|
||||
console.log(` prev: "${prevText.substring(0, 40)}" (${prevWords}w, ${prevWidth.toFixed(0)}pt, marker:${prevHasBulletMarker}, bullet:${prevIsBullet})`);
|
||||
console.log(` curr: "${currentText.substring(0, 40)}" (${currentWords}w, ${currentWidth.toFixed(0)}pt, marker:${currentHasBulletMarker}, bullet:${currentIsBullet})`);
|
||||
console.log(` checks: leftAlign:${isLeftAligned} (${Math.abs(prevLeft - currentLeft).toFixed(1)}pt), sameFont:${sameFont}, spacing:${hasReasonableSpacing} (${lineSpacing.toFixed(1)}pt/${maxReasonableSpacing.toFixed(1)}pt)`);
|
||||
console.log(` decision: merge=${shouldMerge} (bulletStart:${likelyBulletStart}, bothBullets:${bothAreBullets})`);
|
||||
}
|
||||
|
||||
if (shouldMerge) {
|
||||
currentParagraph.push(currentLine);
|
||||
@ -587,17 +673,24 @@ const groupLinesIntoParagraphs = (
|
||||
}
|
||||
|
||||
// Combine all elements from all lines
|
||||
const allElements = lines.flatMap(line => line.originalElements);
|
||||
const lineTemplates = lines.map(line => cloneLineTemplate(line));
|
||||
const flattenedLineTemplates = lineTemplates.flatMap((line) =>
|
||||
line.childLineGroups && line.childLineGroups.length > 0
|
||||
? line.childLineGroups
|
||||
: [line],
|
||||
);
|
||||
const allLines = flattenedLineTemplates.length > 0 ? flattenedLineTemplates : lineTemplates;
|
||||
const allElements = allLines.flatMap(line => line.originalElements);
|
||||
const pageIndex = lines[0].pageIndex;
|
||||
const lineElementCounts = lines.map((line) => line.originalElements.length);
|
||||
const lineElementCounts = allLines.map((line) => line.originalElements.length);
|
||||
|
||||
// Create merged group with newlines between lines
|
||||
const paragraphText = lines.map(line => line.text).join('\n');
|
||||
const mergedBounds = mergeBounds(lines.map(line => line.bounds));
|
||||
const paragraphText = allLines.map(line => line.text).join('\n');
|
||||
const mergedBounds = mergeBounds(allLines.map(line => line.bounds));
|
||||
const spacingValues: number[] = [];
|
||||
for (let i = 1; i < lines.length; i++) {
|
||||
const prevBaseline = lines[i - 1].baseline ?? lines[i - 1].bounds.bottom;
|
||||
const currentBaseline = lines[i].baseline ?? lines[i].bounds.bottom;
|
||||
for (let i = 1; i < allLines.length; i++) {
|
||||
const prevBaseline = allLines[i - 1].baseline ?? allLines[i - 1].bounds.bottom;
|
||||
const currentBaseline = allLines[i].baseline ?? allLines[i].bounds.bottom;
|
||||
const spacing = Math.abs(prevBaseline - currentBaseline);
|
||||
if (spacing > 0) {
|
||||
spacingValues.push(spacing);
|
||||
@ -633,6 +726,7 @@ const groupLinesIntoParagraphs = (
|
||||
text: paragraphText,
|
||||
originalText: paragraphText,
|
||||
bounds: mergedBounds,
|
||||
childLineGroups: allLines,
|
||||
};
|
||||
});
|
||||
};
|
||||
@ -742,7 +836,7 @@ export const groupPageTextElements = (
|
||||
|
||||
if (groupingMode === 'paragraph') {
|
||||
// Paragraph mode: always apply grouping
|
||||
return groupLinesIntoParagraphs(lineGroups, metrics);
|
||||
return groupLinesIntoParagraphs(lineGroups, pageWidth, metrics);
|
||||
}
|
||||
|
||||
// Auto mode: use heuristic to determine if we should group
|
||||
@ -801,12 +895,11 @@ export const groupPageTextElements = (
|
||||
const coefficientOfVariation = avgWordsPerGroup > 0 ? stdDev / avgWordsPerGroup : 0;
|
||||
|
||||
// Check each criterion
|
||||
const criterion1 = multiLineGroups >= 2 && avgWordsPerGroup > 8;
|
||||
const criterion2 = avgWordsPerGroup > 5;
|
||||
const criterion3 = longTextRatio > 0.4;
|
||||
const criterion4 = coefficientOfVariation > 0.5 || fullWidthRatio > 0.6; // High variance OR many full-width lines = paragraph text
|
||||
const criterion1 = avgWordsPerGroup > 5;
|
||||
const criterion2 = longTextRatio > 0.4;
|
||||
const criterion3 = coefficientOfVariation > 0.5 || fullWidthRatio > 0.6; // High variance OR many full-width lines = paragraph text
|
||||
|
||||
const isParagraphPage = criterion1 && criterion2 && criterion3 && criterion4;
|
||||
const isParagraphPage = criterion1 && criterion2 && criterion3;
|
||||
|
||||
// Log detection stats
|
||||
console.log(`📄 Page ${pageIndex} Grouping Analysis (mode: ${groupingMode}):`);
|
||||
@ -823,24 +916,21 @@ export const groupPageTextElements = (
|
||||
console.log(` • Std deviation: ${stdDev.toFixed(2)}`);
|
||||
console.log(` • Coefficient of variation: ${coefficientOfVariation.toFixed(2)}`);
|
||||
console.log(` Criteria:`);
|
||||
console.log(` 1. Multi-line + Avg Words: ${criterion1 ? '✅ PASS' : '❌ FAIL'}`);
|
||||
console.log(` (${multiLineGroups} >= 2 AND ${avgWordsPerGroup.toFixed(2)} > 8)`);
|
||||
console.log(` 2. Avg Words Only: ${criterion2 ? '✅ PASS' : '❌ FAIL'}`);
|
||||
console.log(` 1. Avg Words Per Group: ${criterion1 ? '✅ PASS' : '❌ FAIL'}`);
|
||||
console.log(` (${avgWordsPerGroup.toFixed(2)} > 5)`);
|
||||
console.log(` 3. Long Text Ratio: ${criterion3 ? '✅ PASS' : '❌ FAIL'}`);
|
||||
console.log(` 2. Long Text Ratio: ${criterion2 ? '✅ PASS' : '❌ FAIL'}`);
|
||||
console.log(` (${(longTextRatio * 100).toFixed(1)}% > 40%)`);
|
||||
console.log(` 4. Line Width Pattern: ${criterion4 ? '✅ PASS' : '❌ FAIL'}`);
|
||||
console.log(` 3. Line Width Pattern: ${criterion3 ? '✅ PASS' : '❌ FAIL'}`);
|
||||
console.log(` (CV ${coefficientOfVariation.toFixed(2)} > 0.5 OR ${(fullWidthRatio * 100).toFixed(1)}% > 60%)`);
|
||||
console.log(` ${coefficientOfVariation > 0.5 ? '✓ High variance (varying line lengths)' : '✗ Low variance'} ${fullWidthRatio > 0.6 ? '✓ Many full-width lines (paragraph-like)' : '✗ Few full-width lines (list-like)'}`);
|
||||
console.log(` Decision: ${isParagraphPage ? '📝 PARAGRAPH MODE' : '📋 LINE MODE'}`);
|
||||
if (isParagraphPage) {
|
||||
console.log(` Reason: All criteria passed (AND logic)`);
|
||||
console.log(` Reason: All three criteria passed (AND logic)`);
|
||||
} else {
|
||||
const failedReasons = [];
|
||||
if (!criterion1) failedReasons.push('insufficient multi-line groups or word density');
|
||||
if (!criterion2) failedReasons.push('low average words per group');
|
||||
if (!criterion3) failedReasons.push('low ratio of long text groups');
|
||||
if (!criterion4) failedReasons.push('low variance and few full-width lines (list-like structure)');
|
||||
if (!criterion1) failedReasons.push('low average words per group');
|
||||
if (!criterion2) failedReasons.push('low ratio of long text groups');
|
||||
if (!criterion3) failedReasons.push('low variance and few full-width lines (list-like structure)');
|
||||
console.log(` Reason: ${failedReasons.join(', ')}`);
|
||||
}
|
||||
console.log('');
|
||||
@ -848,7 +938,7 @@ export const groupPageTextElements = (
|
||||
// Only apply paragraph grouping if it looks like a paragraph-heavy page
|
||||
if (isParagraphPage) {
|
||||
console.log(`🔀 Applying paragraph grouping to page ${pageIndex}`);
|
||||
return groupLinesIntoParagraphs(lineGroups, metrics);
|
||||
return groupLinesIntoParagraphs(lineGroups, pageWidth, metrics);
|
||||
}
|
||||
|
||||
// For sparse pages, keep lines separate
|
||||
|
||||
Loading…
Reference in New Issue
Block a user