diff --git a/app/proprietary/src/main/java/stirling/software/SPDF/service/PdfJsonConversionService.java b/app/proprietary/src/main/java/stirling/software/SPDF/service/PdfJsonConversionService.java index b0869dc16..64d5e677f 100644 --- a/app/proprietary/src/main/java/stirling/software/SPDF/service/PdfJsonConversionService.java +++ b/app/proprietary/src/main/java/stirling/software/SPDF/service/PdfJsonConversionService.java @@ -415,9 +415,16 @@ public class PdfJsonConversionService { for (PDPage page : document.getPages()) { PdfJsonPageDimension dim = new PdfJsonPageDimension(); dim.setPageNumber(pageIndex + 1); - PDRectangle mediaBox = page.getMediaBox(); - dim.setWidth(mediaBox.getWidth()); - dim.setHeight(mediaBox.getHeight()); + // Use CropBox if present (defines visible page area), otherwise fall back + // to MediaBox + PDRectangle pageBox = page.getCropBox(); + if (pageBox == null + || pageBox.getWidth() == 0 + || pageBox.getHeight() == 0) { + pageBox = page.getMediaBox(); + } + dim.setWidth(pageBox.getWidth()); + dim.setHeight(pageBox.getHeight()); dim.setRotation(page.getRotation()); pageDimensions.add(dim); pageIndex++; @@ -1851,9 +1858,13 @@ public class PdfJsonConversionService { for (PDPage page : document.getPages()) { PdfJsonPage pageModel = new PdfJsonPage(); pageModel.setPageNumber(pageIndex + 1); - PDRectangle mediaBox = page.getMediaBox(); - pageModel.setWidth(mediaBox.getWidth()); - pageModel.setHeight(mediaBox.getHeight()); + // Use CropBox if present (defines visible page area), otherwise fall back to MediaBox + PDRectangle pageBox = page.getCropBox(); + if (pageBox == null || pageBox.getWidth() == 0 || pageBox.getHeight() == 0) { + pageBox = page.getMediaBox(); + } + pageModel.setWidth(pageBox.getWidth()); + pageModel.setHeight(pageBox.getHeight()); pageModel.setRotation(page.getRotation()); pageModel.setTextElements(textByPage.getOrDefault(pageIndex + 1, new ArrayList<>())); pageModel.setImageElements(imagesByPage.getOrDefault(pageIndex + 1, new 
ArrayList<>())); diff --git a/frontend/public/locales/en-GB/translation.json b/frontend/public/locales/en-GB/translation.json index c972fd919..c85de2119 100644 --- a/frontend/public/locales/en-GB/translation.json +++ b/frontend/public/locales/en-GB/translation.json @@ -4533,6 +4533,32 @@ "cancel": "Cancel", "confirm": "Reset and Change Mode" }, + "welcomeBanner": { + "title": "Welcome to PDF Text Editor (Early Access)", + "experimental": "This is an experimental feature in active development. Expect some instability and issues during use.", + "howItWorks": "This tool converts your PDF to an editable format where you can modify text content and reposition images. Changes are saved back as a new PDF.", + "bestFor": "Works Best With:", + "bestFor1": "Simple PDFs containing primarily text and images", + "bestFor2": "Documents with standard paragraph formatting", + "bestFor3": "Letters, essays, reports, and basic documents", + "notIdealFor": "Not Ideal For:", + "notIdealFor1": "PDFs with special formatting like bullet points, tables, or multi-column layouts", + "notIdealFor2": "Magazines, brochures, or heavily designed documents", + "notIdealFor3": "Instruction manuals with complex layouts", + "limitations": "Current Limitations:", + "limitation1": "Font rendering may differ slightly from the original PDF", + "limitation2": "Complex graphics, form fields, and annotations are preserved but not editable", + "limitation3": "Large files may take time to convert and process", + "knownIssues": "Known Issues (Being Fixed):", + "issue1": "Text colour is not currently preserved (will be added soon)", + "issue2": "Paragraph mode has more alignment and spacing issues - Single Line mode recommended", + "issue3": "The preview display differs from the exported PDF - exported PDFs are closer to the original", + "issue4": "Rotated text alignment may need manual adjustment", + "issue5": "Transparency and layering effects may vary from original", + "feedback": "This is an early access 
feature. Please report any issues you encounter to help us improve!", + "gotIt": "Got it", + "dontShowAgain": "Don't show again" + }, "disclaimer": { "heading": "Preview limitations", "textFocus": "This workspace focuses on editing text and repositioning embedded images. Complex page artwork, form widgets, and layered graphics are preserved for export but are not fully editable here.", @@ -4579,6 +4605,21 @@ "standard14": "Standard PDF Font", "warnings": "Warnings", "suggestions": "Notes" + }, + "manual": { + "mergeTooltip": "Merge selected boxes into a single paragraph", + "merge": "Merge selection", + "ungroupTooltip": "Split paragraph back into separate lines", + "ungroup": "Ungroup selection", + "widthMenu": "Width options", + "expandWidth": "Expand to page edge", + "resetWidth": "Reset width", + "resizeHandle": "Adjust text width" + }, + "options": { + "manualGrouping": { + "descriptionInline": "Tip: Hold Ctrl (Cmd) or Shift to multi-select text boxes. A floating toolbar will appear above the selection so you can merge, ungroup, or adjust widths." 
+ } } }, "workspace": { diff --git a/frontend/src/core/contexts/NavigationContext.tsx b/frontend/src/core/contexts/NavigationContext.tsx index 377ee43e6..500a6db5e 100644 --- a/frontend/src/core/contexts/NavigationContext.tsx +++ b/frontend/src/core/contexts/NavigationContext.tsx @@ -121,10 +121,11 @@ export const NavigationProvider: React.FC<{ hasUnsavedChanges }); - // If we're leaving pageEditor or viewer workbench and have unsaved changes, request navigation + // If we're leaving pageEditor, viewer, or custom workbench and have unsaved changes, request navigation const leavingWorkbenchWithChanges = (state.workbench === 'pageEditor' && workbench !== 'pageEditor' && hasUnsavedChanges) || - (state.workbench === 'viewer' && workbench !== 'viewer' && hasUnsavedChanges); + (state.workbench === 'viewer' && workbench !== 'viewer' && hasUnsavedChanges) || + (state.workbench.startsWith('custom:') && workbench !== state.workbench && hasUnsavedChanges); if (leavingWorkbenchWithChanges) { // Update state to reflect unsaved changes so modal knows @@ -132,7 +133,19 @@ export const NavigationProvider: React.FC<{ dispatch({ type: 'SET_UNSAVED_CHANGES', payload: { hasChanges: true } }); } const performWorkbenchChange = () => { - dispatch({ type: 'SET_WORKBENCH', payload: { workbench } }); + // When leaving a custom workbench, clear the selected tool + console.log('[NavigationContext] performWorkbenchChange executing', { + from: state.workbench, + to: workbench, + isCustom: state.workbench.startsWith('custom:') + }); + if (state.workbench.startsWith('custom:')) { + console.log('[NavigationContext] Clearing tool and changing workbench to:', workbench); + dispatch({ type: 'SET_TOOL_AND_WORKBENCH', payload: { toolId: null, workbench } }); + } else { + console.log('[NavigationContext] Just changing workbench to:', workbench); + dispatch({ type: 'SET_WORKBENCH', payload: { workbench } }); + } }; dispatch({ type: 'SET_PENDING_NAVIGATION', payload: { navigationFn: performWorkbenchChange 
} }); dispatch({ type: 'SHOW_NAVIGATION_WARNING', payload: { show: true } }); @@ -149,10 +162,11 @@ export const NavigationProvider: React.FC<{ // Check for unsaved changes using registered checker or state const hasUnsavedChanges = unsavedChangesCheckerRef.current?.() || state.hasUnsavedChanges; - // If we're leaving pageEditor or viewer workbench and have unsaved changes, request navigation + // If we're leaving pageEditor, viewer, or custom workbench and have unsaved changes, request navigation const leavingWorkbenchWithChanges = (state.workbench === 'pageEditor' && workbench !== 'pageEditor' && hasUnsavedChanges) || - (state.workbench === 'viewer' && workbench !== 'viewer' && hasUnsavedChanges); + (state.workbench === 'viewer' && workbench !== 'viewer' && hasUnsavedChanges) || + (state.workbench.startsWith('custom:') && workbench !== state.workbench && hasUnsavedChanges); if (leavingWorkbenchWithChanges) { const performWorkbenchChange = () => { @@ -192,13 +206,19 @@ export const NavigationProvider: React.FC<{ }, [state.hasUnsavedChanges]), confirmNavigation: useCallback(() => { + console.log('[NavigationContext] confirmNavigation called', { + hasPendingNav: !!state.pendingNavigation, + currentWorkbench: state.workbench, + currentTool: state.selectedTool + }); if (state.pendingNavigation) { state.pendingNavigation(); } dispatch({ type: 'SET_PENDING_NAVIGATION', payload: { navigationFn: null } }); dispatch({ type: 'SHOW_NAVIGATION_WARNING', payload: { show: false } }); - }, [state.pendingNavigation]), + console.log('[NavigationContext] confirmNavigation completed'); + }, [state.pendingNavigation, state.workbench, state.selectedTool]), cancelNavigation: useCallback(() => { dispatch({ type: 'SET_PENDING_NAVIGATION', payload: { navigationFn: null } }); diff --git a/frontend/src/core/contexts/ToolWorkflowContext.tsx b/frontend/src/core/contexts/ToolWorkflowContext.tsx index 7c657506e..8608d460b 100644 --- a/frontend/src/core/contexts/ToolWorkflowContext.tsx +++ 
b/frontend/src/core/contexts/ToolWorkflowContext.tsx @@ -218,15 +218,25 @@ export function ToolWorkflowProvider({ children }: ToolWorkflowProviderProps) { }, [customViewRegistry, customViewData]); useEffect(() => { - if (isBaseWorkbench(navigationState.workbench)) { + const { workbench } = navigationState; + if (isBaseWorkbench(workbench)) { return; } - const currentCustomView = customWorkbenchViews.find(view => view.workbenchId === navigationState.workbench); + const currentCustomView = customWorkbenchViews.find(view => view.workbenchId === workbench); + const expectedWorkbench = selectedTool?.workbench; + const workbenchOwnedBySelectedTool = expectedWorkbench === workbench; + if (!currentCustomView || currentCustomView.data == null) { + // If the currently selected tool expects this custom workbench, allow it + // some time to register/populate the view instead of immediately bouncing + // the user back to Active Files. + if (workbenchOwnedBySelectedTool) { + return; + } actions.setWorkbench(getDefaultWorkbench()); } - }, [actions, customWorkbenchViews, navigationState.workbench]); + }, [actions, customWorkbenchViews, navigationState.workbench, selectedTool]); // Persisted via PreferencesContext; no direct localStorage writes needed here @@ -421,4 +431,4 @@ export function useToolWorkflow(): ToolWorkflowContextValue { throw new Error('useToolWorkflow must be used within a ToolWorkflowProvider'); } return context; -} \ No newline at end of file +} diff --git a/frontend/src/proprietary/components/tools/pdfTextEditor/FontStatusPanel.tsx b/frontend/src/proprietary/components/tools/pdfTextEditor/FontStatusPanel.tsx index 4bb8ce65c..0ba9a4665 100644 --- a/frontend/src/proprietary/components/tools/pdfTextEditor/FontStatusPanel.tsx +++ b/frontend/src/proprietary/components/tools/pdfTextEditor/FontStatusPanel.tsx @@ -173,10 +173,6 @@ const FontStatusPanel: React.FC = ({ document, pageIndex } [document, pageIndex] ); - if (!document || fontAnalysis.fonts.length === 0) { - 
return null; - } - const { canReproducePerfectly, hasWarnings, summary, fonts } = fontAnalysis; const statusIcon = useMemo(() => { @@ -189,6 +185,11 @@ const FontStatusPanel: React.FC = ({ document, pageIndex } return ; }, [canReproducePerfectly, hasWarnings]); + // Early return AFTER all hooks are declared + if (!document || fontAnalysis.fonts.length === 0) { + return null; + } + const statusColor = canReproducePerfectly ? 'green' : hasWarnings ? 'yellow' : 'blue'; const pageLabel = pageIndex !== undefined @@ -199,14 +200,30 @@ const FontStatusPanel: React.FC = ({ document, pageIndex } - - {statusIcon} - - {pageLabel} - - - {fonts.length} - + + + {statusIcon} + + {pageLabel} + + + {fonts.length} + + + + {/* Warning badges BEFORE expansion */} + + {summary.systemFallback > 0 && ( + }> + {summary.systemFallback} {t('pdfTextEditor.fontAnalysis.fallback', 'fallback')} + + )} + {summary.missing > 0 && ( + }> + {summary.missing} {t('pdfTextEditor.fontAnalysis.missing', 'missing')} + + )} + diff --git a/frontend/src/proprietary/components/tools/pdfTextEditor/PdfTextEditorView.tsx b/frontend/src/proprietary/components/tools/pdfTextEditor/PdfTextEditorView.tsx index 9d78e5a2e..734a0f090 100644 --- a/frontend/src/proprietary/components/tools/pdfTextEditor/PdfTextEditorView.tsx +++ b/frontend/src/proprietary/components/tools/pdfTextEditor/PdfTextEditorView.tsx @@ -8,8 +8,8 @@ import { Button, Card, Divider, - FileButton, Group, + Menu, Modal, Pagination, Progress, @@ -19,6 +19,7 @@ import { Switch, Text, Title, + Tooltip, } from '@mantine/core'; import { useTranslation } from 'react-i18next'; import DescriptionIcon from '@mui/icons-material/DescriptionOutlined'; @@ -27,15 +28,19 @@ import PictureAsPdfIcon from '@mui/icons-material/PictureAsPdfOutlined'; import AutorenewIcon from '@mui/icons-material/Autorenew'; import WarningAmberIcon from '@mui/icons-material/WarningAmber'; import InfoOutlinedIcon from '@mui/icons-material/InfoOutlined'; -import UploadIcon from 
'@mui/icons-material/Upload'; import CloseIcon from '@mui/icons-material/Close'; +import MergeTypeIcon from '@mui/icons-material/MergeType'; +import CallSplitIcon from '@mui/icons-material/CallSplit'; +import MoreVertIcon from '@mui/icons-material/MoreVert'; import { Rnd } from 'react-rnd'; +import NavigationWarningModal from '@core/components/shared/NavigationWarningModal'; import { PdfTextEditorViewData, PdfJsonFont, PdfJsonPage, ConversionProgress, + TextGroup, } from '@app/tools/pdfTextEditor/pdfTextEditorTypes'; import { getImageBounds, pageDimensions } from '@app/tools/pdfTextEditor/pdfTextEditorUtils'; import FontStatusPanel from '@app/components/tools/pdfTextEditor/FontStatusPanel'; @@ -249,20 +254,16 @@ const buildFontLookupKeys = ( * Analyzes text groups on a page to determine if it's paragraph-heavy or sparse. * Returns true if the page appears to be document-like with substantial text content. */ -const analyzePageContentType = (groups: TextGroup[]): boolean => { +const analyzePageContentType = (groups: TextGroup[], pageWidth: number): boolean => { if (groups.length === 0) return false; let multiLineGroups = 0; let totalWords = 0; let longTextGroups = 0; let totalGroups = 0; - const groupDetails: Array<{ - id: string; - lines: number; - words: number; - chars: number; - text: string; - }> = []; + let fullWidthLines = 0; + const wordCounts: number[] = []; + const fullWidthThreshold = pageWidth * 0.7; groups.forEach((group) => { const text = (group.text || '').trim(); @@ -274,39 +275,45 @@ const analyzePageContentType = (groups: TextGroup[]): boolean => { const wordCount = text.split(/\s+/).filter((w) => w.length > 0).length; totalWords += wordCount; + wordCounts.push(wordCount); // Count multi-line paragraphs if (lineCount > 1) { multiLineGroups++; } - // Count text groups with substantial content (more than a few words) - if (wordCount >= 5 || text.length >= 30) { + // Count text groups with substantial content (≥10 words or ≥50 chars) + if 
(wordCount >= 10 || text.length >= 50) { longTextGroups++; } - groupDetails.push({ - id: group.id, - lines: lineCount, - words: wordCount, - chars: text.length, - text: text.substring(0, 50) + (text.length > 50 ? '...' : ''), - }); + // Check if this line extends close to the right margin + const rightEdge = group.bounds.right; + if (rightEdge >= fullWidthThreshold) { + fullWidthLines++; + } }); if (totalGroups === 0) return false; - // Heuristics for paragraph mode: - // 1. Has multiple substantial multi-line groups (2+) AND decent average words - // 2. Average words per group > 12 (strong indicator of document text) - // 3. More than 40% of groups have substantial text (typical of documents) const avgWordsPerGroup = totalWords / totalGroups; const longTextRatio = longTextGroups / totalGroups; + const fullWidthRatio = fullWidthLines / totalGroups; - const isParagraphPage = - (multiLineGroups >= 2 && avgWordsPerGroup > 8) || - avgWordsPerGroup > 12 || - longTextRatio > 0.4; + // Calculate variance in line lengths + const variance = wordCounts.reduce((sum, count) => { + const diff = count - avgWordsPerGroup; + return sum + diff * diff; + }, 0) / totalGroups; + const stdDev = Math.sqrt(variance); + const coefficientOfVariation = avgWordsPerGroup > 0 ? 
stdDev / avgWordsPerGroup : 0; + + // All 3 criteria must pass for paragraph mode + const criterion1 = avgWordsPerGroup > 5; + const criterion2 = longTextRatio > 0.4; + const criterion3 = coefficientOfVariation > 0.5 || fullWidthRatio > 0.6; + + const isParagraphPage = criterion1 && criterion2 && criterion3; return isParagraphPage; }; @@ -318,6 +325,8 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { const [activeGroupId, setActiveGroupId] = useState(null); const [editingGroupId, setEditingGroupId] = useState(null); const [activeImageId, setActiveImageId] = useState(null); + const [selectedGroupIds, setSelectedGroupIds] = useState>(new Set()); + const [widthOverrides, setWidthOverrides] = useState>(new Map()); const draggingImageRef = useRef(null); const rndRefs = useRef>(new Map()); const pendingDragUpdateRef = useRef(null); @@ -329,6 +338,39 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { const containerRef = useRef(null); const editorRefs = useRef>(new Map()); const caretOffsetsRef = useRef>(new Map()); + const lastSelectedGroupIdRef = useRef(null); + const widthOverridesRef = useRef>(widthOverrides); + const resizingRef = useRef<{ + groupId: string; + startX: number; + startWidth: number; + baseWidth: number; + maxWidth: number; + } | null>(null); + + // First-time banner state + const [showWelcomeBanner, setShowWelcomeBanner] = useState(() => { + try { + return localStorage.getItem('pdfTextEditor.welcomeBannerDismissed') !== 'true'; + } catch { + return true; + } + }); + + const handleDismissWelcomeBanner = useCallback(() => { + // Just dismiss for this session, don't save to localStorage + setShowWelcomeBanner(false); + }, []); + + const handleDontShowAgain = useCallback(() => { + // Save to localStorage to never show again + try { + localStorage.setItem('pdfTextEditor.welcomeBannerDismissed', 'true'); + } catch { + // Ignore localStorage errors + } + setShowWelcomeBanner(false); + }, []); const { document: pdfDocument, 
@@ -348,7 +390,6 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { forceSingleTextElement, groupingMode: externalGroupingMode, requestPagePreview, - onLoadJson, onSelectPage, onGroupEdit, onGroupDelete, @@ -357,10 +398,37 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { onReset, onDownloadJson, onGeneratePdf, + onGeneratePdfForNavigation, onForceSingleTextElementChange, onGroupingModeChange, + onMergeGroups, + onUngroupGroup, } = data; + // Define derived variables immediately after props destructuring, before any hooks + const pages = pdfDocument?.pages ?? []; + const currentPage = pages[selectedPage] ?? null; + const pageGroups = groupsByPage[selectedPage] ?? []; + const pageImages = imagesByPage[selectedPage] ?? []; + const pagePreview = pagePreviews.get(selectedPage); + const { width: pageWidth, height: pageHeight } = pageDimensions(currentPage); + + // Debug logging for page dimensions + console.log(`πŸ“ [PdfTextEditor] Page ${selectedPage + 1} Dimensions:`, { + pageWidth, + pageHeight, + aspectRatio: pageHeight > 0 ? (pageWidth / pageHeight).toFixed(3) : 'N/A', + currentPage: currentPage ? { + mediaBox: currentPage.mediaBox, + cropBox: currentPage.cropBox, + rotation: currentPage.rotation, + } : null, + documentMetadata: pdfDocument?.metadata ? 
{ + title: pdfDocument.metadata.title, + pageCount: pages.length, + } : null, + }); + const handleModeChangeRequest = useCallback((newMode: GroupingMode) => { if (hasChanges && newMode !== externalGroupingMode) { // Show confirmation dialog @@ -382,7 +450,16 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { setPendingModeChange(null); }, []); - const resolveFont = (fontId: string | null | undefined, pageIndex: number | null | undefined): PdfJsonFont | null => { + const clearSelection = useCallback(() => { + setSelectedGroupIds(new Set()); + lastSelectedGroupIdRef.current = null; + }, []); + + useEffect(() => { + widthOverridesRef.current = widthOverrides; + }, [widthOverrides]); + + const resolveFont = useCallback((fontId: string | null | undefined, pageIndex: number | null | undefined): PdfJsonFont | null => { if (!fontId || !pdfDocument?.fonts) { return null; } @@ -404,9 +481,9 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { return directUid; } return fonts.find((font) => font?.id === fontId) ?? null; - }; + }, [pdfDocument?.fonts]); - const getFontFamily = (fontId: string | null | undefined, pageIndex: number | null | undefined): string => { + const getFontFamily = useCallback((fontId: string | null | undefined, pageIndex: number | null | undefined): string => { if (!fontId) { return 'sans-serif'; } @@ -437,123 +514,79 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { } return 'Arial, Helvetica, sans-serif'; - }; + }, [resolveFont, fontFamilies]); - const getFontMetricsFor = ( - fontId: string | null | undefined, - pageIndex: number | null | undefined, - ): { unitsPerEm: number; ascent: number; descent: number } | undefined => { - if (!fontId) { - return undefined; - } - const font = resolveFont(fontId, pageIndex); - const lookupKeys = buildFontLookupKeys(fontId, font ?? 
undefined, pageIndex); - for (const key of lookupKeys) { - const metrics = fontMetrics.get(key); - if (metrics) { - return metrics; + useEffect(() => { + clearSelection(); + }, [clearSelection, selectedPage]); + + useEffect(() => { + clearSelection(); + }, [clearSelection, externalGroupingMode]); + + useEffect(() => { + setWidthOverrides(new Map()); + }, [pdfDocument]); + + useEffect(() => { + setSelectedGroupIds((prev) => { + const filtered = Array.from(prev).filter((id) => pageGroups.some((group) => group.id === id)); + if (filtered.length === prev.size) { + return prev; } - } - return undefined; - }; - - const getLineHeightPx = ( - fontId: string | null | undefined, - pageIndex: number | null | undefined, - fontSizePx: number, - ): number => { - if (fontSizePx <= 0) { - return fontSizePx; - } - const metrics = getFontMetricsFor(fontId, pageIndex); - if (!metrics || metrics.unitsPerEm <= 0) { - return fontSizePx * 1.2; - } - const unitsPerEm = metrics.unitsPerEm > 0 ? metrics.unitsPerEm : 1000; - const ascentUnits = metrics.ascent ?? unitsPerEm; - const descentUnits = Math.abs(metrics.descent ?? -(unitsPerEm * 0.2)); - const totalUnits = Math.max(unitsPerEm, ascentUnits + descentUnits); - if (totalUnits <= 0) { - return fontSizePx * 1.2; - } - const lineHeight = (totalUnits / unitsPerEm) * fontSizePx; - return Math.max(lineHeight, fontSizePx * 1.05); - }; - - const getFontGeometry = ( - fontId: string | null | undefined, - pageIndex: number | null | undefined, - ): { - unitsPerEm: number; - ascentUnits: number; - descentUnits: number; - totalUnits: number; - ascentRatio: number; - descentRatio: number; - } | undefined => { - const metrics = getFontMetricsFor(fontId, pageIndex); - if (!metrics) { - return undefined; - } - const unitsPerEm = metrics.unitsPerEm > 0 ? metrics.unitsPerEm : 1000; - const rawAscent = metrics.ascent ?? unitsPerEm; - const rawDescent = metrics.descent ?? -(unitsPerEm * 0.2); - const ascentUnits = Number.isFinite(rawAscent) ? 
rawAscent : unitsPerEm; - const descentUnits = Number.isFinite(rawDescent) ? Math.abs(rawDescent) : unitsPerEm * 0.2; - const totalUnits = Math.max(unitsPerEm, ascentUnits + descentUnits); - if (totalUnits <= 0 || !Number.isFinite(totalUnits)) { - return undefined; - } - return { - unitsPerEm, - ascentUnits, - descentUnits, - totalUnits, - ascentRatio: ascentUnits / totalUnits, - descentRatio: descentUnits / totalUnits, - }; - }; - - const getFontWeight = ( - fontId: string | null | undefined, - pageIndex: number | null | undefined, - ): number | 'normal' | 'bold' => { - if (!fontId) { - return 'normal'; - } - const font = resolveFont(fontId, pageIndex); - if (!font || !font.fontDescriptorFlags) { - return 'normal'; - } - - // PDF font descriptor flag bit 18 (value 262144 = 0x40000) indicates ForceBold - const FORCE_BOLD_FLAG = 262144; - if ((font.fontDescriptorFlags & FORCE_BOLD_FLAG) !== 0) { - return 'bold'; - } - - // Also check if font name contains "Bold" - const fontName = font.standard14Name || font.baseName || ''; - if (fontName.toLowerCase().includes('bold')) { - return 'bold'; - } - - return 'normal'; - }; - - const pages = pdfDocument?.pages ?? []; - const currentPage = pages[selectedPage] ?? null; - const pageGroups = groupsByPage[selectedPage] ?? []; - const pageImages = imagesByPage[selectedPage] ?? []; - const pagePreview = pagePreviews.get(selectedPage); + return new Set(filtered); + }); + setWidthOverrides((prev) => { + const filtered = new Map(); + pageGroups.forEach((group) => { + if (prev.has(group.id)) { + filtered.set(group.id, prev.get(group.id) ?? 
0); + } + }); + if (filtered.size === prev.size) { + return prev; + } + return filtered; + }); + }, [pageGroups]); // Detect if current page contains paragraph-heavy content - const isParagraphPage = useMemo(() => analyzePageContentType(pageGroups), [pageGroups]); + const isParagraphPage = useMemo(() => { + const result = analyzePageContentType(pageGroups, pageWidth); + console.log(`🏷️ Page ${selectedPage} badge: ${result ? 'PARAGRAPH' : 'SPARSE'} (${pageGroups.length} groups)`); + return result; + }, [pageGroups, pageWidth, selectedPage]); const isParagraphLayout = externalGroupingMode === 'paragraph' || (externalGroupingMode === 'auto' && isParagraphPage); - const paragraphWhiteSpace = isParagraphLayout ? 'pre-wrap' : 'pre'; - const paragraphWordBreak = isParagraphLayout ? 'break-word' : 'normal'; - const paragraphOverflowWrap = isParagraphLayout ? 'break-word' : 'normal'; + + const resolveGroupWidth = useCallback( + (group: TextGroup): { width: number; base: number; max: number } => { + const baseWidth = Math.max(group.bounds.right - group.bounds.left, 1); + const maxWidth = Math.max(pageWidth - group.bounds.left, baseWidth); + const override = widthOverrides.get(group.id); + const resolved = override ? 
Math.min(Math.max(override, baseWidth), maxWidth) : baseWidth; + return { width: resolved, base: baseWidth, max: maxWidth }; + }, + [pageWidth, widthOverrides], + ); + + const selectedGroupIdsArray = useMemo(() => Array.from(selectedGroupIds), [selectedGroupIds]); + const selectionIndices = useMemo(() => { + return selectedGroupIdsArray + .map((id) => pageGroups.findIndex((group) => group.id === id)) + .filter((index) => index >= 0) + .sort((a, b) => a - b); + }, [pageGroups, selectedGroupIdsArray]); + const canMergeSelection = selectionIndices.length >= 2 && selectionIndices.every((value, idx, array) => idx === 0 || value === array[idx - 1] + 1); + const paragraphSelectionIds = useMemo(() => + selectedGroupIdsArray.filter((id) => { + const target = pageGroups.find((group) => group.id === id); + return target ? (target.childLineGroups?.length ?? 0) > 1 : false; + }), + [pageGroups, selectedGroupIdsArray]); + const canUngroupSelection = paragraphSelectionIds.length > 0; + const hasWidthOverrides = selectedGroupIdsArray.some((id) => widthOverrides.has(id)); + const hasSelection = selectedGroupIdsArray.length > 0; const syncEditorValue = useCallback( ( @@ -583,6 +616,69 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { [editingGroupId, onGroupEdit], ); + const handleMergeSelection = useCallback(() => { + if (!canMergeSelection) { + return; + } + const orderedIds = selectionIndices + .map((index) => pageGroups[index]?.id) + .filter((value): value is string => Boolean(value)); + if (orderedIds.length < 2) { + return; + } + const merged = onMergeGroups(selectedPage, orderedIds); + if (merged) { + clearSelection(); + } + }, [canMergeSelection, selectionIndices, pageGroups, onMergeGroups, selectedPage, clearSelection]); + + const handleUngroupSelection = useCallback(() => { + if (!canUngroupSelection) { + return; + } + let changed = false; + paragraphSelectionIds.forEach((id) => { + const result = onUngroupGroup(selectedPage, id); + if (result) { + 
changed = true; + } + }); + if (changed) { + clearSelection(); + } + }, [canUngroupSelection, paragraphSelectionIds, onUngroupGroup, selectedPage, clearSelection]); + + const handleWidthAdjustment = useCallback( + (mode: 'expand' | 'reset') => { + if (mode === 'expand' && !hasSelection) { + return; + } + if (mode === 'reset' && !hasWidthOverrides) { + return; + } + const selectedGroups = selectedGroupIdsArray + .map((id) => pageGroups.find((group) => group.id === id)) + .filter((group): group is TextGroup => Boolean(group)); + if (selectedGroups.length === 0) { + return; + } + setWidthOverrides((prev) => { + const next = new Map(prev); + selectedGroups.forEach((group) => { + const baseWidth = Math.max(group.bounds.right - group.bounds.left, 1); + const maxWidth = Math.max(pageWidth - group.bounds.left, baseWidth); + if (mode === 'expand') { + next.set(group.id, maxWidth); + } else { + next.delete(group.id); + } + }); + return next; + }); + }, + [hasSelection, hasWidthOverrides, selectedGroupIdsArray, pageGroups, pageWidth], + ); + const extractPreferredFontId = useCallback((target?: TextGroup | null) => { if (!target) { return undefined; @@ -774,6 +870,110 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { }); }; }, [pdfDocument?.fonts]); + + // Define helper functions that depend on hooks AFTER all hook calls + const getFontMetricsFor = useCallback(( + fontId: string | null | undefined, + pageIndex: number | null | undefined, + ): { unitsPerEm: number; ascent: number; descent: number } | undefined => { + if (!fontId) { + return undefined; + } + const font = resolveFont(fontId, pageIndex); + const lookupKeys = buildFontLookupKeys(fontId, font ?? 
undefined, pageIndex); + for (const key of lookupKeys) { + const metrics = fontMetrics.get(key); + if (metrics) { + return metrics; + } + } + return undefined; + }, [resolveFont, fontMetrics]); + + const getLineHeightPx = useCallback(( + fontId: string | null | undefined, + pageIndex: number | null | undefined, + fontSizePx: number, + ): number => { + if (fontSizePx <= 0) { + return fontSizePx; + } + const metrics = getFontMetricsFor(fontId, pageIndex); + if (!metrics || metrics.unitsPerEm <= 0) { + return fontSizePx * 1.2; + } + const unitsPerEm = metrics.unitsPerEm > 0 ? metrics.unitsPerEm : 1000; + const ascentUnits = metrics.ascent ?? unitsPerEm; + const descentUnits = Math.abs(metrics.descent ?? -(unitsPerEm * 0.2)); + const totalUnits = Math.max(unitsPerEm, ascentUnits + descentUnits); + if (totalUnits <= 0) { + return fontSizePx * 1.2; + } + const lineHeight = (totalUnits / unitsPerEm) * fontSizePx; + return Math.max(lineHeight, fontSizePx * 1.05); + }, [getFontMetricsFor]); + + const getFontGeometry = useCallback(( + fontId: string | null | undefined, + pageIndex: number | null | undefined, + ): { + unitsPerEm: number; + ascentUnits: number; + descentUnits: number; + totalUnits: number; + ascentRatio: number; + descentRatio: number; + } | undefined => { + const metrics = getFontMetricsFor(fontId, pageIndex); + if (!metrics) { + return undefined; + } + const unitsPerEm = metrics.unitsPerEm > 0 ? metrics.unitsPerEm : 1000; + const rawAscent = metrics.ascent ?? unitsPerEm; + const rawDescent = metrics.descent ?? -(unitsPerEm * 0.2); + const ascentUnits = Number.isFinite(rawAscent) ? rawAscent : unitsPerEm; + const descentUnits = Number.isFinite(rawDescent) ? 
Math.abs(rawDescent) : unitsPerEm * 0.2; + const totalUnits = Math.max(unitsPerEm, ascentUnits + descentUnits); + if (totalUnits <= 0 || !Number.isFinite(totalUnits)) { + return undefined; + } + return { + unitsPerEm, + ascentUnits, + descentUnits, + totalUnits, + ascentRatio: ascentUnits / totalUnits, + descentRatio: descentUnits / totalUnits, + }; + }, [getFontMetricsFor]); + + const getFontWeight = useCallback(( + fontId: string | null | undefined, + pageIndex: number | null | undefined, + ): number | 'normal' | 'bold' => { + if (!fontId) { + return 'normal'; + } + const font = resolveFont(fontId, pageIndex); + if (!font || !font.fontDescriptorFlags) { + return 'normal'; + } + + // PDF font descriptor flag bit 18 (value 262144 = 0x40000) indicates ForceBold + const FORCE_BOLD_FLAG = 262144; + if ((font.fontDescriptorFlags & FORCE_BOLD_FLAG) !== 0) { + return 'bold'; + } + + // Also check if font name contains "Bold" + const fontName = font.standard14Name || font.baseName || ''; + if (fontName.toLowerCase().includes('bold')) { + return 'bold'; + } + + return 'normal'; + }, [resolveFont]); + const visibleGroups = useMemo( () => pageGroups @@ -787,17 +987,40 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { [editingGroupId, pageGroups], ); - const orderedImages = useMemo( - () => - [...pageImages].sort( - (first, second) => (first?.zOrder ?? -1_000_000) - (second?.zOrder ?? -1_000_000), - ), - [pageImages], - ); - const { width: pageWidth, height: pageHeight } = pageDimensions(currentPage); - const scale = useMemo(() => Math.min(MAX_RENDER_WIDTH / pageWidth, 2.5), [pageWidth]); - const scaledWidth = pageWidth * scale; - const scaledHeight = pageHeight * scale; +const orderedImages = useMemo( + () => + [...pageImages].sort( + (first, second) => (first?.zOrder ?? -1_000_000) - (second?.zOrder ?? 
-1_000_000), + ), + [pageImages], +); +const scale = useMemo(() => { + const calculatedScale = Math.min(MAX_RENDER_WIDTH / pageWidth, 2.5); + console.log(`πŸ” [PdfTextEditor] Scale Calculation:`, { + MAX_RENDER_WIDTH, + pageWidth, + pageHeight, + calculatedScale: calculatedScale.toFixed(3), + scaledWidth: (pageWidth * calculatedScale).toFixed(2), + scaledHeight: (pageHeight * calculatedScale).toFixed(2), + }); + return calculatedScale; +}, [pageWidth, pageHeight]); +const scaledWidth = pageWidth * scale; +const scaledHeight = pageHeight * scale; +const selectionToolbarPosition = useMemo(() => { + if (!hasSelection) { + return null; + } + const firstSelected = pageGroups.find((group) => selectedGroupIds.has(group.id)); + if (!firstSelected) { + return null; + } + const bounds = toCssBounds(currentPage, pageHeight, scale, firstSelected.bounds); + const top = Math.max(bounds.top - 40, 8); + const left = Math.min(Math.max(bounds.left, 8), Math.max(scaledWidth - 220, 8)); + return { left, top }; +}, [hasSelection, pageGroups, selectedGroupIds, currentPage, pageHeight, scale, scaledWidth]); useEffect(() => { if (!hasDocument || !hasVectorPreview) { @@ -847,8 +1070,16 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { return; } - // Skip multi-line paragraphs - auto-scaling doesn't work well with wrapped text + // Only apply auto-scaling to unchanged text + const hasChanges = group.text !== group.originalText; + if (hasChanges) { + newScales.set(group.id, 1); + return; + } + const lineCount = (group.text || '').split('\n').length; + + // Skip multi-line paragraphs - auto-scaling doesn't work well with wrapped text if (lineCount > 1) { newScales.set(group.id, 1); return; @@ -869,7 +1100,8 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { textSpan.style.transform = 'none'; const bounds = toCssBounds(currentPage, pageHeight, scale, group.bounds); - const containerWidth = bounds.width; + const { width: resolvedWidth } = 
resolveGroupWidth(group); + const containerWidth = resolvedWidth * scale; const textWidth = textSpan.getBoundingClientRect().width; // Restore original transform @@ -901,6 +1133,8 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { scale, fontFamilies.size, selectedPage, + isParagraphLayout, + resolveGroupWidth, ]); useLayoutEffect(() => { @@ -971,6 +1205,7 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { const handlePageChange = (pageNumber: number) => { setActiveGroupId(null); setEditingGroupId(null); + clearSelection(); onSelectPage(pageNumber - 1); }; @@ -978,8 +1213,97 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { setEditingGroupId(null); setActiveGroupId(null); setActiveImageId(null); + clearSelection(); }; + const handleSelectionInteraction = useCallback( + (groupId: string, groupIndex: number, event: React.MouseEvent): boolean => { + const multiSelect = event.metaKey || event.ctrlKey; + const rangeSelect = event.shiftKey && lastSelectedGroupIdRef.current !== null; + setSelectedGroupIds((previous) => { + if (multiSelect) { + const next = new Set(previous); + if (next.has(groupId)) { + next.delete(groupId); + } else { + next.add(groupId); + } + return next; + } + if (rangeSelect) { + const anchorId = lastSelectedGroupIdRef.current; + const anchorIndex = anchorId ? 
pageGroups.findIndex((group) => group.id === anchorId) : -1; + if (anchorIndex === -1) { + return new Set([groupId]); + } + const start = Math.min(anchorIndex, groupIndex); + const end = Math.max(anchorIndex, groupIndex); + const next = new Set(); + for (let idx = start; idx <= end; idx += 1) { + const candidate = pageGroups[idx]; + if (candidate) { + next.add(candidate.id); + } + } + return next; + } + return new Set([groupId]); + }); + if (!rangeSelect) { + lastSelectedGroupIdRef.current = groupId; + } + return !(multiSelect || rangeSelect); + }, + [pageGroups], + ); + + const handleResizeStart = useCallback( + (event: React.MouseEvent, group: TextGroup, currentWidth: number) => { + const baseWidth = Math.max(group.bounds.right - group.bounds.left, 1); + const maxWidth = Math.max(pageWidth - group.bounds.left, baseWidth); + event.stopPropagation(); + event.preventDefault(); + const startX = event.clientX; + const handleMouseMove = (moveEvent: MouseEvent) => { + const context = resizingRef.current; + if (!context) { + return; + } + moveEvent.preventDefault(); + const deltaPx = moveEvent.clientX - context.startX; + const deltaWidth = deltaPx / scale; + const nextWidth = Math.min( + Math.max(context.startWidth + deltaWidth, context.baseWidth), + context.maxWidth, + ); + setWidthOverrides((prev) => { + const next = new Map(prev); + if (Math.abs(nextWidth - context.baseWidth) <= 0.5) { + next.delete(context.groupId); + } else { + next.set(context.groupId, nextWidth); + } + return next; + }); + }; + const handleMouseUp = () => { + resizingRef.current = null; + window.removeEventListener('mousemove', handleMouseMove); + window.removeEventListener('mouseup', handleMouseUp); + }; + resizingRef.current = { + groupId: group.id, + startX, + startWidth: currentWidth, + baseWidth, + maxWidth, + }; + window.addEventListener('mousemove', handleMouseMove); + window.addEventListener('mouseup', handleMouseUp); + }, + [pageWidth, scale], + ); + const renderGroupContainer = ( 
groupId: string, pageIndex: number, @@ -988,6 +1312,8 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { content: React.ReactNode, onActivate?: (event: React.MouseEvent) => void, onClick?: (event: React.MouseEvent) => void, + isSelected = false, + resizeHandle?: React.ReactNode, ) => ( { marginTop: '-3px', outline: isActive ? '2px solid var(--mantine-color-blue-5)' - : isChanged - ? '1px solid var(--mantine-color-yellow-5)' - : 'none', + : isSelected + ? '1px solid var(--mantine-color-violet-5)' + : isChanged + ? '1px solid var(--mantine-color-yellow-5)' + : 'none', outlineOffset: '-1px', borderRadius: 6, - backgroundColor: isChanged || isActive ? 'rgba(250,255,189,0.28)' : 'transparent', + backgroundColor: isActive + ? 'rgba(184,212,255,0.35)' + : isSelected + ? 'rgba(206,190,255,0.32)' + : isChanged + ? 'rgba(250,255,189,0.28)' + : 'transparent', transition: 'outline 120ms ease, background-color 120ms ease', pointerEvents: 'auto', overflow: 'visible', @@ -1023,6 +1357,7 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { }} > {content} + {resizeHandle} {activeGroupId === groupId && ( { position: 'absolute', top: -8, right: -8, - zIndex: 10, + zIndex: 9999, cursor: 'pointer', + pointerEvents: 'auto', + }} + onMouseDown={(event) => { + console.log(`❌ MOUSEDOWN on X button for group ${groupId}`); + event.stopPropagation(); + event.preventDefault(); + + // Find the current group to check if it's already empty + const currentGroups = groupsByPage[pageIndex] ?? []; + const currentGroup = currentGroups.find(g => g.id === groupId); + const currentText = (currentGroup?.text ?? 
'').trim(); + + if (currentText.length === 0) { + // Already empty - remove the textbox entirely + console.log(` Text already empty, removing textbox`); + onGroupDelete(pageIndex, groupId); + setActiveGroupId(null); + setEditingGroupId(null); + } else { + // Has text - clear it but keep the textbox + console.log(` Clearing text (textbox remains)`); + onGroupEdit(pageIndex, groupId, ''); + } + console.log(` Operation completed`); }} onClick={(event) => { + console.log(`❌ X button ONCLICK fired for group ${groupId} on page ${pageIndex}`); event.stopPropagation(); - onGroupDelete(pageIndex, groupId); - setActiveGroupId(null); - setEditingGroupId(null); + event.preventDefault(); }} > @@ -1110,19 +1468,6 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { - - {(props) => ( - - )} - + + + + + + { borderRadius: '0.5rem', overflow: 'hidden', }} - ref={containerRef} + ref={(node) => { + containerRef.current = node; + if (node) { + console.log(`πŸ–ΌοΈ [PdfTextEditor] Canvas Rendered:`, { + renderedWidth: node.offsetWidth, + renderedHeight: node.offsetHeight, + styleWidth: scaledWidth, + styleHeight: scaledHeight, + pageNumber: selectedPage + 1, + }); + } + }} > {pagePreview && ( { }} /> )} + {selectionToolbarPosition && ( + { + event.stopPropagation(); + }} + onClick={(event) => { + event.stopPropagation(); + }} + > + {canMergeSelection && ( + + + + + + )} + {canUngroupSelection && ( + + + + + + )} + + + event.stopPropagation()} + onClick={(event) => event.stopPropagation()} + > + + + + + handleWidthAdjustment('expand')} + > + {t('pdfTextEditor.manual.expandWidth', 'Expand to page edge')} + + handleWidthAdjustment('reset')} + > + {t('pdfTextEditor.manual.resetWidth', 'Reset width')} + + + + + )} {orderedImages.map((image, imageIndex) => { if (!image?.imageData) { return null; @@ -1599,7 +2115,8 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { let containerLeft = bounds.left; let containerTop = bounds.top; - let containerWidth = 
Math.max(bounds.width, fontSizePx); + const { width: resolvedWidth, base: baseWidth, max: maxWidth } = resolveGroupWidth(group); + let containerWidth = Math.max(resolvedWidth * scale, fontSizePx); let containerHeight = Math.max(bounds.height, paragraphHeightPx); let transform: string | undefined; let transformOrigin: React.CSSProperties['transformOrigin']; @@ -1636,13 +2153,30 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { const textColor = group.color || '#111827'; const fontWeight = group.fontWeight || getFontWeight(effectiveFontId, group.pageIndex); + // Determine text wrapping behavior based on whether text has been changed + const hasChanges = changed; + const widthExtended = resolvedWidth - baseWidth > 0.5; + const enableWrap = isParagraphLayout || widthExtended || isEditing || hasChanges; + const whiteSpace = enableWrap ? 'pre-wrap' : 'pre'; + const wordBreak = enableWrap ? 'break-word' : 'normal'; + const overflowWrap = enableWrap ? 'break-word' : 'normal'; + + // For paragraph mode, allow height to grow to accommodate lines without wrapping + // For single-line mode, maintain fixed height based on PDF bounds + const useFlexibleHeight = isEditing || enableWrap || (isParagraphLayout && lineCount > 1); + + // The renderGroupContainer wrapper adds 4px horizontal padding (2px left + 2px right) + // We need to add this to the container width to compensate, so the inner content + // has the full PDF-defined width available for text + const WRAPPER_HORIZONTAL_PADDING = 4; + const containerStyle: React.CSSProperties = { position: 'absolute', left: `${containerLeft}px`, top: `${containerTop}px`, - width: `${containerWidth}px`, - height: isEditing ? 'auto' : `${containerHeight}px`, - minHeight: `${containerHeight}px`, + width: `${containerWidth + WRAPPER_HORIZONTAL_PADDING}px`, + height: useFlexibleHeight ? 'auto' : `${containerHeight}px`, + minHeight: useFlexibleHeight ? 
'auto' : `${containerHeight}px`, display: 'flex', alignItems: 'flex-start', justifyContent: 'flex-start', @@ -1653,6 +2187,35 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { transformOrigin, }; + const showResizeHandle = !hasRotation && (selectedGroupIds.has(group.id) || activeGroupId === group.id); + const resizeHandle = showResizeHandle ? ( + handleResizeStart(event, group, resolvedWidth)} + style={{ + position: 'absolute', + top: '50%', + right: -6, + width: 12, + height: 32, + marginTop: -16, + cursor: 'ew-resize', + borderRadius: 6, + backgroundColor: 'rgba(76, 110, 245, 0.35)', + border: '1px solid rgba(76, 110, 245, 0.8)', + display: 'flex', + alignItems: 'center', + justifyContent: 'center', + color: 'white', + fontSize: 9, + userSelect: 'none', + }} + > + || + + ) : null; + if (isEditing) { return ( @@ -1708,8 +2271,8 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { width: '100%', minHeight: '100%', height: 'auto', - padding: 0, - backgroundColor: 'rgba(255,255,255,0.95)', + padding: '2px', + backgroundColor: 'rgba(255,255,255,0.95)', color: textColor, fontSize: `${fontSizePx}px`, fontFamily, @@ -1718,15 +2281,19 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { outline: 'none', border: 'none', display: 'block', - whiteSpace: paragraphWhiteSpace, - wordBreak: paragraphWordBreak, - overflowWrap: paragraphOverflowWrap, + whiteSpace, + wordBreak, + overflowWrap, cursor: 'text', overflow: 'visible', }} > {group.text || '\u00A0'} , + undefined, + undefined, + selectedGroupIds.has(group.id), + resizeHandle, )} ); @@ -1747,10 +2314,10 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { style={{ width: '100%', minHeight: '100%', - padding: 0, - whiteSpace: paragraphWhiteSpace, - wordBreak: paragraphWordBreak, - overflowWrap: paragraphOverflowWrap, + padding: '2px', + whiteSpace, + wordBreak, + overflowWrap, fontSize: `${fontSizePx}px`, fontFamily, fontWeight, @@ -1758,17 +2325,17 @@ const 
PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { color: textColor, display: 'block', cursor: 'text', - overflow: 'hidden', + overflow: enableWrap ? 'visible' : 'hidden', }} > {group.text || '\u00A0'} @@ -1776,6 +2343,13 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { , undefined, (event: React.MouseEvent) => { + const shouldActivate = handleSelectionInteraction(group.id, pageGroupIndex, event); + if (!shouldActivate) { + setActiveGroupId(null); + setEditingGroupId(null); + return; + } + const clickX = event.clientX; const clickY = event.clientY; @@ -1783,6 +2357,22 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { setEditingGroupId(group.id); caretOffsetsRef.current.delete(group.id); + // Log group stats when selected + const lines = (group.text ?? '').split('\n'); + const words = (group.text ?? '').split(/\s+/).filter(w => w.length > 0).length; + const chars = (group.text ?? '').length; + const width = group.bounds.right - group.bounds.left; + const height = group.bounds.bottom - group.bounds.top; + const isMultiLine = lines.length > 1; + console.log(`πŸ“ Selected Text Group "${group.id}":`); + console.log(` Lines: ${lines.length}, Words: ${words}, Chars: ${chars}`); + console.log(` Dimensions: ${width.toFixed(1)}pt Γ— ${height.toFixed(1)}pt`); + console.log(` Type: ${isMultiLine ? 'MULTI-LINE (paragraph)' : 'SINGLE-LINE'}`); + console.log(` Text preview: "${(group.text ?? '').substring(0, 80)}${(group.text ?? '').length > 80 ? '...' : ''}"`); + if (isMultiLine) { + console.log(` Line spacing: ${group.lineSpacing?.toFixed(1) ?? 
'unknown'}pt`); + } + requestAnimationFrame(() => { const editor = document.querySelector(`[data-editor-group="${group.id}"]`); if (!editor) return; @@ -1814,6 +2404,8 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { }, 10); }); }, + selectedGroupIds.has(group.id), + resizeHandle, )} ); @@ -1852,6 +2444,11 @@ const PdfTextEditorView = ({ data }: PdfTextEditorViewProps) => { + + {/* Navigation Warning Modal */} + ); }; diff --git a/frontend/src/proprietary/tools/pdfTextEditor/PdfTextEditor.tsx b/frontend/src/proprietary/tools/pdfTextEditor/PdfTextEditor.tsx index 8ef040338..d04d8e246 100644 --- a/frontend/src/proprietary/tools/pdfTextEditor/PdfTextEditor.tsx +++ b/frontend/src/proprietary/tools/pdfTextEditor/PdfTextEditor.tsx @@ -18,6 +18,7 @@ import { PdfJsonPage, TextGroup, PdfTextEditorViewData, + BoundingBox, } from './pdfTextEditorTypes'; import { deepCloneDocument, @@ -26,6 +27,7 @@ import { restoreGlyphElements, extractDocumentImages, cloneImageElement, + cloneTextElement, valueOr, } from './pdfTextEditorUtils'; import PdfTextEditorView from '@app/components/tools/pdfTextEditor/PdfTextEditorView'; @@ -52,6 +54,148 @@ const getAutoLoadKey = (file: File): string => { return `${file.name}|${file.size}|${file.lastModified}`; }; +const normalizeLineArray = (value: string | undefined | null, expected: number): string[] => { + const normalized = (value ?? '').replace(/\r/g, ''); + if (expected <= 0) { + return [normalized]; + } + const parts = normalized.split('\n'); + if (parts.length === expected) { + return parts; + } + if (parts.length < expected) { + return parts.concat(Array(expected - parts.length).fill('')); + } + const head = parts.slice(0, Math.max(expected - 1, 0)); + const tail = parts.slice(Math.max(expected - 1, 0)).join('\n'); + return [...head, tail]; +}; + +const cloneLineTemplate = (line: TextGroup, text?: string, originalText?: string): TextGroup => ({ + ...line, + text: text ?? line.text, + originalText: originalText ?? 
line.originalText, + childLineGroups: null, + lineElementCounts: null, + lineSpacing: null, + elements: line.elements.map(cloneTextElement), + originalElements: line.originalElements.map(cloneTextElement), +}); + +const expandGroupToLines = (group: TextGroup): TextGroup[] => { + if (group.childLineGroups && group.childLineGroups.length > 0) { + const textLines = normalizeLineArray(group.text, group.childLineGroups.length); + const originalLines = normalizeLineArray(group.originalText, group.childLineGroups.length); + return group.childLineGroups.map((child, index) => + cloneLineTemplate(child, textLines[index], originalLines[index]), + ); + } + return [cloneLineTemplate(group)]; +}; + +const mergeBoundingBoxes = (boxes: BoundingBox[]): BoundingBox => { + if (boxes.length === 0) { + return { left: 0, right: 0, top: 0, bottom: 0 }; + } + return boxes.reduce( + (acc, box) => ({ + left: Math.min(acc.left, box.left), + right: Math.max(acc.right, box.right), + top: Math.min(acc.top, box.top), + bottom: Math.max(acc.bottom, box.bottom), + }), + { ...boxes[0] }, + ); +}; + +const buildMergedGroupFromSelection = (groups: TextGroup[]): TextGroup | null => { + if (groups.length === 0) { + return null; + } + + const lineTemplates = groups.flatMap(expandGroupToLines); + if (lineTemplates.length <= 1) { + return null; + } + + const lineTexts = lineTemplates.map((line) => line.text ?? ''); + const lineOriginalTexts = lineTemplates.map((line) => line.originalText ?? ''); + const combinedOriginals = lineTemplates.flatMap((line) => line.originalElements.map(cloneTextElement)); + const combinedElements = combinedOriginals.map(cloneTextElement); + const mergedBounds = mergeBoundingBoxes(lineTemplates.map((line) => line.bounds)); + + const spacingValues: number[] = []; + for (let index = 1; index < lineTemplates.length; index += 1) { + const prevBaseline = lineTemplates[index - 1].baseline ?? 
lineTemplates[index - 1].bounds.bottom; + const currentBaseline = lineTemplates[index].baseline ?? lineTemplates[index].bounds.bottom; + const spacing = Math.abs(prevBaseline - currentBaseline); + if (spacing > 0) { + spacingValues.push(spacing); + } + } + const averageSpacing = + spacingValues.length > 0 + ? spacingValues.reduce((sum, value) => sum + value, 0) / spacingValues.length + : null; + + const first = groups[0]; + const lineElementCounts = lineTemplates.map((line) => Math.max(line.originalElements.length, 1)); + const paragraph: TextGroup = { + ...first, + text: lineTexts.join('\n'), + originalText: lineOriginalTexts.join('\n'), + elements: combinedElements, + originalElements: combinedOriginals, + bounds: mergedBounds, + lineSpacing: averageSpacing, + lineElementCounts: lineElementCounts.length > 1 ? lineElementCounts : null, + childLineGroups: lineTemplates.map((line, index) => + cloneLineTemplate(line, lineTexts[index], lineOriginalTexts[index]), + ), + }; + + return paragraph; +}; + +const splitParagraphGroup = (group: TextGroup): TextGroup[] => { + if (!group.childLineGroups || group.childLineGroups.length <= 1) { + return []; + } + + const templateLines = group.childLineGroups.map((child) => cloneLineTemplate(child)); + const lineCount = templateLines.length; + const textLines = normalizeLineArray(group.text, lineCount); + const originalLines = normalizeLineArray(group.originalText, lineCount); + const baseCounts = + group.lineElementCounts && group.lineElementCounts.length === lineCount + ? 
[...group.lineElementCounts] + : templateLines.map((line) => Math.max(line.originalElements.length, 1)); + + const totalOriginals = group.originalElements.length; + const counted = baseCounts.reduce((sum, count) => sum + count, 0); + if (counted < totalOriginals && baseCounts.length > 0) { + baseCounts[baseCounts.length - 1] += totalOriginals - counted; + } + + let offset = 0; + return templateLines.map((template, index) => { + const take = Math.max(1, baseCounts[index] ?? 1); + const slice = group.originalElements.slice(offset, offset + take).map(cloneTextElement); + offset += take; + return { + ...template, + id: `${group.id}-line-${index + 1}-${Date.now()}-${index}`, + text: textLines[index] ?? '', + originalText: originalLines[index] ?? '', + elements: slice.map(cloneTextElement), + originalElements: slice, + lineElementCounts: null, + lineSpacing: null, + childLineGroups: null, + }; + }); +}; + const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => { const { t } = useTranslation(); const { @@ -63,6 +207,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => { } = useToolWorkflow(); const { actions: navigationActions } = useNavigationActions(); const navigationState = useNavigationState(); + const { registerUnsavedChangesChecker, unregisterUnsavedChangesChecker } = navigationActions; const [loadedDocument, setLoadedDocument] = useState(null); const [groupsByPage, setGroupsByPage] = useState([]); @@ -89,6 +234,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => { const [loadingImagePages, setLoadingImagePages] = useState>(new Set()); const originalImagesRef = useRef([]); + const originalGroupsRef = useRef([]); const imagesByPageRef = useRef([]); const autoLoadKeyRef = useRef(null); const loadRequestIdRef = useRef(0); @@ -131,7 +277,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => { }, []); const dirtyPages = useMemo( - () => getDirtyPages(groupsByPage, imagesByPage, originalImagesRef.current), + 
() => getDirtyPages(groupsByPage, imagesByPage, originalGroupsRef.current, originalImagesRef.current), [groupsByPage, imagesByPage], ); const hasChanges = useMemo(() => dirtyPages.some(Boolean), [dirtyPages]); @@ -157,6 +303,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => { const images = extractDocumentImages(cloned); const originalImages = images.map((page) => page.map(cloneImageElement)); originalImagesRef.current = originalImages; + originalGroupsRef.current = groups.map((page) => page.map((group) => ({ ...group }))); imagesByPageRef.current = images.map((page) => page.map(cloneImageElement)); const initialLoaded = new Set(); originalImages.forEach((pageImages, index) => { @@ -351,8 +498,6 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => { let shouldUseLazyMode = false; let pendingJobId: string | null = null; - setErrorMessage(null); - if (isPdf) { latestPdfRequestIdRef.current = requestId; setIsConverting(true); @@ -539,7 +684,6 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => { setCachedJobId(shouldUseLazyMode ? 
pendingJobId : null); setFileName(file.name); setErrorMessage(null); - autoLoadKeyRef.current = fileKey; } catch (error: any) { console.error('Failed to load file', error); console.error('Error details:', { @@ -598,13 +742,83 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => { }, []); const handleGroupDelete = useCallback((pageIndex: number, groupId: string) => { + console.log(`πŸ—‘οΈ Deleting group ${groupId} from page ${pageIndex}`); + setGroupsByPage((previous) => { + const updated = previous.map((groups, idx) => { + if (idx !== pageIndex) return groups; + const filtered = groups.filter((group) => group.id !== groupId); + console.log(` Before: ${groups.length} groups, After: ${filtered.length} groups`); + return filtered; + }); + return updated; + }); + }, []); + + const handleMergeGroups = useCallback((pageIndex: number, groupIds: string[]): boolean => { + if (groupIds.length < 2) { + return false; + } + let updated = false; setGroupsByPage((previous) => - previous.map((groups, idx) => - idx !== pageIndex - ? groups - : groups.map((group) => (group.id === groupId ? 
{ ...group, text: '' } : group)) - ) + previous.map((groups, idx) => { + if (idx !== pageIndex) { + return groups; + } + const indices = groupIds + .map((id) => groups.findIndex((group) => group.id === id)) + .filter((index) => index >= 0); + if (indices.length !== groupIds.length) { + return groups; + } + const sorted = [...indices].sort((a, b) => a - b); + for (let i = 1; i < sorted.length; i += 1) { + if (sorted[i] !== sorted[i - 1] + 1) { + return groups; + } + } + const selection = sorted.map((position) => groups[position]); + const merged = buildMergedGroupFromSelection(selection); + if (!merged) { + return groups; + } + const next = [ + ...groups.slice(0, sorted[0]), + merged, + ...groups.slice(sorted[sorted.length - 1] + 1), + ]; + updated = true; + return next; + }), ); + return updated; + }, []); + + const handleUngroupGroup = useCallback((pageIndex: number, groupId: string): boolean => { + let updated = false; + setGroupsByPage((previous) => + previous.map((groups, idx) => { + if (idx !== pageIndex) { + return groups; + } + const targetIndex = groups.findIndex((group) => group.id === groupId); + if (targetIndex < 0) { + return groups; + } + const targetGroup = groups[targetIndex]; + const splits = splitParagraphGroup(targetGroup); + if (splits.length <= 1) { + return groups; + } + const next = [ + ...groups.slice(0, targetIndex), + ...splits, + ...groups.slice(targetIndex + 1), + ]; + updated = true; + return next; + }), + ); + return updated; }, []); const handleImageTransform = useCallback( @@ -746,7 +960,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => { } }, [buildPayload, onComplete]); - const handleGeneratePdf = useCallback(async () => { + const handleGeneratePdf = useCallback(async (skipComplete = false) => { try { setIsGeneratingPdf(true); @@ -840,7 +1054,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => { downloadBlob(response.data, downloadName); - if (onComplete) { + if (onComplete && !skipComplete) { 
const pdfFile = new File([response.data], downloadName, { type: 'application/pdf' }); onComplete([pdfFile]); } @@ -881,7 +1095,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => { downloadBlob(response.data, downloadName); - if (onComplete) { + if (onComplete && !skipComplete) { const pdfFile = new File([response.data], downloadName, { type: 'application/pdf' }); onComplete([pdfFile]); } @@ -1052,7 +1266,6 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => { forceSingleTextElement, groupingMode, requestPagePreview, - onLoadJson: handleLoadFile, onSelectPage: handleSelectPage, onGroupEdit: handleGroupTextChange, onGroupDelete: handleGroupDelete, @@ -1061,9 +1274,17 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => { onReset: handleResetEdits, onDownloadJson: handleDownloadJson, onGeneratePdf: handleGeneratePdf, + onGeneratePdfForNavigation: async () => { + // Generate PDF without triggering tool completion + await handleGeneratePdf(true); + }, onForceSingleTextElementChange: setForceSingleTextElement, onGroupingModeChange: setGroupingMode, + onMergeGroups: handleMergeGroups, + onUngroupGroup: handleUngroupGroup, }), [ + handleMergeGroups, + handleUngroupGroup, handleImageTransform, imagesByPage, pagePreviews, @@ -1076,7 +1297,6 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => { handleGroupTextChange, handleGroupDelete, handleImageReset, - handleLoadFile, handleResetEdits, handleSelectPage, hasChanges, @@ -1155,14 +1375,30 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => { unregisterCustomWorkbenchView, ]); + // Note: Compare tool doesn't auto-force workbench, and neither should we + // The workbench should be set when the tool is selected via proper channels + // (tool registry, tool picker, etc.) 
- not forced here + + // Keep hasChanges in a ref for the checker to access + const hasChangesRef = useRef(hasChanges); useEffect(() => { - if ( - navigationState.selectedTool === 'pdfTextEditor' && - navigationState.workbench !== WORKBENCH_ID - ) { - navigationActions.setWorkbench(WORKBENCH_ID); - } - }, [navigationActions, navigationState.selectedTool, navigationState.workbench]); + hasChangesRef.current = hasChanges; + console.log('[PdfTextEditor] hasChanges updated to:', hasChanges); + }, [hasChanges]); + + // Register unsaved changes checker for navigation guard + useEffect(() => { + const checker = () => { + console.log('[PdfTextEditor] Checking unsaved changes:', hasChangesRef.current); + return hasChangesRef.current; + }; + registerUnsavedChangesChecker(checker); + console.log('[PdfTextEditor] Registered unsaved changes checker'); + return () => { + console.log('[PdfTextEditor] Unregistered unsaved changes checker'); + unregisterUnsavedChangesChecker(); + }; + }, [registerUnsavedChangesChecker, unregisterUnsavedChangesChecker]); const lastSentViewDataRef = useRef(null); diff --git a/frontend/src/proprietary/tools/pdfTextEditor/pdfTextEditorTypes.ts b/frontend/src/proprietary/tools/pdfTextEditor/pdfTextEditorTypes.ts index 795b4542e..249553716 100644 --- a/frontend/src/proprietary/tools/pdfTextEditor/pdfTextEditorTypes.ts +++ b/frontend/src/proprietary/tools/pdfTextEditor/pdfTextEditorTypes.ts @@ -168,6 +168,7 @@ export interface TextGroup { text: string; originalText: string; bounds: BoundingBox; + childLineGroups?: TextGroup[] | null; } export const DEFAULT_PAGE_WIDTH = 612; @@ -199,7 +200,6 @@ export interface PdfTextEditorViewData { forceSingleTextElement: boolean; groupingMode: 'auto' | 'paragraph' | 'singleLine'; requestPagePreview: (pageIndex: number, scale: number) => void; - onLoadJson: (file: File | null) => Promise | void; onSelectPage: (pageIndex: number) => void; onGroupEdit: (pageIndex: number, groupId: string, value: string) => void; 
onGroupDelete: (pageIndex: number, groupId: string) => void; @@ -218,6 +218,9 @@ export interface PdfTextEditorViewData { onReset: () => void; onDownloadJson: () => void; onGeneratePdf: () => void; + onGeneratePdfForNavigation: () => Promise; onForceSingleTextElementChange: (value: boolean) => void; onGroupingModeChange: (value: 'auto' | 'paragraph' | 'singleLine') => void; + onMergeGroups: (pageIndex: number, groupIds: string[]) => boolean; + onUngroupGroup: (pageIndex: number, groupId: string) => boolean; } diff --git a/frontend/src/proprietary/tools/pdfTextEditor/pdfTextEditorUtils.ts b/frontend/src/proprietary/tools/pdfTextEditor/pdfTextEditorUtils.ts index f7e005125..85f6ed738 100644 --- a/frontend/src/proprietary/tools/pdfTextEditor/pdfTextEditorUtils.ts +++ b/frontend/src/proprietary/tools/pdfTextEditor/pdfTextEditorUtils.ts @@ -520,8 +520,18 @@ const createGroup = ( }; }; +const cloneLineTemplate = (line: TextGroup): TextGroup => ({ + ...line, + childLineGroups: null, + lineElementCounts: null, + lineSpacing: null, + elements: line.elements.map(cloneTextElement), + originalElements: line.originalElements.map(cloneTextElement), +}); + const groupLinesIntoParagraphs = ( lineGroups: TextGroup[], + pageWidth: number, metrics?: FontMetricsMap, ): TextGroup[] => { if (lineGroups.length === 0) { @@ -530,6 +540,8 @@ const groupLinesIntoParagraphs = ( const paragraphs: TextGroup[][] = []; let currentParagraph: TextGroup[] = [lineGroups[0]]; + const bulletFlags = new Map(); + bulletFlags.set(lineGroups[0].id, false); for (let i = 1; i < lineGroups.length; i++) { const prevLine = lineGroups[i - 1]; @@ -561,11 +573,85 @@ const groupLinesIntoParagraphs = ( const maxReasonableSpacing = avgFontSize * 3.0; // Max ~3x font size for normal line spacing const hasReasonableSpacing = lineSpacing <= maxReasonableSpacing; + // Check if current line looks like a bullet/list item + const prevRight = prevLine.bounds.right; + const currentRight = currentLine.bounds.right; + const 
prevWidth = prevRight - prevLeft; + const currentWidth = currentRight - currentLeft; + + // Count word count to help identify bullets (typically short) + const prevWords = (prevLine.text ?? '').split(/\s+/).filter(w => w.length > 0).length; + const currentWords = (currentLine.text ?? '').split(/\s+/).filter(w => w.length > 0).length; + const prevText = (prevLine.text ?? '').trim(); + const currentText = (currentLine.text ?? '').trim(); + + // Bullet detection - look for bullet markers or very short lines + const bulletMarkerRegex = /^[\u2022\u2023\u25E6\u2043\u2219‒·◦‣⁃\-\*]\s|^\d+[\.\)]\s|^[a-z][\.\)]\s/i; + const prevHasBulletMarker = bulletMarkerRegex.test(prevText); + const currentHasBulletMarker = bulletMarkerRegex.test(currentText); + + // True bullets are: + // 1. Have bullet markers/numbers OR + // 2. Very short (< 10 words) AND much narrower than average (< 60% of page width) + const headingKeywords = ['action items', 'next steps', 'notes', 'logistics', 'tasks']; + const normalizedPageWidth = pageWidth > 0 ? pageWidth : avgFontSize * 70; + const maxReferenceWidth = normalizedPageWidth > 0 ? normalizedPageWidth : avgFontSize * 70; + const indentDelta = currentLeft - prevLeft; + const indentThreshold = Math.max(avgFontSize * 0.6, 8); + const hasIndent = indentDelta > indentThreshold; + const currentWidthRatio = maxReferenceWidth > 0 ? currentWidth / maxReferenceWidth : 0; + const prevWidthRatio = maxReferenceWidth > 0 ? prevWidth / maxReferenceWidth : 0; + const prevLooksLikeHeading = + prevText.endsWith(':') || + (prevWords <= 4 && prevWidthRatio < 0.4) || + headingKeywords.some((keyword) => prevText.toLowerCase().includes(keyword)); + + const wrapCandidate = + !currentHasBulletMarker && + !hasIndent && + !prevLooksLikeHeading && + currentWords <= 12 && + currentWidthRatio < 0.45 && + Math.abs(prevLeft - currentLeft) <= leftAlignmentTolerance && + currentWidth < prevWidth * 0.85; + + const currentIsBullet = wrapCandidate + ? 
false + : currentHasBulletMarker || + (hasIndent && (currentWords <= 14 || currentWidthRatio <= 0.65)) || + (prevLooksLikeHeading && (currentWords <= 16 || currentWidthRatio <= 0.8 || prevWidthRatio < 0.35)) || + (currentWords <= 8 && currentWidthRatio <= 0.45 && prevWidth - currentWidth > avgFontSize * 4); + + const prevIsBullet = bulletFlags.get(prevLine.id) ?? prevHasBulletMarker; + bulletFlags.set(currentLine.id, currentIsBullet); + + // Detect paragraphβ†’bullet transition + const likelyBulletStart = !prevIsBullet && currentIsBullet; + + // Don't merge two consecutive bullets + const bothAreBullets = prevIsBullet && currentIsBullet; + // Merge into paragraph if: // 1. Left aligned // 2. Same font - // 3. Reasonable line spacing (not a large gap indicating paragraph break) - const shouldMerge = isLeftAligned && sameFont && hasReasonableSpacing; + // 3. Reasonable line spacing + // 4. NOT transitioning to bullets + // 5. NOT both are bullets + const shouldMerge = + isLeftAligned && + sameFont && + hasReasonableSpacing && + !likelyBulletStart && + !bothAreBullets && + !currentIsBullet; + + if (i < 10 || likelyBulletStart || bothAreBullets || !shouldMerge) { + console.log(` Line ${i}:`); + console.log(` prev: "${prevText.substring(0, 40)}" (${prevWords}w, ${prevWidth.toFixed(0)}pt, marker:${prevHasBulletMarker}, bullet:${prevIsBullet})`); + console.log(` curr: "${currentText.substring(0, 40)}" (${currentWords}w, ${currentWidth.toFixed(0)}pt, marker:${currentHasBulletMarker}, bullet:${currentIsBullet})`); + console.log(` checks: leftAlign:${isLeftAligned} (${Math.abs(prevLeft - currentLeft).toFixed(1)}pt), sameFont:${sameFont}, spacing:${hasReasonableSpacing} (${lineSpacing.toFixed(1)}pt/${maxReasonableSpacing.toFixed(1)}pt)`); + console.log(` decision: merge=${shouldMerge} (bulletStart:${likelyBulletStart}, bothBullets:${bothAreBullets})`); + } if (shouldMerge) { currentParagraph.push(currentLine); @@ -587,17 +673,24 @@ const groupLinesIntoParagraphs = ( } // 
Combine all elements from all lines - const allElements = lines.flatMap(line => line.originalElements); + const lineTemplates = lines.map(line => cloneLineTemplate(line)); + const flattenedLineTemplates = lineTemplates.flatMap((line) => + line.childLineGroups && line.childLineGroups.length > 0 + ? line.childLineGroups + : [line], + ); + const allLines = flattenedLineTemplates.length > 0 ? flattenedLineTemplates : lineTemplates; + const allElements = allLines.flatMap(line => line.originalElements); const pageIndex = lines[0].pageIndex; - const lineElementCounts = lines.map((line) => line.originalElements.length); + const lineElementCounts = allLines.map((line) => line.originalElements.length); // Create merged group with newlines between lines - const paragraphText = lines.map(line => line.text).join('\n'); - const mergedBounds = mergeBounds(lines.map(line => line.bounds)); + const paragraphText = allLines.map(line => line.text).join('\n'); + const mergedBounds = mergeBounds(allLines.map(line => line.bounds)); const spacingValues: number[] = []; - for (let i = 1; i < lines.length; i++) { - const prevBaseline = lines[i - 1].baseline ?? lines[i - 1].bounds.bottom; - const currentBaseline = lines[i].baseline ?? lines[i].bounds.bottom; + for (let i = 1; i < allLines.length; i++) { + const prevBaseline = allLines[i - 1].baseline ?? allLines[i - 1].bounds.bottom; + const currentBaseline = allLines[i].baseline ?? 
allLines[i].bounds.bottom; const spacing = Math.abs(prevBaseline - currentBaseline); if (spacing > 0) { spacingValues.push(spacing); @@ -633,6 +726,7 @@ const groupLinesIntoParagraphs = ( text: paragraphText, originalText: paragraphText, bounds: mergedBounds, + childLineGroups: allLines, }; }); }; @@ -647,6 +741,8 @@ export const groupPageTextElements = ( return []; } + const pageWidth = valueOr(page.width, DEFAULT_PAGE_WIDTH); + const elements = page.textElements .map(cloneTextElement) .filter((element) => element.text !== null && element.text !== undefined); @@ -740,7 +836,7 @@ export const groupPageTextElements = ( if (groupingMode === 'paragraph') { // Paragraph mode: always apply grouping - return groupLinesIntoParagraphs(lineGroups, metrics); + return groupLinesIntoParagraphs(lineGroups, pageWidth, metrics); } // Auto mode: use heuristic to determine if we should group @@ -749,6 +845,11 @@ export const groupPageTextElements = ( let totalWords = 0; let longTextGroups = 0; let totalGroups = 0; + const wordCounts: number[] = []; + let fullWidthLines = 0; + + // Define "full width" as extending to at least 70% of page width + const fullWidthThreshold = pageWidth * 0.7; lineGroups.forEach((group) => { const text = (group.text || '').trim(); @@ -760,14 +861,21 @@ export const groupPageTextElements = ( const wordCount = text.split(/\s+/).filter((w) => w.length > 0).length; totalWords += wordCount; + wordCounts.push(wordCount); if (lineCount > 1) { multiLineGroups++; } - if (wordCount >= 5 || text.length >= 30) { + if (wordCount >= 10 || text.length >= 50) { longTextGroups++; } + + // Check if this line extends close to the right margin (paragraph-like) + const rightEdge = group.bounds.right; + if (rightEdge >= fullWidthThreshold) { + fullWidthLines++; + } }); if (totalGroups === 0) { @@ -776,18 +884,65 @@ export const groupPageTextElements = ( const avgWordsPerGroup = totalWords / totalGroups; const longTextRatio = longTextGroups / totalGroups; + const 
fullWidthRatio = fullWidthLines / totalGroups; - const isParagraphPage = - (multiLineGroups >= 2 && avgWordsPerGroup > 8) || - avgWordsPerGroup > 12 || - longTextRatio > 0.4; + // Calculate variance in line lengths (paragraphs have varying lengths, lists are uniform) + const variance = wordCounts.reduce((sum, count) => { + const diff = count - avgWordsPerGroup; + return sum + diff * diff; + }, 0) / totalGroups; + const stdDev = Math.sqrt(variance); + const coefficientOfVariation = avgWordsPerGroup > 0 ? stdDev / avgWordsPerGroup : 0; + + // Check each criterion + const criterion1 = avgWordsPerGroup > 5; + const criterion2 = longTextRatio > 0.4; + const criterion3 = coefficientOfVariation > 0.5 || fullWidthRatio > 0.6; // High variance OR many full-width lines = paragraph text + + const isParagraphPage = criterion1 && criterion2 && criterion3; + + // Log detection stats + console.log(`πŸ“„ Page ${pageIndex} Grouping Analysis (mode: ${groupingMode}):`); + console.log(` Stats:`); + console.log(` β€’ Page width: ${pageWidth.toFixed(1)}pt (full-width threshold: ${fullWidthThreshold.toFixed(1)}pt)`); + console.log(` β€’ Multi-line groups: ${multiLineGroups}`); + console.log(` β€’ Total groups: ${totalGroups}`); + console.log(` β€’ Total words: ${totalWords}`); + console.log(` β€’ Long text groups (β‰₯10 words or β‰₯50 chars): ${longTextGroups}`); + console.log(` β€’ Full-width lines (β‰₯70% page width): ${fullWidthLines}`); + console.log(` β€’ Avg words per group: ${avgWordsPerGroup.toFixed(2)}`); + console.log(` β€’ Long text ratio: ${(longTextRatio * 100).toFixed(1)}%`); + console.log(` β€’ Full-width ratio: ${(fullWidthRatio * 100).toFixed(1)}%`); + console.log(` β€’ Std deviation: ${stdDev.toFixed(2)}`); + console.log(` β€’ Coefficient of variation: ${coefficientOfVariation.toFixed(2)}`); + console.log(` Criteria:`); + console.log(` 1. Avg Words Per Group: ${criterion1 ? 
'βœ… PASS' : '❌ FAIL'}`); + console.log(` (${avgWordsPerGroup.toFixed(2)} > 5)`); + console.log(` 2. Long Text Ratio: ${criterion2 ? 'βœ… PASS' : '❌ FAIL'}`); + console.log(` (${(longTextRatio * 100).toFixed(1)}% > 40%)`); + console.log(` 3. Line Width Pattern: ${criterion3 ? 'βœ… PASS' : '❌ FAIL'}`); + console.log(` (CV ${coefficientOfVariation.toFixed(2)} > 0.5 OR ${(fullWidthRatio * 100).toFixed(1)}% > 60%)`); + console.log(` ${coefficientOfVariation > 0.5 ? 'βœ“ High variance (varying line lengths)' : 'βœ— Low variance'} ${fullWidthRatio > 0.6 ? 'βœ“ Many full-width lines (paragraph-like)' : 'βœ— Few full-width lines (list-like)'}`); + console.log(` Decision: ${isParagraphPage ? 'πŸ“ PARAGRAPH MODE' : 'πŸ“‹ LINE MODE'}`); + if (isParagraphPage) { + console.log(` Reason: All three criteria passed (AND logic)`); + } else { + const failedReasons = []; + if (!criterion1) failedReasons.push('low average words per group'); + if (!criterion2) failedReasons.push('low ratio of long text groups'); + if (!criterion3) failedReasons.push('low variance and few full-width lines (list-like structure)'); + console.log(` Reason: ${failedReasons.join(', ')}`); + } + console.log(''); // Only apply paragraph grouping if it looks like a paragraph-heavy page if (isParagraphPage) { - return groupLinesIntoParagraphs(lineGroups, metrics); + console.log(`πŸ”€ Applying paragraph grouping to page ${pageIndex}`); + return groupLinesIntoParagraphs(lineGroups, pageWidth, metrics); } // For sparse pages, keep lines separate + console.log(`πŸ“‹ Keeping lines separate for page ${pageIndex}`); return lineGroups; }; @@ -829,10 +984,28 @@ export const deepCloneDocument = (document: PdfJsonDocument): PdfJsonDocument => }; export const pageDimensions = (page: PdfJsonPage | null | undefined): { width: number; height: number } => { - return { - width: valueOr(page?.width, DEFAULT_PAGE_WIDTH), - height: valueOr(page?.height, DEFAULT_PAGE_HEIGHT), - }; + const width = valueOr(page?.width, 
DEFAULT_PAGE_WIDTH); + const height = valueOr(page?.height, DEFAULT_PAGE_HEIGHT); + + console.log(`πŸ“ [pageDimensions] Calculating page size:`, { + hasPage: !!page, + rawWidth: page?.width, + rawHeight: page?.height, + mediaBox: page?.mediaBox, + cropBox: page?.cropBox, + rotation: page?.rotation, + calculatedWidth: width, + calculatedHeight: height, + DEFAULT_PAGE_WIDTH, + DEFAULT_PAGE_HEIGHT, + commonFormats: { + 'US Letter': '612 Γ— 792 pt', + 'A4': '595 Γ— 842 pt', + 'Legal': '612 Γ— 1008 pt', + }, + }); + + return { width, height }; }; export const createMergedElement = (group: TextGroup): PdfJsonTextElement => { @@ -1192,14 +1365,35 @@ export const areImageListsDifferent = ( export const getDirtyPages = ( groupsByPage: TextGroup[][], imagesByPage: PdfJsonImageElement[][], + originalGroupsByPage: TextGroup[][], originalImagesByPage: PdfJsonImageElement[][], ): boolean[] => { return groupsByPage.map((groups, index) => { + // Check if any text was modified const textDirty = groups.some((group) => group.text !== group.originalText); + + // Check if any groups were deleted by comparing with original groups + const originalGroups = originalGroupsByPage[index] ?? []; + const groupCountChanged = groups.length !== originalGroups.length; + const imageDirty = areImageListsDifferent( imagesByPage[index] ?? [], originalImagesByPage[index] ?? [], ); - return textDirty || imageDirty; + + const isDirty = textDirty || groupCountChanged || imageDirty; + + if (groupCountChanged || textDirty) { + console.log(`πŸ“„ Page ${index} dirty check:`, { + textDirty, + groupCountChanged, + originalGroupsLength: originalGroups.length, + currentGroupsLength: groups.length, + imageDirty, + isDirty, + }); + } + + return isDirty; }); };