mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-11-16 01:21:16 +01:00
Merge remote-tracking branch 'origin/codex/add-pdf-to-json-and-json-to-pdf-features' into demo
This commit is contained in:
commit
2c1d93887a
@ -415,9 +415,16 @@ public class PdfJsonConversionService {
|
|||||||
for (PDPage page : document.getPages()) {
|
for (PDPage page : document.getPages()) {
|
||||||
PdfJsonPageDimension dim = new PdfJsonPageDimension();
|
PdfJsonPageDimension dim = new PdfJsonPageDimension();
|
||||||
dim.setPageNumber(pageIndex + 1);
|
dim.setPageNumber(pageIndex + 1);
|
||||||
PDRectangle mediaBox = page.getMediaBox();
|
// Use CropBox if present (defines visible page area), otherwise fall back
|
||||||
dim.setWidth(mediaBox.getWidth());
|
// to MediaBox
|
||||||
dim.setHeight(mediaBox.getHeight());
|
PDRectangle pageBox = page.getCropBox();
|
||||||
|
if (pageBox == null
|
||||||
|
|| pageBox.getWidth() == 0
|
||||||
|
|| pageBox.getHeight() == 0) {
|
||||||
|
pageBox = page.getMediaBox();
|
||||||
|
}
|
||||||
|
dim.setWidth(pageBox.getWidth());
|
||||||
|
dim.setHeight(pageBox.getHeight());
|
||||||
dim.setRotation(page.getRotation());
|
dim.setRotation(page.getRotation());
|
||||||
pageDimensions.add(dim);
|
pageDimensions.add(dim);
|
||||||
pageIndex++;
|
pageIndex++;
|
||||||
@ -1851,9 +1858,13 @@ public class PdfJsonConversionService {
|
|||||||
for (PDPage page : document.getPages()) {
|
for (PDPage page : document.getPages()) {
|
||||||
PdfJsonPage pageModel = new PdfJsonPage();
|
PdfJsonPage pageModel = new PdfJsonPage();
|
||||||
pageModel.setPageNumber(pageIndex + 1);
|
pageModel.setPageNumber(pageIndex + 1);
|
||||||
PDRectangle mediaBox = page.getMediaBox();
|
// Use CropBox if present (defines visible page area), otherwise fall back to MediaBox
|
||||||
pageModel.setWidth(mediaBox.getWidth());
|
PDRectangle pageBox = page.getCropBox();
|
||||||
pageModel.setHeight(mediaBox.getHeight());
|
if (pageBox == null || pageBox.getWidth() == 0 || pageBox.getHeight() == 0) {
|
||||||
|
pageBox = page.getMediaBox();
|
||||||
|
}
|
||||||
|
pageModel.setWidth(pageBox.getWidth());
|
||||||
|
pageModel.setHeight(pageBox.getHeight());
|
||||||
pageModel.setRotation(page.getRotation());
|
pageModel.setRotation(page.getRotation());
|
||||||
pageModel.setTextElements(textByPage.getOrDefault(pageIndex + 1, new ArrayList<>()));
|
pageModel.setTextElements(textByPage.getOrDefault(pageIndex + 1, new ArrayList<>()));
|
||||||
pageModel.setImageElements(imagesByPage.getOrDefault(pageIndex + 1, new ArrayList<>()));
|
pageModel.setImageElements(imagesByPage.getOrDefault(pageIndex + 1, new ArrayList<>()));
|
||||||
|
|||||||
@ -4533,6 +4533,32 @@
|
|||||||
"cancel": "Cancel",
|
"cancel": "Cancel",
|
||||||
"confirm": "Reset and Change Mode"
|
"confirm": "Reset and Change Mode"
|
||||||
},
|
},
|
||||||
|
"welcomeBanner": {
|
||||||
|
"title": "Welcome to PDF Text Editor (Early Access)",
|
||||||
|
"experimental": "This is an experimental feature in active development. Expect some instability and issues during use.",
|
||||||
|
"howItWorks": "This tool converts your PDF to an editable format where you can modify text content and reposition images. Changes are saved back as a new PDF.",
|
||||||
|
"bestFor": "Works Best With:",
|
||||||
|
"bestFor1": "Simple PDFs containing primarily text and images",
|
||||||
|
"bestFor2": "Documents with standard paragraph formatting",
|
||||||
|
"bestFor3": "Letters, essays, reports, and basic documents",
|
||||||
|
"notIdealFor": "Not Ideal For:",
|
||||||
|
"notIdealFor1": "PDFs with special formatting like bullet points, tables, or multi-column layouts",
|
||||||
|
"notIdealFor2": "Magazines, brochures, or heavily designed documents",
|
||||||
|
"notIdealFor3": "Instruction manuals with complex layouts",
|
||||||
|
"limitations": "Current Limitations:",
|
||||||
|
"limitation1": "Font rendering may differ slightly from the original PDF",
|
||||||
|
"limitation2": "Complex graphics, form fields, and annotations are preserved but not editable",
|
||||||
|
"limitation3": "Large files may take time to convert and process",
|
||||||
|
"knownIssues": "Known Issues (Being Fixed):",
|
||||||
|
"issue1": "Text colour is not currently preserved (will be added soon)",
|
||||||
|
"issue2": "Paragraph mode has more alignment and spacing issues - Single Line mode recommended",
|
||||||
|
"issue3": "The preview display differs from the exported PDF - exported PDFs are closer to the original",
|
||||||
|
"issue4": "Rotated text alignment may need manual adjustment",
|
||||||
|
"issue5": "Transparency and layering effects may vary from original",
|
||||||
|
"feedback": "This is an early access feature. Please report any issues you encounter to help us improve!",
|
||||||
|
"gotIt": "Got it",
|
||||||
|
"dontShowAgain": "Don't show again"
|
||||||
|
},
|
||||||
"disclaimer": {
|
"disclaimer": {
|
||||||
"heading": "Preview limitations",
|
"heading": "Preview limitations",
|
||||||
"textFocus": "This workspace focuses on editing text and repositioning embedded images. Complex page artwork, form widgets, and layered graphics are preserved for export but are not fully editable here.",
|
"textFocus": "This workspace focuses on editing text and repositioning embedded images. Complex page artwork, form widgets, and layered graphics are preserved for export but are not fully editable here.",
|
||||||
@ -4579,6 +4605,21 @@
|
|||||||
"standard14": "Standard PDF Font",
|
"standard14": "Standard PDF Font",
|
||||||
"warnings": "Warnings",
|
"warnings": "Warnings",
|
||||||
"suggestions": "Notes"
|
"suggestions": "Notes"
|
||||||
|
},
|
||||||
|
"manual": {
|
||||||
|
"mergeTooltip": "Merge selected boxes into a single paragraph",
|
||||||
|
"merge": "Merge selection",
|
||||||
|
"ungroupTooltip": "Split paragraph back into separate lines",
|
||||||
|
"ungroup": "Ungroup selection",
|
||||||
|
"widthMenu": "Width options",
|
||||||
|
"expandWidth": "Expand to page edge",
|
||||||
|
"resetWidth": "Reset width",
|
||||||
|
"resizeHandle": "Adjust text width"
|
||||||
|
},
|
||||||
|
"options": {
|
||||||
|
"manualGrouping": {
|
||||||
|
"descriptionInline": "Tip: Hold Ctrl (Cmd) or Shift to multi-select text boxes. A floating toolbar will appear above the selection so you can merge, ungroup, or adjust widths."
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"workspace": {
|
"workspace": {
|
||||||
|
|||||||
@ -121,10 +121,11 @@ export const NavigationProvider: React.FC<{
|
|||||||
hasUnsavedChanges
|
hasUnsavedChanges
|
||||||
});
|
});
|
||||||
|
|
||||||
// If we're leaving pageEditor or viewer workbench and have unsaved changes, request navigation
|
// If we're leaving pageEditor, viewer, or custom workbench and have unsaved changes, request navigation
|
||||||
const leavingWorkbenchWithChanges =
|
const leavingWorkbenchWithChanges =
|
||||||
(state.workbench === 'pageEditor' && workbench !== 'pageEditor' && hasUnsavedChanges) ||
|
(state.workbench === 'pageEditor' && workbench !== 'pageEditor' && hasUnsavedChanges) ||
|
||||||
(state.workbench === 'viewer' && workbench !== 'viewer' && hasUnsavedChanges);
|
(state.workbench === 'viewer' && workbench !== 'viewer' && hasUnsavedChanges) ||
|
||||||
|
(state.workbench.startsWith('custom:') && workbench !== state.workbench && hasUnsavedChanges);
|
||||||
|
|
||||||
if (leavingWorkbenchWithChanges) {
|
if (leavingWorkbenchWithChanges) {
|
||||||
// Update state to reflect unsaved changes so modal knows
|
// Update state to reflect unsaved changes so modal knows
|
||||||
@ -132,7 +133,19 @@ export const NavigationProvider: React.FC<{
|
|||||||
dispatch({ type: 'SET_UNSAVED_CHANGES', payload: { hasChanges: true } });
|
dispatch({ type: 'SET_UNSAVED_CHANGES', payload: { hasChanges: true } });
|
||||||
}
|
}
|
||||||
const performWorkbenchChange = () => {
|
const performWorkbenchChange = () => {
|
||||||
dispatch({ type: 'SET_WORKBENCH', payload: { workbench } });
|
// When leaving a custom workbench, clear the selected tool
|
||||||
|
console.log('[NavigationContext] performWorkbenchChange executing', {
|
||||||
|
from: state.workbench,
|
||||||
|
to: workbench,
|
||||||
|
isCustom: state.workbench.startsWith('custom:')
|
||||||
|
});
|
||||||
|
if (state.workbench.startsWith('custom:')) {
|
||||||
|
console.log('[NavigationContext] Clearing tool and changing workbench to:', workbench);
|
||||||
|
dispatch({ type: 'SET_TOOL_AND_WORKBENCH', payload: { toolId: null, workbench } });
|
||||||
|
} else {
|
||||||
|
console.log('[NavigationContext] Just changing workbench to:', workbench);
|
||||||
|
dispatch({ type: 'SET_WORKBENCH', payload: { workbench } });
|
||||||
|
}
|
||||||
};
|
};
|
||||||
dispatch({ type: 'SET_PENDING_NAVIGATION', payload: { navigationFn: performWorkbenchChange } });
|
dispatch({ type: 'SET_PENDING_NAVIGATION', payload: { navigationFn: performWorkbenchChange } });
|
||||||
dispatch({ type: 'SHOW_NAVIGATION_WARNING', payload: { show: true } });
|
dispatch({ type: 'SHOW_NAVIGATION_WARNING', payload: { show: true } });
|
||||||
@ -149,10 +162,11 @@ export const NavigationProvider: React.FC<{
|
|||||||
// Check for unsaved changes using registered checker or state
|
// Check for unsaved changes using registered checker or state
|
||||||
const hasUnsavedChanges = unsavedChangesCheckerRef.current?.() || state.hasUnsavedChanges;
|
const hasUnsavedChanges = unsavedChangesCheckerRef.current?.() || state.hasUnsavedChanges;
|
||||||
|
|
||||||
// If we're leaving pageEditor or viewer workbench and have unsaved changes, request navigation
|
// If we're leaving pageEditor, viewer, or custom workbench and have unsaved changes, request navigation
|
||||||
const leavingWorkbenchWithChanges =
|
const leavingWorkbenchWithChanges =
|
||||||
(state.workbench === 'pageEditor' && workbench !== 'pageEditor' && hasUnsavedChanges) ||
|
(state.workbench === 'pageEditor' && workbench !== 'pageEditor' && hasUnsavedChanges) ||
|
||||||
(state.workbench === 'viewer' && workbench !== 'viewer' && hasUnsavedChanges);
|
(state.workbench === 'viewer' && workbench !== 'viewer' && hasUnsavedChanges) ||
|
||||||
|
(state.workbench.startsWith('custom:') && workbench !== state.workbench && hasUnsavedChanges);
|
||||||
|
|
||||||
if (leavingWorkbenchWithChanges) {
|
if (leavingWorkbenchWithChanges) {
|
||||||
const performWorkbenchChange = () => {
|
const performWorkbenchChange = () => {
|
||||||
@ -192,13 +206,19 @@ export const NavigationProvider: React.FC<{
|
|||||||
}, [state.hasUnsavedChanges]),
|
}, [state.hasUnsavedChanges]),
|
||||||
|
|
||||||
confirmNavigation: useCallback(() => {
|
confirmNavigation: useCallback(() => {
|
||||||
|
console.log('[NavigationContext] confirmNavigation called', {
|
||||||
|
hasPendingNav: !!state.pendingNavigation,
|
||||||
|
currentWorkbench: state.workbench,
|
||||||
|
currentTool: state.selectedTool
|
||||||
|
});
|
||||||
if (state.pendingNavigation) {
|
if (state.pendingNavigation) {
|
||||||
state.pendingNavigation();
|
state.pendingNavigation();
|
||||||
}
|
}
|
||||||
|
|
||||||
dispatch({ type: 'SET_PENDING_NAVIGATION', payload: { navigationFn: null } });
|
dispatch({ type: 'SET_PENDING_NAVIGATION', payload: { navigationFn: null } });
|
||||||
dispatch({ type: 'SHOW_NAVIGATION_WARNING', payload: { show: false } });
|
dispatch({ type: 'SHOW_NAVIGATION_WARNING', payload: { show: false } });
|
||||||
}, [state.pendingNavigation]),
|
console.log('[NavigationContext] confirmNavigation completed');
|
||||||
|
}, [state.pendingNavigation, state.workbench, state.selectedTool]),
|
||||||
|
|
||||||
cancelNavigation: useCallback(() => {
|
cancelNavigation: useCallback(() => {
|
||||||
dispatch({ type: 'SET_PENDING_NAVIGATION', payload: { navigationFn: null } });
|
dispatch({ type: 'SET_PENDING_NAVIGATION', payload: { navigationFn: null } });
|
||||||
|
|||||||
@ -218,15 +218,25 @@ export function ToolWorkflowProvider({ children }: ToolWorkflowProviderProps) {
|
|||||||
}, [customViewRegistry, customViewData]);
|
}, [customViewRegistry, customViewData]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (isBaseWorkbench(navigationState.workbench)) {
|
const { workbench } = navigationState;
|
||||||
|
if (isBaseWorkbench(workbench)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const currentCustomView = customWorkbenchViews.find(view => view.workbenchId === navigationState.workbench);
|
const currentCustomView = customWorkbenchViews.find(view => view.workbenchId === workbench);
|
||||||
|
const expectedWorkbench = selectedTool?.workbench;
|
||||||
|
const workbenchOwnedBySelectedTool = expectedWorkbench === workbench;
|
||||||
|
|
||||||
if (!currentCustomView || currentCustomView.data == null) {
|
if (!currentCustomView || currentCustomView.data == null) {
|
||||||
|
// If the currently selected tool expects this custom workbench, allow it
|
||||||
|
// some time to register/populate the view instead of immediately bouncing
|
||||||
|
// the user back to Active Files.
|
||||||
|
if (workbenchOwnedBySelectedTool) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
actions.setWorkbench(getDefaultWorkbench());
|
actions.setWorkbench(getDefaultWorkbench());
|
||||||
}
|
}
|
||||||
}, [actions, customWorkbenchViews, navigationState.workbench]);
|
}, [actions, customWorkbenchViews, navigationState.workbench, selectedTool]);
|
||||||
|
|
||||||
// Persisted via PreferencesContext; no direct localStorage writes needed here
|
// Persisted via PreferencesContext; no direct localStorage writes needed here
|
||||||
|
|
||||||
|
|||||||
@ -173,10 +173,6 @@ const FontStatusPanel: React.FC<FontStatusPanelProps> = ({ document, pageIndex }
|
|||||||
[document, pageIndex]
|
[document, pageIndex]
|
||||||
);
|
);
|
||||||
|
|
||||||
if (!document || fontAnalysis.fonts.length === 0) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
const { canReproducePerfectly, hasWarnings, summary, fonts } = fontAnalysis;
|
const { canReproducePerfectly, hasWarnings, summary, fonts } = fontAnalysis;
|
||||||
|
|
||||||
const statusIcon = useMemo(() => {
|
const statusIcon = useMemo(() => {
|
||||||
@ -189,6 +185,11 @@ const FontStatusPanel: React.FC<FontStatusPanelProps> = ({ document, pageIndex }
|
|||||||
return <InfoIcon sx={{ fontSize: 16 }} />;
|
return <InfoIcon sx={{ fontSize: 16 }} />;
|
||||||
}, [canReproducePerfectly, hasWarnings]);
|
}, [canReproducePerfectly, hasWarnings]);
|
||||||
|
|
||||||
|
// Early return AFTER all hooks are declared
|
||||||
|
if (!document || fontAnalysis.fonts.length === 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
const statusColor = canReproducePerfectly ? 'green' : hasWarnings ? 'yellow' : 'blue';
|
const statusColor = canReproducePerfectly ? 'green' : hasWarnings ? 'yellow' : 'blue';
|
||||||
|
|
||||||
const pageLabel = pageIndex !== undefined
|
const pageLabel = pageIndex !== undefined
|
||||||
@ -199,14 +200,30 @@ const FontStatusPanel: React.FC<FontStatusPanelProps> = ({ document, pageIndex }
|
|||||||
<Accordion variant="contained" defaultValue={hasWarnings ? 'fonts' : undefined}>
|
<Accordion variant="contained" defaultValue={hasWarnings ? 'fonts' : undefined}>
|
||||||
<Accordion.Item value="fonts">
|
<Accordion.Item value="fonts">
|
||||||
<Accordion.Control>
|
<Accordion.Control>
|
||||||
<Group gap="xs" wrap="nowrap">
|
<Group gap="xs" wrap="wrap" style={{ flex: 1 }}>
|
||||||
{statusIcon}
|
<Group gap="xs" wrap="nowrap">
|
||||||
<Text size="sm" fw={500}>
|
{statusIcon}
|
||||||
{pageLabel}
|
<Text size="sm" fw={500}>
|
||||||
</Text>
|
{pageLabel}
|
||||||
<Badge size="xs" color={statusColor} variant="dot">
|
</Text>
|
||||||
{fonts.length}
|
<Badge size="xs" color={statusColor} variant="dot">
|
||||||
</Badge>
|
{fonts.length}
|
||||||
|
</Badge>
|
||||||
|
</Group>
|
||||||
|
|
||||||
|
{/* Warning badges BEFORE expansion */}
|
||||||
|
<Group gap={4} wrap="wrap">
|
||||||
|
{summary.systemFallback > 0 && (
|
||||||
|
<Badge size="xs" color="yellow" variant="filled" leftSection={<WarningIcon sx={{ fontSize: 12 }} />}>
|
||||||
|
{summary.systemFallback} {t('pdfTextEditor.fontAnalysis.fallback', 'fallback')}
|
||||||
|
</Badge>
|
||||||
|
)}
|
||||||
|
{summary.missing > 0 && (
|
||||||
|
<Badge size="xs" color="red" variant="filled" leftSection={<ErrorIcon sx={{ fontSize: 12 }} />}>
|
||||||
|
{summary.missing} {t('pdfTextEditor.fontAnalysis.missing', 'missing')}
|
||||||
|
</Badge>
|
||||||
|
)}
|
||||||
|
</Group>
|
||||||
</Group>
|
</Group>
|
||||||
</Accordion.Control>
|
</Accordion.Control>
|
||||||
<Accordion.Panel>
|
<Accordion.Panel>
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -18,6 +18,7 @@ import {
|
|||||||
PdfJsonPage,
|
PdfJsonPage,
|
||||||
TextGroup,
|
TextGroup,
|
||||||
PdfTextEditorViewData,
|
PdfTextEditorViewData,
|
||||||
|
BoundingBox,
|
||||||
} from './pdfTextEditorTypes';
|
} from './pdfTextEditorTypes';
|
||||||
import {
|
import {
|
||||||
deepCloneDocument,
|
deepCloneDocument,
|
||||||
@ -26,6 +27,7 @@ import {
|
|||||||
restoreGlyphElements,
|
restoreGlyphElements,
|
||||||
extractDocumentImages,
|
extractDocumentImages,
|
||||||
cloneImageElement,
|
cloneImageElement,
|
||||||
|
cloneTextElement,
|
||||||
valueOr,
|
valueOr,
|
||||||
} from './pdfTextEditorUtils';
|
} from './pdfTextEditorUtils';
|
||||||
import PdfTextEditorView from '@app/components/tools/pdfTextEditor/PdfTextEditorView';
|
import PdfTextEditorView from '@app/components/tools/pdfTextEditor/PdfTextEditorView';
|
||||||
@ -52,6 +54,148 @@ const getAutoLoadKey = (file: File): string => {
|
|||||||
return `${file.name}|${file.size}|${file.lastModified}`;
|
return `${file.name}|${file.size}|${file.lastModified}`;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Splits `value` into exactly `expected` line entries.
 *
 * Line endings are normalised first: both CRLF pairs and lone CR characters
 * become a single `\n`. (Simply deleting `\r` would glue together lines that
 * are separated by a bare CR.)
 *
 * - `expected <= 0`: the normalised text is returned as a single entry.
 * - Fewer lines than `expected`: the result is padded with empty strings.
 * - More lines than `expected`: the surplus lines are folded into the final
 *   entry, joined with `\n`, so no text is dropped.
 *
 * @param value    Text to split; `null`/`undefined` are treated as `''`.
 * @param expected Number of entries the caller needs.
 * @returns An array of `expected` strings (one string when `expected <= 0`).
 */
const normalizeLineArray = (value: string | undefined | null, expected: number): string[] => {
  const normalized = (value ?? '').replace(/\r\n?/g, '\n');
  if (expected <= 0) {
    return [normalized];
  }

  const parts = normalized.split('\n');
  if (parts.length === expected) {
    return parts;
  }
  if (parts.length < expected) {
    // Pad so callers can index every expected line safely.
    return parts.concat(new Array<string>(expected - parts.length).fill(''));
  }

  // Too many lines: keep the first `expected - 1` and merge the rest into the last slot.
  const keep = Math.max(expected - 1, 0);
  return [...parts.slice(0, keep), parts.slice(keep).join('\n')];
};
|
||||||
|
|
||||||
|
/**
 * Builds a copy of `line` that represents a single line.
 *
 * All properties of `line` are carried over, except that the paragraph-level
 * fields (`childLineGroups`, `lineElementCounts`, `lineSpacing`) are reset to
 * `null` and both element arrays are re-created via `cloneTextElement`.
 *
 * @param line         Group to copy.
 * @param text         Replacement for `text`; falls back to `line.text` when nullish.
 * @param originalText Replacement for `originalText`; falls back to `line.originalText` when nullish.
 * @returns The new group object.
 */
const cloneLineTemplate = (line: TextGroup, text?: string, originalText?: string): TextGroup => {
  const elements = line.elements.map(cloneTextElement);
  const originalElements = line.originalElements.map(cloneTextElement);

  return {
    ...line,
    text: text ?? line.text,
    originalText: originalText ?? line.originalText,
    childLineGroups: null,
    lineElementCounts: null,
    lineSpacing: null,
    elements,
    originalElements,
  };
};
|
||||||
|
|
||||||
|
/**
 * Flattens a group into its individual line groups.
 *
 * When the group carries `childLineGroups`, each child is cloned and paired
 * with the matching line of the group's current and original text (both split
 * to the number of children). Otherwise the group itself is cloned and
 * returned as the only entry.
 *
 * @param group Group to expand.
 * @returns One cloned line group per child, or a single clone of `group`.
 */
const expandGroupToLines = (group: TextGroup): TextGroup[] => {
  const children = group.childLineGroups;
  if (!children || children.length <= 0) {
    return [cloneLineTemplate(group)];
  }

  const lineCount = children.length;
  const currentLines = normalizeLineArray(group.text, lineCount);
  const originalLines = normalizeLineArray(group.originalText, lineCount);

  return children.map((child, position) =>
    cloneLineTemplate(child, currentLines[position], originalLines[position]),
  );
};
|
||||||
|
|
||||||
|
/**
 * Computes the box that encloses every box in `boxes`.
 *
 * The result takes the minimum `left`/`top` and the maximum `right`/`bottom`
 * across all inputs. An empty input yields an all-zero box.
 *
 * @param boxes Boxes to combine.
 * @returns A new box object; the inputs are not modified.
 */
const mergeBoundingBoxes = (boxes: BoundingBox[]): BoundingBox => {
  if (boxes.length === 0) {
    return { left: 0, right: 0, top: 0, bottom: 0 };
  }

  // Seed the extents from the first box, then widen them with every box.
  let { left, right, top, bottom } = boxes[0];
  for (const box of boxes) {
    left = Math.min(left, box.left);
    right = Math.max(right, box.right);
    top = Math.min(top, box.top);
    bottom = Math.max(bottom, box.bottom);
  }

  return { left, right, top, bottom };
};
|
||||||
|
|
||||||
|
/**
 * Combines the selected groups into a single multi-line paragraph group.
 *
 * Every selected group is first expanded into its line groups. The resulting
 * paragraph inherits all properties of the first selected group and overrides:
 * - `text` / `originalText`: the per-line texts joined with `\n`;
 * - `originalElements`: clones of every line's original elements, in line order;
 * - `elements`: clones of those same original elements;
 * - `bounds`: the box enclosing every line's bounds;
 * - `lineSpacing`: the mean of the non-zero distances between consecutive
 *   line baselines (a line's `bounds.bottom` is used when `baseline` is
 *   nullish), or `null` when no such distance exists;
 * - `lineElementCounts`: the per-line original-element counts (minimum 1);
 * - `childLineGroups`: a clone of each line.
 *
 * @param groups Groups to merge, in the order they should appear.
 * @returns The merged paragraph, or `null` when the selection is empty or
 *          expands to at most one line.
 */
const buildMergedGroupFromSelection = (groups: TextGroup[]): TextGroup | null => {
  if (groups.length === 0) {
    return null;
  }

  const lines = groups.flatMap(expandGroupToLines);
  if (lines.length <= 1) {
    return null;
  }

  const texts = lines.map((line) => line.text ?? '');
  const originalTexts = lines.map((line) => line.originalText ?? '');
  const originals = lines.flatMap((line) => line.originalElements.map(cloneTextElement));
  const elements = originals.map(cloneTextElement);
  const bounds = mergeBoundingBoxes(lines.map((line) => line.bounds));

  // Distance between each pair of neighbouring baselines; zero gaps are ignored.
  const baselineOf = (line: TextGroup) => line.baseline ?? line.bounds.bottom;
  const gaps = lines
    .slice(1)
    .map((line, offset) => Math.abs(baselineOf(lines[offset]) - baselineOf(line)))
    .filter((gap) => gap > 0);

  let lineSpacing: number | null = null;
  if (gaps.length > 0) {
    let total = 0;
    for (const gap of gaps) {
      total += gap;
    }
    lineSpacing = total / gaps.length;
  }

  const counts = lines.map((line) => Math.max(line.originalElements.length, 1));

  return {
    ...groups[0],
    text: texts.join('\n'),
    originalText: originalTexts.join('\n'),
    elements,
    originalElements: originals,
    bounds,
    lineSpacing,
    lineElementCounts: counts.length > 1 ? counts : null,
    childLineGroups: lines.map((line, position) =>
      cloneLineTemplate(line, texts[position], originalTexts[position]),
    ),
  };
};
|
||||||
|
|
||||||
|
/**
 * Splits a paragraph group back into one group per line.
 *
 * Each child line group serves as the template for one output group. The
 * paragraph's current and original text are split into one entry per line,
 * and the paragraph's `originalElements` are dealt out to the lines in order.
 * The number of elements per line comes from `lineElementCounts` when its
 * length matches the line count; otherwise from each template's own
 * original-element count (minimum 1). If those counts add up to fewer
 * elements than the paragraph holds, the remainder goes to the last line.
 *
 * Every output group gets a fresh id derived from the paragraph id, the line
 * position and the current timestamp, and has its paragraph-level fields
 * (`lineElementCounts`, `lineSpacing`, `childLineGroups`) set to `null`.
 *
 * @param group Paragraph group to split.
 * @returns The per-line groups, or an empty array when the group has fewer
 *          than two child lines.
 */
const splitParagraphGroup = (group: TextGroup): TextGroup[] => {
  const children = group.childLineGroups;
  if (!children || children.length <= 1) {
    return [];
  }

  const templates = children.map((child) => cloneLineTemplate(child));
  const lineCount = templates.length;
  const currentLines = normalizeLineArray(group.text, lineCount);
  const originalLines = normalizeLineArray(group.originalText, lineCount);

  const recorded = group.lineElementCounts;
  const counts =
    recorded && recorded.length === lineCount
      ? [...recorded]
      : templates.map((line) => Math.max(line.originalElements.length, 1));

  // Hand any elements not accounted for by the counts to the final line.
  const available = group.originalElements.length;
  let assigned = 0;
  for (const count of counts) {
    assigned += count;
  }
  if (assigned < available && counts.length > 0) {
    counts[counts.length - 1] += available - assigned;
  }

  const result: TextGroup[] = [];
  let cursor = 0;
  for (let index = 0; index < templates.length; index += 1) {
    // Each line takes at least one element slot.
    const take = Math.max(1, counts[index] ?? 1);
    const originals = group.originalElements.slice(cursor, cursor + take).map(cloneTextElement);
    cursor += take;

    result.push({
      ...templates[index],
      id: `${group.id}-line-${index + 1}-${Date.now()}-${index}`,
      text: currentLines[index] ?? '',
      originalText: originalLines[index] ?? '',
      elements: originals.map(cloneTextElement),
      originalElements: originals,
      lineElementCounts: null,
      lineSpacing: null,
      childLineGroups: null,
    });
  }

  return result;
};
|
||||||
|
|
||||||
const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||||
const { t } = useTranslation();
|
const { t } = useTranslation();
|
||||||
const {
|
const {
|
||||||
@ -63,6 +207,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
} = useToolWorkflow();
|
} = useToolWorkflow();
|
||||||
const { actions: navigationActions } = useNavigationActions();
|
const { actions: navigationActions } = useNavigationActions();
|
||||||
const navigationState = useNavigationState();
|
const navigationState = useNavigationState();
|
||||||
|
const { registerUnsavedChangesChecker, unregisterUnsavedChangesChecker } = navigationActions;
|
||||||
|
|
||||||
const [loadedDocument, setLoadedDocument] = useState<PdfJsonDocument | null>(null);
|
const [loadedDocument, setLoadedDocument] = useState<PdfJsonDocument | null>(null);
|
||||||
const [groupsByPage, setGroupsByPage] = useState<TextGroup[][]>([]);
|
const [groupsByPage, setGroupsByPage] = useState<TextGroup[][]>([]);
|
||||||
@ -89,6 +234,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
const [loadingImagePages, setLoadingImagePages] = useState<Set<number>>(new Set());
|
const [loadingImagePages, setLoadingImagePages] = useState<Set<number>>(new Set());
|
||||||
|
|
||||||
const originalImagesRef = useRef<PdfJsonImageElement[][]>([]);
|
const originalImagesRef = useRef<PdfJsonImageElement[][]>([]);
|
||||||
|
const originalGroupsRef = useRef<TextGroup[][]>([]);
|
||||||
const imagesByPageRef = useRef<PdfJsonImageElement[][]>([]);
|
const imagesByPageRef = useRef<PdfJsonImageElement[][]>([]);
|
||||||
const autoLoadKeyRef = useRef<string | null>(null);
|
const autoLoadKeyRef = useRef<string | null>(null);
|
||||||
const loadRequestIdRef = useRef(0);
|
const loadRequestIdRef = useRef(0);
|
||||||
@ -131,7 +277,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
const dirtyPages = useMemo(
|
const dirtyPages = useMemo(
|
||||||
() => getDirtyPages(groupsByPage, imagesByPage, originalImagesRef.current),
|
() => getDirtyPages(groupsByPage, imagesByPage, originalGroupsRef.current, originalImagesRef.current),
|
||||||
[groupsByPage, imagesByPage],
|
[groupsByPage, imagesByPage],
|
||||||
);
|
);
|
||||||
const hasChanges = useMemo(() => dirtyPages.some(Boolean), [dirtyPages]);
|
const hasChanges = useMemo(() => dirtyPages.some(Boolean), [dirtyPages]);
|
||||||
@ -157,6 +303,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
const images = extractDocumentImages(cloned);
|
const images = extractDocumentImages(cloned);
|
||||||
const originalImages = images.map((page) => page.map(cloneImageElement));
|
const originalImages = images.map((page) => page.map(cloneImageElement));
|
||||||
originalImagesRef.current = originalImages;
|
originalImagesRef.current = originalImages;
|
||||||
|
originalGroupsRef.current = groups.map((page) => page.map((group) => ({ ...group })));
|
||||||
imagesByPageRef.current = images.map((page) => page.map(cloneImageElement));
|
imagesByPageRef.current = images.map((page) => page.map(cloneImageElement));
|
||||||
const initialLoaded = new Set<number>();
|
const initialLoaded = new Set<number>();
|
||||||
originalImages.forEach((pageImages, index) => {
|
originalImages.forEach((pageImages, index) => {
|
||||||
@ -351,8 +498,6 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
let shouldUseLazyMode = false;
|
let shouldUseLazyMode = false;
|
||||||
let pendingJobId: string | null = null;
|
let pendingJobId: string | null = null;
|
||||||
|
|
||||||
setErrorMessage(null);
|
|
||||||
|
|
||||||
if (isPdf) {
|
if (isPdf) {
|
||||||
latestPdfRequestIdRef.current = requestId;
|
latestPdfRequestIdRef.current = requestId;
|
||||||
setIsConverting(true);
|
setIsConverting(true);
|
||||||
@ -539,7 +684,6 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
setCachedJobId(shouldUseLazyMode ? pendingJobId : null);
|
setCachedJobId(shouldUseLazyMode ? pendingJobId : null);
|
||||||
setFileName(file.name);
|
setFileName(file.name);
|
||||||
setErrorMessage(null);
|
setErrorMessage(null);
|
||||||
autoLoadKeyRef.current = fileKey;
|
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
console.error('Failed to load file', error);
|
console.error('Failed to load file', error);
|
||||||
console.error('Error details:', {
|
console.error('Error details:', {
|
||||||
@ -598,13 +742,83 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
const handleGroupDelete = useCallback((pageIndex: number, groupId: string) => {
|
const handleGroupDelete = useCallback((pageIndex: number, groupId: string) => {
|
||||||
|
console.log(`🗑️ Deleting group ${groupId} from page ${pageIndex}`);
|
||||||
|
setGroupsByPage((previous) => {
|
||||||
|
const updated = previous.map((groups, idx) => {
|
||||||
|
if (idx !== pageIndex) return groups;
|
||||||
|
const filtered = groups.filter((group) => group.id !== groupId);
|
||||||
|
console.log(` Before: ${groups.length} groups, After: ${filtered.length} groups`);
|
||||||
|
return filtered;
|
||||||
|
});
|
||||||
|
return updated;
|
||||||
|
});
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
const handleMergeGroups = useCallback((pageIndex: number, groupIds: string[]): boolean => {
|
||||||
|
if (groupIds.length < 2) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
let updated = false;
|
||||||
setGroupsByPage((previous) =>
|
setGroupsByPage((previous) =>
|
||||||
previous.map((groups, idx) =>
|
previous.map((groups, idx) => {
|
||||||
idx !== pageIndex
|
if (idx !== pageIndex) {
|
||||||
? groups
|
return groups;
|
||||||
: groups.map((group) => (group.id === groupId ? { ...group, text: '' } : group))
|
}
|
||||||
)
|
const indices = groupIds
|
||||||
|
.map((id) => groups.findIndex((group) => group.id === id))
|
||||||
|
.filter((index) => index >= 0);
|
||||||
|
if (indices.length !== groupIds.length) {
|
||||||
|
return groups;
|
||||||
|
}
|
||||||
|
const sorted = [...indices].sort((a, b) => a - b);
|
||||||
|
for (let i = 1; i < sorted.length; i += 1) {
|
||||||
|
if (sorted[i] !== sorted[i - 1] + 1) {
|
||||||
|
return groups;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const selection = sorted.map((position) => groups[position]);
|
||||||
|
const merged = buildMergedGroupFromSelection(selection);
|
||||||
|
if (!merged) {
|
||||||
|
return groups;
|
||||||
|
}
|
||||||
|
const next = [
|
||||||
|
...groups.slice(0, sorted[0]),
|
||||||
|
merged,
|
||||||
|
...groups.slice(sorted[sorted.length - 1] + 1),
|
||||||
|
];
|
||||||
|
updated = true;
|
||||||
|
return next;
|
||||||
|
}),
|
||||||
);
|
);
|
||||||
|
return updated;
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
const handleUngroupGroup = useCallback((pageIndex: number, groupId: string): boolean => {
|
||||||
|
let updated = false;
|
||||||
|
setGroupsByPage((previous) =>
|
||||||
|
previous.map((groups, idx) => {
|
||||||
|
if (idx !== pageIndex) {
|
||||||
|
return groups;
|
||||||
|
}
|
||||||
|
const targetIndex = groups.findIndex((group) => group.id === groupId);
|
||||||
|
if (targetIndex < 0) {
|
||||||
|
return groups;
|
||||||
|
}
|
||||||
|
const targetGroup = groups[targetIndex];
|
||||||
|
const splits = splitParagraphGroup(targetGroup);
|
||||||
|
if (splits.length <= 1) {
|
||||||
|
return groups;
|
||||||
|
}
|
||||||
|
const next = [
|
||||||
|
...groups.slice(0, targetIndex),
|
||||||
|
...splits,
|
||||||
|
...groups.slice(targetIndex + 1),
|
||||||
|
];
|
||||||
|
updated = true;
|
||||||
|
return next;
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
return updated;
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
const handleImageTransform = useCallback(
|
const handleImageTransform = useCallback(
|
||||||
@ -746,7 +960,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
}
|
}
|
||||||
}, [buildPayload, onComplete]);
|
}, [buildPayload, onComplete]);
|
||||||
|
|
||||||
const handleGeneratePdf = useCallback(async () => {
|
const handleGeneratePdf = useCallback(async (skipComplete = false) => {
|
||||||
try {
|
try {
|
||||||
setIsGeneratingPdf(true);
|
setIsGeneratingPdf(true);
|
||||||
|
|
||||||
@ -840,7 +1054,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
|
|
||||||
downloadBlob(response.data, downloadName);
|
downloadBlob(response.data, downloadName);
|
||||||
|
|
||||||
if (onComplete) {
|
if (onComplete && !skipComplete) {
|
||||||
const pdfFile = new File([response.data], downloadName, { type: 'application/pdf' });
|
const pdfFile = new File([response.data], downloadName, { type: 'application/pdf' });
|
||||||
onComplete([pdfFile]);
|
onComplete([pdfFile]);
|
||||||
}
|
}
|
||||||
@ -881,7 +1095,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
|
|
||||||
downloadBlob(response.data, downloadName);
|
downloadBlob(response.data, downloadName);
|
||||||
|
|
||||||
if (onComplete) {
|
if (onComplete && !skipComplete) {
|
||||||
const pdfFile = new File([response.data], downloadName, { type: 'application/pdf' });
|
const pdfFile = new File([response.data], downloadName, { type: 'application/pdf' });
|
||||||
onComplete([pdfFile]);
|
onComplete([pdfFile]);
|
||||||
}
|
}
|
||||||
@ -1052,7 +1266,6 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
forceSingleTextElement,
|
forceSingleTextElement,
|
||||||
groupingMode,
|
groupingMode,
|
||||||
requestPagePreview,
|
requestPagePreview,
|
||||||
onLoadJson: handleLoadFile,
|
|
||||||
onSelectPage: handleSelectPage,
|
onSelectPage: handleSelectPage,
|
||||||
onGroupEdit: handleGroupTextChange,
|
onGroupEdit: handleGroupTextChange,
|
||||||
onGroupDelete: handleGroupDelete,
|
onGroupDelete: handleGroupDelete,
|
||||||
@ -1061,9 +1274,17 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
onReset: handleResetEdits,
|
onReset: handleResetEdits,
|
||||||
onDownloadJson: handleDownloadJson,
|
onDownloadJson: handleDownloadJson,
|
||||||
onGeneratePdf: handleGeneratePdf,
|
onGeneratePdf: handleGeneratePdf,
|
||||||
|
onGeneratePdfForNavigation: async () => {
|
||||||
|
// Generate PDF without triggering tool completion
|
||||||
|
await handleGeneratePdf(true);
|
||||||
|
},
|
||||||
onForceSingleTextElementChange: setForceSingleTextElement,
|
onForceSingleTextElementChange: setForceSingleTextElement,
|
||||||
onGroupingModeChange: setGroupingMode,
|
onGroupingModeChange: setGroupingMode,
|
||||||
|
onMergeGroups: handleMergeGroups,
|
||||||
|
onUngroupGroup: handleUngroupGroup,
|
||||||
}), [
|
}), [
|
||||||
|
handleMergeGroups,
|
||||||
|
handleUngroupGroup,
|
||||||
handleImageTransform,
|
handleImageTransform,
|
||||||
imagesByPage,
|
imagesByPage,
|
||||||
pagePreviews,
|
pagePreviews,
|
||||||
@ -1076,7 +1297,6 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
handleGroupTextChange,
|
handleGroupTextChange,
|
||||||
handleGroupDelete,
|
handleGroupDelete,
|
||||||
handleImageReset,
|
handleImageReset,
|
||||||
handleLoadFile,
|
|
||||||
handleResetEdits,
|
handleResetEdits,
|
||||||
handleSelectPage,
|
handleSelectPage,
|
||||||
hasChanges,
|
hasChanges,
|
||||||
@ -1155,14 +1375,30 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
unregisterCustomWorkbenchView,
|
unregisterCustomWorkbenchView,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
// Note: Compare tool doesn't auto-force workbench, and neither should we
|
||||||
|
// The workbench should be set when the tool is selected via proper channels
|
||||||
|
// (tool registry, tool picker, etc.) - not forced here
|
||||||
|
|
||||||
|
// Keep the latest `hasChanges` value in a ref so the checker registered below
// can read it without being re-registered every time the value changes.
const hasChangesRef = useRef(hasChanges);
useEffect(() => {
  hasChangesRef.current = hasChanges;
}, [hasChanges]);

// Register unsaved changes checker for navigation guard; the cleanup
// unregisters it again. The checker only reads the ref, so this effect depends
// solely on the register/unregister functions.
useEffect(() => {
  registerUnsavedChangesChecker(() => hasChangesRef.current);
  return () => {
    unregisterUnsavedChangesChecker();
  };
}, [registerUnsavedChangesChecker, unregisterUnsavedChangesChecker]);
|
||||||
|
|
||||||
const lastSentViewDataRef = useRef<PdfTextEditorViewData | null>(null);
|
const lastSentViewDataRef = useRef<PdfTextEditorViewData | null>(null);
|
||||||
|
|
||||||
|
|||||||
@ -168,6 +168,7 @@ export interface TextGroup {
|
|||||||
text: string;
|
text: string;
|
||||||
originalText: string;
|
originalText: string;
|
||||||
bounds: BoundingBox;
|
bounds: BoundingBox;
|
||||||
|
childLineGroups?: TextGroup[] | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export const DEFAULT_PAGE_WIDTH = 612;
|
export const DEFAULT_PAGE_WIDTH = 612;
|
||||||
@ -199,7 +200,6 @@ export interface PdfTextEditorViewData {
|
|||||||
forceSingleTextElement: boolean;
|
forceSingleTextElement: boolean;
|
||||||
groupingMode: 'auto' | 'paragraph' | 'singleLine';
|
groupingMode: 'auto' | 'paragraph' | 'singleLine';
|
||||||
requestPagePreview: (pageIndex: number, scale: number) => void;
|
requestPagePreview: (pageIndex: number, scale: number) => void;
|
||||||
onLoadJson: (file: File | null) => Promise<void> | void;
|
|
||||||
onSelectPage: (pageIndex: number) => void;
|
onSelectPage: (pageIndex: number) => void;
|
||||||
onGroupEdit: (pageIndex: number, groupId: string, value: string) => void;
|
onGroupEdit: (pageIndex: number, groupId: string, value: string) => void;
|
||||||
onGroupDelete: (pageIndex: number, groupId: string) => void;
|
onGroupDelete: (pageIndex: number, groupId: string) => void;
|
||||||
@ -218,6 +218,9 @@ export interface PdfTextEditorViewData {
|
|||||||
onReset: () => void;
|
onReset: () => void;
|
||||||
onDownloadJson: () => void;
|
onDownloadJson: () => void;
|
||||||
onGeneratePdf: () => void;
|
onGeneratePdf: () => void;
|
||||||
|
onGeneratePdfForNavigation: () => Promise<void>;
|
||||||
onForceSingleTextElementChange: (value: boolean) => void;
|
onForceSingleTextElementChange: (value: boolean) => void;
|
||||||
onGroupingModeChange: (value: 'auto' | 'paragraph' | 'singleLine') => void;
|
onGroupingModeChange: (value: 'auto' | 'paragraph' | 'singleLine') => void;
|
||||||
|
onMergeGroups: (pageIndex: number, groupIds: string[]) => boolean;
|
||||||
|
onUngroupGroup: (pageIndex: number, groupId: string) => boolean;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -520,8 +520,18 @@ const createGroup = (
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Builds a copy of a line group: every field of `line` is carried over, the
// `elements` and `originalElements` arrays are re-created by passing each entry
// through cloneTextElement, and `childLineGroups`, `lineElementCounts` and
// `lineSpacing` are reset to null.
const cloneLineTemplate = (line: TextGroup): TextGroup => {
  const elements = line.elements.map(cloneTextElement);
  const originalElements = line.originalElements.map(cloneTextElement);

  return {
    ...line,
    childLineGroups: null,
    lineElementCounts: null,
    lineSpacing: null,
    elements,
    originalElements,
  };
};
|
||||||
|
|
||||||
const groupLinesIntoParagraphs = (
|
const groupLinesIntoParagraphs = (
|
||||||
lineGroups: TextGroup[],
|
lineGroups: TextGroup[],
|
||||||
|
pageWidth: number,
|
||||||
metrics?: FontMetricsMap,
|
metrics?: FontMetricsMap,
|
||||||
): TextGroup[] => {
|
): TextGroup[] => {
|
||||||
if (lineGroups.length === 0) {
|
if (lineGroups.length === 0) {
|
||||||
@ -530,6 +540,8 @@ const groupLinesIntoParagraphs = (
|
|||||||
|
|
||||||
const paragraphs: TextGroup[][] = [];
|
const paragraphs: TextGroup[][] = [];
|
||||||
let currentParagraph: TextGroup[] = [lineGroups[0]];
|
let currentParagraph: TextGroup[] = [lineGroups[0]];
|
||||||
|
const bulletFlags = new Map<string, boolean>();
|
||||||
|
bulletFlags.set(lineGroups[0].id, false);
|
||||||
|
|
||||||
for (let i = 1; i < lineGroups.length; i++) {
|
for (let i = 1; i < lineGroups.length; i++) {
|
||||||
const prevLine = lineGroups[i - 1];
|
const prevLine = lineGroups[i - 1];
|
||||||
@ -561,11 +573,85 @@ const groupLinesIntoParagraphs = (
|
|||||||
const maxReasonableSpacing = avgFontSize * 3.0; // Max ~3x font size for normal line spacing
|
const maxReasonableSpacing = avgFontSize * 3.0; // Max ~3x font size for normal line spacing
|
||||||
const hasReasonableSpacing = lineSpacing <= maxReasonableSpacing;
|
const hasReasonableSpacing = lineSpacing <= maxReasonableSpacing;
|
||||||
|
|
||||||
|
// Check if current line looks like a bullet/list item
|
||||||
|
const prevRight = prevLine.bounds.right;
|
||||||
|
const currentRight = currentLine.bounds.right;
|
||||||
|
const prevWidth = prevRight - prevLeft;
|
||||||
|
const currentWidth = currentRight - currentLeft;
|
||||||
|
|
||||||
|
// Count word count to help identify bullets (typically short)
|
||||||
|
const prevWords = (prevLine.text ?? '').split(/\s+/).filter(w => w.length > 0).length;
|
||||||
|
const currentWords = (currentLine.text ?? '').split(/\s+/).filter(w => w.length > 0).length;
|
||||||
|
const prevText = (prevLine.text ?? '').trim();
|
||||||
|
const currentText = (currentLine.text ?? '').trim();
|
||||||
|
|
||||||
|
// Bullet detection - look for bullet markers or very short lines
|
||||||
|
const bulletMarkerRegex = /^[\u2022\u2023\u25E6\u2043\u2219•·◦‣⁃\-\*]\s|^\d+[\.\)]\s|^[a-z][\.\)]\s/i;
|
||||||
|
const prevHasBulletMarker = bulletMarkerRegex.test(prevText);
|
||||||
|
const currentHasBulletMarker = bulletMarkerRegex.test(currentText);
|
||||||
|
|
||||||
|
// True bullets are:
|
||||||
|
// 1. Have bullet markers/numbers OR
|
||||||
|
// 2. Very short (< 10 words) AND much narrower than average (< 60% of page width)
|
||||||
|
const headingKeywords = ['action items', 'next steps', 'notes', 'logistics', 'tasks'];
|
||||||
|
const normalizedPageWidth = pageWidth > 0 ? pageWidth : avgFontSize * 70;
|
||||||
|
const maxReferenceWidth = normalizedPageWidth > 0 ? normalizedPageWidth : avgFontSize * 70;
|
||||||
|
const indentDelta = currentLeft - prevLeft;
|
||||||
|
const indentThreshold = Math.max(avgFontSize * 0.6, 8);
|
||||||
|
const hasIndent = indentDelta > indentThreshold;
|
||||||
|
const currentWidthRatio = maxReferenceWidth > 0 ? currentWidth / maxReferenceWidth : 0;
|
||||||
|
const prevWidthRatio = maxReferenceWidth > 0 ? prevWidth / maxReferenceWidth : 0;
|
||||||
|
const prevLooksLikeHeading =
|
||||||
|
prevText.endsWith(':') ||
|
||||||
|
(prevWords <= 4 && prevWidthRatio < 0.4) ||
|
||||||
|
headingKeywords.some((keyword) => prevText.toLowerCase().includes(keyword));
|
||||||
|
|
||||||
|
const wrapCandidate =
|
||||||
|
!currentHasBulletMarker &&
|
||||||
|
!hasIndent &&
|
||||||
|
!prevLooksLikeHeading &&
|
||||||
|
currentWords <= 12 &&
|
||||||
|
currentWidthRatio < 0.45 &&
|
||||||
|
Math.abs(prevLeft - currentLeft) <= leftAlignmentTolerance &&
|
||||||
|
currentWidth < prevWidth * 0.85;
|
||||||
|
|
||||||
|
const currentIsBullet = wrapCandidate
|
||||||
|
? false
|
||||||
|
: currentHasBulletMarker ||
|
||||||
|
(hasIndent && (currentWords <= 14 || currentWidthRatio <= 0.65)) ||
|
||||||
|
(prevLooksLikeHeading && (currentWords <= 16 || currentWidthRatio <= 0.8 || prevWidthRatio < 0.35)) ||
|
||||||
|
(currentWords <= 8 && currentWidthRatio <= 0.45 && prevWidth - currentWidth > avgFontSize * 4);
|
||||||
|
|
||||||
|
const prevIsBullet = bulletFlags.get(prevLine.id) ?? prevHasBulletMarker;
|
||||||
|
bulletFlags.set(currentLine.id, currentIsBullet);
|
||||||
|
|
||||||
|
// Detect paragraph→bullet transition
|
||||||
|
const likelyBulletStart = !prevIsBullet && currentIsBullet;
|
||||||
|
|
||||||
|
// Don't merge two consecutive bullets
|
||||||
|
const bothAreBullets = prevIsBullet && currentIsBullet;
|
||||||
|
|
||||||
// Merge into paragraph if:
|
// Merge into paragraph if:
|
||||||
// 1. Left aligned
|
// 1. Left aligned
|
||||||
// 2. Same font
|
// 2. Same font
|
||||||
// 3. Reasonable line spacing (not a large gap indicating paragraph break)
|
// 3. Reasonable line spacing
|
||||||
const shouldMerge = isLeftAligned && sameFont && hasReasonableSpacing;
|
// 4. NOT transitioning to bullets
|
||||||
|
// 5. NOT both are bullets
|
||||||
|
const shouldMerge =
|
||||||
|
isLeftAligned &&
|
||||||
|
sameFont &&
|
||||||
|
hasReasonableSpacing &&
|
||||||
|
!likelyBulletStart &&
|
||||||
|
!bothAreBullets &&
|
||||||
|
!currentIsBullet;
|
||||||
|
|
||||||
|
if (i < 10 || likelyBulletStart || bothAreBullets || !shouldMerge) {
|
||||||
|
console.log(` Line ${i}:`);
|
||||||
|
console.log(` prev: "${prevText.substring(0, 40)}" (${prevWords}w, ${prevWidth.toFixed(0)}pt, marker:${prevHasBulletMarker}, bullet:${prevIsBullet})`);
|
||||||
|
console.log(` curr: "${currentText.substring(0, 40)}" (${currentWords}w, ${currentWidth.toFixed(0)}pt, marker:${currentHasBulletMarker}, bullet:${currentIsBullet})`);
|
||||||
|
console.log(` checks: leftAlign:${isLeftAligned} (${Math.abs(prevLeft - currentLeft).toFixed(1)}pt), sameFont:${sameFont}, spacing:${hasReasonableSpacing} (${lineSpacing.toFixed(1)}pt/${maxReasonableSpacing.toFixed(1)}pt)`);
|
||||||
|
console.log(` decision: merge=${shouldMerge} (bulletStart:${likelyBulletStart}, bothBullets:${bothAreBullets})`);
|
||||||
|
}
|
||||||
|
|
||||||
if (shouldMerge) {
|
if (shouldMerge) {
|
||||||
currentParagraph.push(currentLine);
|
currentParagraph.push(currentLine);
|
||||||
@ -587,17 +673,24 @@ const groupLinesIntoParagraphs = (
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Combine all elements from all lines
|
// Combine all elements from all lines
|
||||||
const allElements = lines.flatMap(line => line.originalElements);
|
const lineTemplates = lines.map(line => cloneLineTemplate(line));
|
||||||
|
const flattenedLineTemplates = lineTemplates.flatMap((line) =>
|
||||||
|
line.childLineGroups && line.childLineGroups.length > 0
|
||||||
|
? line.childLineGroups
|
||||||
|
: [line],
|
||||||
|
);
|
||||||
|
const allLines = flattenedLineTemplates.length > 0 ? flattenedLineTemplates : lineTemplates;
|
||||||
|
const allElements = allLines.flatMap(line => line.originalElements);
|
||||||
const pageIndex = lines[0].pageIndex;
|
const pageIndex = lines[0].pageIndex;
|
||||||
const lineElementCounts = lines.map((line) => line.originalElements.length);
|
const lineElementCounts = allLines.map((line) => line.originalElements.length);
|
||||||
|
|
||||||
// Create merged group with newlines between lines
|
// Create merged group with newlines between lines
|
||||||
const paragraphText = lines.map(line => line.text).join('\n');
|
const paragraphText = allLines.map(line => line.text).join('\n');
|
||||||
const mergedBounds = mergeBounds(lines.map(line => line.bounds));
|
const mergedBounds = mergeBounds(allLines.map(line => line.bounds));
|
||||||
const spacingValues: number[] = [];
|
const spacingValues: number[] = [];
|
||||||
for (let i = 1; i < lines.length; i++) {
|
for (let i = 1; i < allLines.length; i++) {
|
||||||
const prevBaseline = lines[i - 1].baseline ?? lines[i - 1].bounds.bottom;
|
const prevBaseline = allLines[i - 1].baseline ?? allLines[i - 1].bounds.bottom;
|
||||||
const currentBaseline = lines[i].baseline ?? lines[i].bounds.bottom;
|
const currentBaseline = allLines[i].baseline ?? allLines[i].bounds.bottom;
|
||||||
const spacing = Math.abs(prevBaseline - currentBaseline);
|
const spacing = Math.abs(prevBaseline - currentBaseline);
|
||||||
if (spacing > 0) {
|
if (spacing > 0) {
|
||||||
spacingValues.push(spacing);
|
spacingValues.push(spacing);
|
||||||
@ -633,6 +726,7 @@ const groupLinesIntoParagraphs = (
|
|||||||
text: paragraphText,
|
text: paragraphText,
|
||||||
originalText: paragraphText,
|
originalText: paragraphText,
|
||||||
bounds: mergedBounds,
|
bounds: mergedBounds,
|
||||||
|
childLineGroups: allLines,
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
@ -647,6 +741,8 @@ export const groupPageTextElements = (
|
|||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const pageWidth = valueOr(page.width, DEFAULT_PAGE_WIDTH);
|
||||||
|
|
||||||
const elements = page.textElements
|
const elements = page.textElements
|
||||||
.map(cloneTextElement)
|
.map(cloneTextElement)
|
||||||
.filter((element) => element.text !== null && element.text !== undefined);
|
.filter((element) => element.text !== null && element.text !== undefined);
|
||||||
@ -740,7 +836,7 @@ export const groupPageTextElements = (
|
|||||||
|
|
||||||
if (groupingMode === 'paragraph') {
|
if (groupingMode === 'paragraph') {
|
||||||
// Paragraph mode: always apply grouping
|
// Paragraph mode: always apply grouping
|
||||||
return groupLinesIntoParagraphs(lineGroups, metrics);
|
return groupLinesIntoParagraphs(lineGroups, pageWidth, metrics);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Auto mode: use heuristic to determine if we should group
|
// Auto mode: use heuristic to determine if we should group
|
||||||
@ -749,6 +845,11 @@ export const groupPageTextElements = (
|
|||||||
let totalWords = 0;
|
let totalWords = 0;
|
||||||
let longTextGroups = 0;
|
let longTextGroups = 0;
|
||||||
let totalGroups = 0;
|
let totalGroups = 0;
|
||||||
|
const wordCounts: number[] = [];
|
||||||
|
let fullWidthLines = 0;
|
||||||
|
|
||||||
|
// Define "full width" as extending to at least 70% of page width
|
||||||
|
const fullWidthThreshold = pageWidth * 0.7;
|
||||||
|
|
||||||
lineGroups.forEach((group) => {
|
lineGroups.forEach((group) => {
|
||||||
const text = (group.text || '').trim();
|
const text = (group.text || '').trim();
|
||||||
@ -760,14 +861,21 @@ export const groupPageTextElements = (
|
|||||||
const wordCount = text.split(/\s+/).filter((w) => w.length > 0).length;
|
const wordCount = text.split(/\s+/).filter((w) => w.length > 0).length;
|
||||||
|
|
||||||
totalWords += wordCount;
|
totalWords += wordCount;
|
||||||
|
wordCounts.push(wordCount);
|
||||||
|
|
||||||
if (lineCount > 1) {
|
if (lineCount > 1) {
|
||||||
multiLineGroups++;
|
multiLineGroups++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (wordCount >= 5 || text.length >= 30) {
|
if (wordCount >= 10 || text.length >= 50) {
|
||||||
longTextGroups++;
|
longTextGroups++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if this line extends close to the right margin (paragraph-like)
|
||||||
|
const rightEdge = group.bounds.right;
|
||||||
|
if (rightEdge >= fullWidthThreshold) {
|
||||||
|
fullWidthLines++;
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
if (totalGroups === 0) {
|
if (totalGroups === 0) {
|
||||||
@ -776,18 +884,65 @@ export const groupPageTextElements = (
|
|||||||
|
|
||||||
const avgWordsPerGroup = totalWords / totalGroups;
|
const avgWordsPerGroup = totalWords / totalGroups;
|
||||||
const longTextRatio = longTextGroups / totalGroups;
|
const longTextRatio = longTextGroups / totalGroups;
|
||||||
|
const fullWidthRatio = fullWidthLines / totalGroups;
|
||||||
|
|
||||||
const isParagraphPage =
|
// Calculate variance in line lengths (paragraphs have varying lengths, lists are uniform)
|
||||||
(multiLineGroups >= 2 && avgWordsPerGroup > 8) ||
|
const variance = wordCounts.reduce((sum, count) => {
|
||||||
avgWordsPerGroup > 12 ||
|
const diff = count - avgWordsPerGroup;
|
||||||
longTextRatio > 0.4;
|
return sum + diff * diff;
|
||||||
|
}, 0) / totalGroups;
|
||||||
|
const stdDev = Math.sqrt(variance);
|
||||||
|
const coefficientOfVariation = avgWordsPerGroup > 0 ? stdDev / avgWordsPerGroup : 0;
|
||||||
|
|
||||||
|
// Check each criterion
|
||||||
|
const criterion1 = avgWordsPerGroup > 5;
|
||||||
|
const criterion2 = longTextRatio > 0.4;
|
||||||
|
const criterion3 = coefficientOfVariation > 0.5 || fullWidthRatio > 0.6; // High variance OR many full-width lines = paragraph text
|
||||||
|
|
||||||
|
const isParagraphPage = criterion1 && criterion2 && criterion3;
|
||||||
|
|
||||||
|
// Log detection stats
|
||||||
|
console.log(`📄 Page ${pageIndex} Grouping Analysis (mode: ${groupingMode}):`);
|
||||||
|
console.log(` Stats:`);
|
||||||
|
console.log(` • Page width: ${pageWidth.toFixed(1)}pt (full-width threshold: ${fullWidthThreshold.toFixed(1)}pt)`);
|
||||||
|
console.log(` • Multi-line groups: ${multiLineGroups}`);
|
||||||
|
console.log(` • Total groups: ${totalGroups}`);
|
||||||
|
console.log(` • Total words: ${totalWords}`);
|
||||||
|
console.log(` • Long text groups (≥10 words or ≥50 chars): ${longTextGroups}`);
|
||||||
|
console.log(` • Full-width lines (≥70% page width): ${fullWidthLines}`);
|
||||||
|
console.log(` • Avg words per group: ${avgWordsPerGroup.toFixed(2)}`);
|
||||||
|
console.log(` • Long text ratio: ${(longTextRatio * 100).toFixed(1)}%`);
|
||||||
|
console.log(` • Full-width ratio: ${(fullWidthRatio * 100).toFixed(1)}%`);
|
||||||
|
console.log(` • Std deviation: ${stdDev.toFixed(2)}`);
|
||||||
|
console.log(` • Coefficient of variation: ${coefficientOfVariation.toFixed(2)}`);
|
||||||
|
console.log(` Criteria:`);
|
||||||
|
console.log(` 1. Avg Words Per Group: ${criterion1 ? '✅ PASS' : '❌ FAIL'}`);
|
||||||
|
console.log(` (${avgWordsPerGroup.toFixed(2)} > 5)`);
|
||||||
|
console.log(` 2. Long Text Ratio: ${criterion2 ? '✅ PASS' : '❌ FAIL'}`);
|
||||||
|
console.log(` (${(longTextRatio * 100).toFixed(1)}% > 40%)`);
|
||||||
|
console.log(` 3. Line Width Pattern: ${criterion3 ? '✅ PASS' : '❌ FAIL'}`);
|
||||||
|
console.log(` (CV ${coefficientOfVariation.toFixed(2)} > 0.5 OR ${(fullWidthRatio * 100).toFixed(1)}% > 60%)`);
|
||||||
|
console.log(` ${coefficientOfVariation > 0.5 ? '✓ High variance (varying line lengths)' : '✗ Low variance'} ${fullWidthRatio > 0.6 ? '✓ Many full-width lines (paragraph-like)' : '✗ Few full-width lines (list-like)'}`);
|
||||||
|
console.log(` Decision: ${isParagraphPage ? '📝 PARAGRAPH MODE' : '📋 LINE MODE'}`);
|
||||||
|
if (isParagraphPage) {
|
||||||
|
console.log(` Reason: All three criteria passed (AND logic)`);
|
||||||
|
} else {
|
||||||
|
const failedReasons = [];
|
||||||
|
if (!criterion1) failedReasons.push('low average words per group');
|
||||||
|
if (!criterion2) failedReasons.push('low ratio of long text groups');
|
||||||
|
if (!criterion3) failedReasons.push('low variance and few full-width lines (list-like structure)');
|
||||||
|
console.log(` Reason: ${failedReasons.join(', ')}`);
|
||||||
|
}
|
||||||
|
console.log('');
|
||||||
|
|
||||||
// Only apply paragraph grouping if it looks like a paragraph-heavy page
|
// Only apply paragraph grouping if it looks like a paragraph-heavy page
|
||||||
if (isParagraphPage) {
|
if (isParagraphPage) {
|
||||||
return groupLinesIntoParagraphs(lineGroups, metrics);
|
console.log(`🔀 Applying paragraph grouping to page ${pageIndex}`);
|
||||||
|
return groupLinesIntoParagraphs(lineGroups, pageWidth, metrics);
|
||||||
}
|
}
|
||||||
|
|
||||||
// For sparse pages, keep lines separate
|
// For sparse pages, keep lines separate
|
||||||
|
console.log(`📋 Keeping lines separate for page ${pageIndex}`);
|
||||||
return lineGroups;
|
return lineGroups;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -829,10 +984,28 @@ export const deepCloneDocument = (document: PdfJsonDocument): PdfJsonDocument =>
|
|||||||
};
|
};
|
||||||
|
|
||||||
/**
 * Resolves the width and height to use for a page.
 *
 * Each dimension is passed through `valueOr` with the matching default page
 * size (`DEFAULT_PAGE_WIDTH` / `DEFAULT_PAGE_HEIGHT`) as the fallback.
 *
 * This helper is intentionally free of side effects: it does not log, so it is
 * safe to call from render paths and tight loops.
 *
 * @param page - Page whose `width` / `height` are read; may be null or undefined.
 * @returns The resolved `{ width, height }` pair.
 */
export const pageDimensions = (page: PdfJsonPage | null | undefined): { width: number; height: number } => {
  return {
    width: valueOr(page?.width, DEFAULT_PAGE_WIDTH),
    height: valueOr(page?.height, DEFAULT_PAGE_HEIGHT),
  };
};
|
||||||
|
|
||||||
export const createMergedElement = (group: TextGroup): PdfJsonTextElement => {
|
export const createMergedElement = (group: TextGroup): PdfJsonTextElement => {
|
||||||
@ -1192,14 +1365,35 @@ export const areImageListsDifferent = (
|
|||||||
/**
 * Computes, for every page in `groupsByPage`, whether that page differs from
 * its original state.
 *
 * A page is reported dirty when any of the following holds:
 * - a group's `text` differs from its `originalText`;
 * - the number of groups differs from the number of original groups for the
 *   same page index (a missing original page counts as zero groups);
 * - `areImageListsDifferent` reports a difference between the page's current
 *   and original image lists (missing lists are treated as empty).
 *
 * The function does not log; it only derives the flags.
 *
 * @param groupsByPage - Current text groups, indexed by page.
 * @param imagesByPage - Current images, indexed by page.
 * @param originalGroupsByPage - Original text groups, indexed by page.
 * @param originalImagesByPage - Original images, indexed by page.
 * @returns One boolean per entry of `groupsByPage`, in the same order.
 */
export const getDirtyPages = (
  groupsByPage: TextGroup[][],
  imagesByPage: PdfJsonImageElement[][],
  originalGroupsByPage: TextGroup[][],
  originalImagesByPage: PdfJsonImageElement[][],
): boolean[] => {
  return groupsByPage.map((groups, index) => {
    // Check if any text was modified
    const textDirty = groups.some((group) => group.text !== group.originalText);

    // A change in the number of groups also marks the page dirty, even when
    // every remaining group still has its original text.
    const originalGroups = originalGroupsByPage[index] ?? [];
    const groupCountChanged = groups.length !== originalGroups.length;

    const imageDirty = areImageListsDifferent(
      imagesByPage[index] ?? [],
      originalImagesByPage[index] ?? [],
    );

    return textDirty || groupCountChanged || imageDirty;
  });
};
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user