diff --git a/frontend/src/core/components/tools/compare/CompareDocumentPane.tsx b/frontend/src/core/components/tools/compare/CompareDocumentPane.tsx index 9579c0d4a..26b5c4b92 100644 --- a/frontend/src/core/components/tools/compare/CompareDocumentPane.tsx +++ b/frontend/src/core/components/tools/compare/CompareDocumentPane.tsx @@ -203,8 +203,12 @@ const CompareDocumentPane = ({ if (!dragRef.current.active || dragRef.current.page !== page.pageNumber) return; const dx = e.clientX - dragRef.current.startX; const dy = e.clientY - dragRef.current.startY; - const maxX = Math.max(0, Math.round(baseWidth * innerScale - containerWidth)); - const maxY = Math.max(0, Math.round(baseHeight * innerScale - containerHeight)); + // Clamp panning based on the actual rendered content size. + // The inner layer is width/height of the container, then scaled by innerScale. + const contentWidth = Math.max(0, Math.round(containerWidth * innerScale)); + const contentHeight = Math.max(0, Math.round(containerHeight * innerScale)); + const maxX = Math.max(0, contentWidth - Math.round(containerWidth)); + const maxY = Math.max(0, contentHeight - Math.round(containerHeight)); const candX = dragRef.current.startPanX - dx; const candY = dragRef.current.startPanY - dy; const next = { x: Math.max(0, Math.min(maxX, candX)), y: Math.max(0, Math.min(maxY, candY)) }; diff --git a/frontend/src/core/components/tools/compare/compare.ts b/frontend/src/core/components/tools/compare/compare.ts index 8dd7a0d85..1cb0e9590 100644 --- a/frontend/src/core/components/tools/compare/compare.ts +++ b/frontend/src/core/components/tools/compare/compare.ts @@ -107,9 +107,39 @@ export const computePageLayoutMetrics = (args: { const baseWidth = isStackedPortrait ? stackedWidth : Math.round(page.width * fit); const baseHeight = isStackedPortrait ? stackedHeight : Math.round(targetHeight); const containerMaxW = scrollRefWidth ?? viewportWidth; - const containerWidth = Math.min(baseWidth, Math.max(120, containerMaxW)); - const containerHeight = Math.round(baseHeight * (containerWidth / baseWidth)); - const innerScale = Math.max(1, zoom); + + // Container-first zooming with a stable baseline: + // Treat zoom=1 as "fit to available width" for the page's base size so + // the initial render is fully visible and centered (no cropping), regardless + // of rotation or pane/container width. When zoom < 1, shrink the container; + // when zoom > 1, keep the container at fit width and scale inner content. + const MIN_CONTAINER_WIDTH = 120; + const minScaleByWidth = MIN_CONTAINER_WIDTH / Math.max(1, baseWidth); + const fitScaleByContainer = containerMaxW / Math.max(1, baseWidth); + // Effective baseline scale used at zoom=1 (ensures at least the min width) + const baselineContainerScale = Math.max(minScaleByWidth, fitScaleByContainer); + // Lower bound the zoom so interactions remain stable + const desiredZoom = Math.max(0.1, zoom); + + let containerScale: number; + let innerScale: number; + if (desiredZoom >= 1) { + // At or above baseline: keep container at fit width and scale inner content + containerScale = baselineContainerScale; + innerScale = +Math.max(0.1, desiredZoom).toFixed(4); + } else { + // Below baseline: shrink container proportionally, do not upscale inner + const scaled = baselineContainerScale * desiredZoom; + // Never smaller than minimum readable width + containerScale = Math.max(minScaleByWidth, scaled); + innerScale = 1; + } + + const containerWidth = Math.max( + MIN_CONTAINER_WIDTH, + Math.min(containerMaxW, Math.round(baseWidth * containerScale)) + ); + const containerHeight = Math.round(baseHeight * (containerWidth / Math.max(1, baseWidth))); return { targetHeight, diff --git a/frontend/src/core/hooks/tools/compare/operationUtils.ts b/frontend/src/core/hooks/tools/compare/operationUtils.ts index f180fbb18..552ee8dd3 100644 --- a/frontend/src/core/hooks/tools/compare/operationUtils.ts +++ b/frontend/src/core/hooks/tools/compare/operationUtils.ts @@ -259,9 +259,9 @@ export const createSummaryFile = (result: CompareResultData): File => { export const clamp = (value: number): number => Math.min(1, Math.max(0, value)); -export const getWorkerErrorCode = (value: unknown): 'EMPTY_TEXT' | 'TOO_LARGE' | undefined => { +export const getWorkerErrorCode = (value: unknown): 'EMPTY_TEXT' | 'TOO_LARGE' | 'TOO_DISSIMILAR' | undefined => { if (typeof value === 'object' && value !== null && 'code' in value) { - const potentialCode = (value as { code?: 'EMPTY_TEXT' | 'TOO_LARGE' }).code; + const potentialCode = (value as { code?: 'EMPTY_TEXT' | 'TOO_LARGE' | 'TOO_DISSIMILAR' }).code; return potentialCode; } return undefined; diff --git a/frontend/src/core/hooks/tools/compare/useCompareOperation.ts b/frontend/src/core/hooks/tools/compare/useCompareOperation.ts index c85d9bc60..79bf20cce 100644 --- a/frontend/src/core/hooks/tools/compare/useCompareOperation.ts +++ b/frontend/src/core/hooks/tools/compare/useCompareOperation.ts @@ -36,7 +36,7 @@ export interface CompareOperationHook extends ToolOperationHook { const { t } = useTranslation(); - const { selectors } = useFileContext(); + const { selectors, actions: fileActions } = useFileContext(); const workerRef = useRef(null); const previousUrl = useRef(null); const activeRunIdRef = useRef(0); @@ -53,6 +53,8 @@ export const useCompareOperation = (): CompareOperationHook => { const [result, setResult] = useState(null); const [warnings, setWarnings] = useState([]); const longRunningToastIdRef = useRef(null); + const dissimilarityToastIdRef = useRef(null); + const dissimilarityToastShownRef = useRef(false); const ensureWorker = useCallback(() => { if (!workerRef.current) { @@ -139,7 +141,7 @@ export const useCompareOperation = (): CompareOperationHook => { dismissToast(longRunningToastIdRef.current); longRunningToastIdRef.current = null; } - const error: Error & { code?: 'EMPTY_TEXT' | 'TOO_LARGE' } = new Error(message.message); + const error: Error & { code?: 'EMPTY_TEXT' | 'TOO_LARGE' | 'TOO_DISSIMILAR' } = new Error(message.message); error.code = message.code; reject(error); break; @@ -228,6 +230,10 @@ export const useCompareOperation = (): CompareOperationHook => { 'compare.no.text.message', 'One or both of the selected PDFs have no text content. Please choose PDFs with text for comparison.' ), + tooDissimilarMessage: t( + 'compare.too.dissimilar.message', + 'These documents appear highly dissimilar. Comparison was stopped to save time.' + ), }; const operationStart = performance.now(); @@ -271,10 +277,57 @@ export const useCompareOperation = (): CompareOperationHook => { longRunningToastIdRef.current = toastId || null; } + // Heuristic: surface an early warning toast when we observe a very high ratio of differences + const EARLY_TOAST_MIN_TOKENS = 15000; // wait for some signal before warning + const EARLY_TOAST_DIFF_RATIO = 0.8; // 80% added/removed vs unchanged + let observedAddedRemoved = 0; + let observedUnchanged = 0; + + const handleEarlyDissimilarity = () => { + if (dissimilarityToastShownRef.current || dissimilarityToastIdRef.current) return; + const toastId = alert({ + alertType: 'warning', + title: t('compare.earlyDissimilarity.title', 'These PDFs look highly different'), + body: t( + 'compare.earlyDissimilarity.body', + "We're seeing very few similarities so far. You can stop the comparison if these aren't related documents." + ), + location: 'bottom-right' as ToastLocation, + isPersistentPopup: true, + expandable: false, + buttonText: t('compare.earlyDissimilarity.stopButton', 'Stop comparison'), + buttonCallback: () => { + try { cancelOperation(); } catch {} + try { window.dispatchEvent(new CustomEvent('compare:clear-selected')); } catch {} + if (dissimilarityToastIdRef.current) { + dismissToast(dissimilarityToastIdRef.current); + dissimilarityToastIdRef.current = null; + } + }, + }); + dissimilarityToastIdRef.current = toastId || null; + dissimilarityToastShownRef.current = true; + }; + const { tokens, stats, warnings: workerWarnings } = await runCompareWorker( baseFiltered.tokens, comparisonFiltered.tokens, - warningMessages + warningMessages, + (chunk) => { + // Incremental ratio tracking for early warning + for (const tok of chunk) { + if (tok.type === 'unchanged') observedUnchanged += 1; + else observedAddedRemoved += 1; + } + const seen = observedAddedRemoved + observedUnchanged; + if ( + !dissimilarityToastShownRef.current && + seen >= EARLY_TOAST_MIN_TOKENS && + observedAddedRemoved / Math.max(1, seen) >= EARLY_TOAST_DIFF_RATIO + ) { + handleEarlyDissimilarity(); + } + } ); if (cancelledRef.current || activeRunIdRef.current !== runId) return; @@ -409,6 +462,11 @@ export const useCompareOperation = (): CompareOperationHook => { dismissToast(longRunningToastIdRef.current); longRunningToastIdRef.current = null; } + if (dissimilarityToastIdRef.current) { + dismissToast(dissimilarityToastIdRef.current); + dissimilarityToastIdRef.current = null; + } + dissimilarityToastShownRef.current = false; } }, [cleanupDownloadUrl, runCompareWorker, selectors, t] diff --git a/frontend/src/core/tools/Compare.tsx b/frontend/src/core/tools/Compare.tsx index 41d9bd801..9e6a50c01 100644 --- a/frontend/src/core/tools/Compare.tsx +++ b/frontend/src/core/tools/Compare.tsx @@ -53,6 +53,25 @@ const Compare = (props: BaseToolProps) => { const compareIcon = useMemo(() => , []); const [swapConfirmOpen, setSwapConfirmOpen] = useState(false); const [clearConfirmOpen, setClearConfirmOpen] = useState(false); + const performClearSelected = useCallback(() => { + try { base.operation.cancelOperation(); } catch { console.error('Failed to cancel operation'); } + try { base.operation.resetResults(); } catch { console.error('Failed to reset results'); } + base.params.setParameters(prev => ({ ...prev, baseFileId: null, comparisonFileId: null })); + try { fileActions.clearSelections(); } catch { console.error('Failed to clear selections'); } + clearCustomWorkbenchViewData(CUSTOM_VIEW_ID); + navigationActions.setWorkbench(getDefaultWorkbench()); + }, [base.operation, base.params, clearCustomWorkbenchViewData, fileActions, navigationActions]); + + useEffect(() => { + const handler = () => { + performClearSelected(); + }; + window.addEventListener('compare:clear-selected', handler as unknown as EventListener); + return () => { + window.removeEventListener('compare:clear-selected', handler as unknown as EventListener); + }; + }, [performClearSelected]); + useEffect(() => { registerCustomWorkbenchView({ @@ -500,12 +519,7 @@ const Compare = (props: BaseToolProps) => { variant="filled" onClick={() => { setClearConfirmOpen(false); - try { base.operation.cancelOperation(); } catch {console.error('Failed to cancel operation');} - try { base.operation.resetResults(); } catch {console.error('Failed to reset results');} - base.params.setParameters(prev => ({ ...prev, baseFileId: null, comparisonFileId: null })); - try { fileActions.clearSelections(); } catch {console.error('Failed to clear selections');} - clearCustomWorkbenchViewData(CUSTOM_VIEW_ID); - navigationActions.setWorkbench(getDefaultWorkbench()); + performClearSelected(); }} > {t('compare.clear.confirm', 'Clear and return')} diff --git a/frontend/src/core/types/compare.ts b/frontend/src/core/types/compare.ts index b06e852a6..65b6ea12c 100644 --- a/frontend/src/core/types/compare.ts +++ b/frontend/src/core/types/compare.ts @@ -98,6 +98,7 @@ export interface CompareWorkerWarnings { complexMessage?: string; tooLargeMessage?: string; emptyTextMessage?: string; + tooDissimilarMessage?: string; } export interface CompareWorkerRequest { @@ -110,6 +111,14 @@ export interface CompareWorkerRequest { batchSize?: number; complexThreshold?: number; maxWordThreshold?: number; + // Early-stop and runtime controls (optional) + earlyStopEnabled?: boolean; + minJaccardUnigram?: number; + minJaccardBigram?: number; + minTokensForEarlyStop?: number; + sampleLimit?: number; + runtimeMaxProcessedTokens?: number; + runtimeMinUnchangedRatio?: number; }; }; } @@ -134,7 +143,7 @@ export type CompareWorkerResponse = | { type: 'error'; message: string; - code?: 'EMPTY_TEXT' | 'TOO_LARGE'; + code?: 'EMPTY_TEXT' | 'TOO_LARGE' | 'TOO_DISSIMILAR'; }; export interface CompareDocumentPaneProps { diff --git a/frontend/src/workers/compareWorker.ts b/frontend/src/workers/compareWorker.ts index ba39860aa..41af4ee77 100644 --- a/frontend/src/workers/compareWorker.ts +++ b/frontend/src/workers/compareWorker.ts @@ -12,6 +12,18 @@ const DEFAULT_SETTINGS = { batchSize: 5000, complexThreshold: 25000, maxWordThreshold: 60000, + // Early stop configuration + earlyStopEnabled: true, + // Jaccard thresholds for quick prefilter (unigram/bigram) + minJaccardUnigram: 0.005, + minJaccardBigram: 0.003, + // Only consider early stop when docs are reasonably large + minTokensForEarlyStop: 20000, + // Sampling cap for similarity estimation + sampleLimit: 50000, + // Runtime stop-loss during chunked diff + runtimeMaxProcessedTokens: 150000, + runtimeMinUnchangedRatio: 0.001, }; const buildMatrix = (words1: string[], words2: string[]) => { @@ -87,7 +99,8 @@ const chunkedDiff = ( words1: string[], words2: string[], chunkSize: number, - emit: (tokens: CompareDiffToken[]) => void + emit: (tokens: CompareDiffToken[]) => void, + runtimeStop?: { maxProcessedTokens: number; minUnchangedRatio: number } ) => { if (words1.length === 0 && words2.length === 0) { return; @@ -123,6 +136,12 @@ const chunkedDiff = ( let index2 = 0; let buffer1: string[] = []; let buffer2: string[] = []; + let totalProcessedBase = 0; + let totalProcessedComp = 0; + let totalUnchanged = 0; + + const countUnchanged = (segment: CompareDiffToken[]) => + segment.reduce((acc, token) => acc + (token.type === 'unchanged' ? 1 : 0), 0); const flushRemainder = () => { if (buffer1.length === 0 && buffer2.length === 0) { @@ -233,6 +252,24 @@ const chunkedDiff = ( buffer1 = window1.slice(baseConsumed); buffer2 = window2.slice(comparisonConsumed); + // Update runtime counters and early stop if necessary + totalProcessedBase += baseConsumed; + totalProcessedComp += comparisonConsumed; + totalUnchanged += countUnchanged(commitTokens); + + if (runtimeStop) { + const processedTotal = totalProcessedBase + totalProcessedComp; + if (processedTotal >= runtimeStop.maxProcessedTokens) { + const unchangedRatio = totalUnchanged / Math.max(1, processedTotal); + if (unchangedRatio < runtimeStop.minUnchangedRatio) { + // Signal early termination for extreme dissimilarity + const err = new Error('EARLY_STOP_TOO_DISSIMILAR'); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (err as any).__earlyStop = true; + throw err; + } + } + } if (reachedEnd) { flushRemainder(); @@ -264,6 +301,40 @@ const chunkedDiff = ( flushRemainder(); }; +// Fast similarity estimation using sampled unigrams and bigrams with Jaccard +const buildSampledSet = (tokens: string[], sampleLimit: number, ngram: 1 | 2): Set => { + const result = new Set(); + if (tokens.length === 0) return result; + const stride = Math.max(1, Math.ceil(tokens.length / sampleLimit)); + if (ngram === 1) { + for (let i = 0; i < tokens.length; i += stride) { + const t = tokens[i]; + if (t) result.add(t); + } + return result; + } + // ngram === 2 + for (let i = 0; i + 1 < tokens.length; i += stride) { + const a = tokens[i]; + const b = tokens[i + 1]; + if (a && b) result.add(`${a}|${b}`); + } + return result; +}; + +const jaccard = (a: Set, b: Set): number => { + if (a.size === 0 && b.size === 0) return 1; + if (a.size === 0 || b.size === 0) return 0; + let intersection = 0; + const smaller = a.size <= b.size ? a : b; + const larger = a.size <= b.size ? b : a; + for (const v of smaller) { + if (larger.has(v)) intersection += 1; + } + const union = a.size + b.size - intersection; + return union > 0 ? intersection / union : 0; +}; + self.onmessage = (event: MessageEvent) => { const { data } = event; if (!data || data.type !== 'compare') { @@ -275,6 +346,13 @@ self.onmessage = (event: MessageEvent) => { batchSize = DEFAULT_SETTINGS.batchSize, complexThreshold = DEFAULT_SETTINGS.complexThreshold, maxWordThreshold = DEFAULT_SETTINGS.maxWordThreshold, + earlyStopEnabled = DEFAULT_SETTINGS.earlyStopEnabled, + minJaccardUnigram = DEFAULT_SETTINGS.minJaccardUnigram, + minJaccardBigram = DEFAULT_SETTINGS.minJaccardBigram, + minTokensForEarlyStop = DEFAULT_SETTINGS.minTokensForEarlyStop, + sampleLimit = DEFAULT_SETTINGS.sampleLimit, + runtimeMaxProcessedTokens = DEFAULT_SETTINGS.runtimeMaxProcessedTokens, + runtimeMinUnchangedRatio = DEFAULT_SETTINGS.runtimeMinUnchangedRatio, } = settings ?? {}; if (!baseTokens || !comparisonTokens || baseTokens.length === 0 || comparisonTokens.length === 0) { @@ -306,22 +384,61 @@ self.onmessage = (event: MessageEvent) => { self.postMessage(warningResponse); } - const start = performance.now(); - chunkedDiff( - baseTokens, - comparisonTokens, - batchSize, - (tokens) => { - if (tokens.length === 0) { - return; - } + // Quick prefilter to avoid heavy diff on extremely dissimilar large docs + if (earlyStopEnabled && Math.min(baseTokens.length, comparisonTokens.length) >= minTokensForEarlyStop) { + const set1u = buildSampledSet(baseTokens, sampleLimit, 1); + const set2u = buildSampledSet(comparisonTokens, sampleLimit, 1); + const jUni = jaccard(set1u, set2u); + const set1b = buildSampledSet(baseTokens, sampleLimit, 2); + const set2b = buildSampledSet(comparisonTokens, sampleLimit, 2); + const jBi = jaccard(set1b, set2b); + if (jUni < minJaccardUnigram && jBi < minJaccardBigram) { const response: CompareWorkerResponse = { - type: 'chunk', - tokens, + type: 'error', + message: + warnings.tooDissimilarMessage ?? + 'These documents appear highly dissimilar. Comparison was stopped to save time.', + code: 'TOO_DISSIMILAR', }; self.postMessage(response); + return; } - ); + } + + const start = performance.now(); + try { + chunkedDiff( + baseTokens, + comparisonTokens, + batchSize, + (tokens) => { + if (tokens.length === 0) { + return; + } + const response: CompareWorkerResponse = { + type: 'chunk', + tokens, + }; + self.postMessage(response); + }, + { maxProcessedTokens: runtimeMaxProcessedTokens, minUnchangedRatio: runtimeMinUnchangedRatio } + ); + } catch (err) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const anyErr = err as any; + if (anyErr && (anyErr.__earlyStop || anyErr?.message === 'EARLY_STOP_TOO_DISSIMILAR')) { + const response: CompareWorkerResponse = { + type: 'error', + message: + warnings.tooDissimilarMessage ?? + 'These documents appear highly dissimilar. Comparison was stopped to save time.', + code: 'TOO_DISSIMILAR', + }; + self.postMessage(response); + return; + } + throw err; + } const durationMs = performance.now() - start; const response: CompareWorkerResponse = {