From 86caa3c327036254f0e71005da484ab3d4827879 Mon Sep 17 00:00:00 2001 From: EthanHealy01 Date: Tue, 28 Oct 2025 19:11:25 +0000 Subject: [PATCH] add chunk sizing --- .../tools/compare/useCompareOperation.ts | 42 ++++++++++++++ frontend/src/workers/compareWorker.ts | 58 +++++++++++++++---- 2 files changed, 90 insertions(+), 10 deletions(-) diff --git a/frontend/src/hooks/tools/compare/useCompareOperation.ts b/frontend/src/hooks/tools/compare/useCompareOperation.ts index c4ae137c5..e546ec090 100644 --- a/frontend/src/hooks/tools/compare/useCompareOperation.ts +++ b/frontend/src/hooks/tools/compare/useCompareOperation.ts @@ -22,6 +22,10 @@ import { getWorkerErrorCode, filterTokensForDiff, } from './operationUtils'; +import { alert, dismissToast } from '../../../components/toast'; +import type { ToastLocation } from '../../../components/toast/types'; + +const LONG_RUNNING_PAGE_THRESHOLD = 2000; export interface CompareOperationHook extends ToolOperationHook { result: CompareResultData | null; @@ -44,6 +48,7 @@ export const useCompareOperation = (): CompareOperationHook => { const [downloadFilename, setDownloadFilename] = useState(''); const [result, setResult] = useState(null); const [warnings, setWarnings] = useState([]); + const longRunningToastIdRef = useRef(null); const ensureWorker = useCallback(() => { if (!workerRef.current) { @@ -105,6 +110,10 @@ export const useCompareOperation = (): CompareOperationHook => { } case 'success': cleanup(); + if (longRunningToastIdRef.current) { + dismissToast(longRunningToastIdRef.current); + longRunningToastIdRef.current = null; + } resolve({ tokens: collectedTokens, stats: message.stats, @@ -116,6 +125,10 @@ export const useCompareOperation = (): CompareOperationHook => { break; case 'error': { cleanup(); + if (longRunningToastIdRef.current) { + dismissToast(longRunningToastIdRef.current); + longRunningToastIdRef.current = null; + } const error: Error & { code?: 'EMPTY_TEXT' | 'TOO_LARGE' } = new Error(message.message); error.code = message.code; reject(error); @@ -217,6 +230,27 @@ export const useCompareOperation = (): CompareOperationHook => { const baseFiltered = filterTokensForDiff(baseContent.tokens, baseContent.metadata); const comparisonFiltered = filterTokensForDiff(comparisonContent.tokens, comparisonContent.metadata); + const combinedPageCount = + (baseContent.pageSizes?.length ?? 0) + (comparisonContent.pageSizes?.length ?? 0); + + if ( + combinedPageCount >= LONG_RUNNING_PAGE_THRESHOLD && + !longRunningToastIdRef.current + ) { + const toastId = alert({ + alertType: 'neutral', + title: t('compare.longJob.title', 'Large comparison in progress'), + body: t( + 'compare.longJob.body', + 'These PDFs together exceed 2,000 pages. Processing can take several minutes.' + ), + location: 'bottom-right' as ToastLocation, + isPersistentPopup: true, + expandable: false, + }); + longRunningToastIdRef.current = toastId || null; + } + const { tokens, stats, warnings: workerWarnings } = await runCompareWorker( baseFiltered.tokens, comparisonFiltered.tokens, @@ -349,6 +383,10 @@ export const useCompareOperation = (): CompareOperationHook => { const duration = performance.now() - operationStart; setStatus((prev) => (prev ? `${prev} (${Math.round(duration)} ms)` : prev)); setIsLoading(false); + if (longRunningToastIdRef.current) { + dismissToast(longRunningToastIdRef.current); + longRunningToastIdRef.current = null; + } } }, [cleanupDownloadUrl, runCompareWorker, selectors, t] @@ -372,6 +410,10 @@ export const useCompareOperation = (): CompareOperationHook => { workerRef.current.terminate(); workerRef.current = null; } + if (longRunningToastIdRef.current) { + dismissToast(longRunningToastIdRef.current); + longRunningToastIdRef.current = null; + } }; }, [cleanupDownloadUrl]); diff --git a/frontend/src/workers/compareWorker.ts b/frontend/src/workers/compareWorker.ts index b5dccf920..1fd983951 100644 --- a/frontend/src/workers/compareWorker.ts +++ b/frontend/src/workers/compareWorker.ts @@ -93,8 +93,31 @@ const chunkedDiff = ( return; } - const maxWindow = Math.max(chunkSize * 6, chunkSize + 512); - const minCommit = Math.max(1, Math.floor(chunkSize * 0.1)); + const baseChunkSize = Math.max(1, chunkSize); + let dynamicChunkSize = baseChunkSize; + const baseMaxWindow = Math.max(baseChunkSize * 6, baseChunkSize + 512); + let dynamicMaxWindow = baseMaxWindow; + let dynamicMinCommit = Math.max(1, Math.floor(dynamicChunkSize * 0.1)); + let dynamicStep = Math.max(64, Math.floor(dynamicChunkSize * 0.5)); + let stallIterations = 0; + + const increaseChunkSizes = () => { + const maxChunkSize = baseChunkSize * 8; + if (dynamicChunkSize >= maxChunkSize) { + return; + } + const nextChunk = Math.min( + maxChunkSize, + Math.max(dynamicChunkSize + dynamicStep, Math.floor(dynamicChunkSize * 1.5)) + ); + if (nextChunk === dynamicChunkSize) { + return; + } + dynamicChunkSize = nextChunk; + dynamicMaxWindow = Math.max(dynamicMaxWindow, Math.max(dynamicChunkSize * 6, dynamicChunkSize + 512)); + dynamicMinCommit = Math.max(1, Math.floor(dynamicChunkSize * 0.1)); + dynamicStep = Math.max(64, Math.floor(dynamicChunkSize * 0.5)); + }; let index1 = 0; let index2 = 0; @@ -124,7 +147,7 @@ const chunkedDiff = ( const remaining1 = Math.max(0, words1.length - index1); const remaining2 = Math.max(0, words2.length - index2); - let windowSize = Math.max(chunkSize, buffer1.length, buffer2.length); + let windowSize = Math.max(dynamicChunkSize, buffer1.length, buffer2.length); let window1: string[] = []; let window2: string[] = []; let chunkTokens: CompareDiffToken[] = []; @@ -153,8 +176,8 @@ const chunkedDiff = ( index2 + take2 >= words2.length; const windowTooLarge = - window1.length >= maxWindow || - window2.length >= maxWindow; + window1.length >= dynamicMaxWindow || + window2.length >= dynamicMaxWindow; if (lastStableIndex >= 0 || reachedEnd || windowTooLarge) { break; @@ -168,8 +191,8 @@ const chunkedDiff = ( } windowSize = Math.min( - maxWindow, - windowSize + Math.max(64, Math.floor(chunkSize * 0.5)) + dynamicMaxWindow, + windowSize + dynamicStep ); } @@ -178,7 +201,12 @@ const chunkedDiff = ( flushRemainder(); return; } - windowSize = Math.min(windowSize + Math.max(64, Math.floor(chunkSize * 0.5)), maxWindow); + windowSize = Math.min(windowSize + dynamicStep, dynamicMaxWindow); + stallIterations += 1; + if (stallIterations >= 3) { + increaseChunkSizes(); + stallIterations = 0; + } continue; } @@ -186,7 +214,7 @@ const chunkedDiff = ( if (commitIndex < 0) { commitIndex = reachedEnd ? chunkTokens.length - 1 - : Math.min(chunkTokens.length - 1, minCommit - 1); + : Math.min(chunkTokens.length - 1, dynamicMinCommit - 1); } const commitTokens = commitIndex >= 0 ? chunkTokens.slice(0, commitIndex + 1) : []; @@ -211,7 +239,12 @@ const chunkedDiff = ( break; } - // Prevent runaway buffers: if we made no progress, forcibly consume one token + if (commitTokens.length < dynamicMinCommit) { + stallIterations += 1; + } else { + stallIterations = 0; + } + if (commitTokens.length === 0 && buffer1.length + buffer2.length > 0) { if (buffer1.length > 0 && index1 < words1.length) { buffer1 = buffer1.slice(1); @@ -221,6 +254,11 @@ const chunkedDiff = ( index2 += 1; } } + + if (stallIterations >= 3) { + increaseChunkSizes(); + stallIterations = 0; + } } flushRemainder();