add chunk sizing

EthanHealy01 2025-10-28 19:11:25 +00:00
parent 7c12d62320
commit 86caa3c327
2 changed files with 90 additions and 10 deletions

View File

@@ -22,6 +22,10 @@ import {
getWorkerErrorCode,
filterTokensForDiff,
} from './operationUtils';
import { alert, dismissToast } from '../../../components/toast';
import type { ToastLocation } from '../../../components/toast/types';
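// Combined page count (base + comparison) at which the persistent long-running toast is shown.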
const LONG_RUNNING_PAGE_THRESHOLD = 2000;
export interface CompareOperationHook extends ToolOperationHook<CompareParameters> {
result: CompareResultData | null;
@@ -44,6 +48,7 @@ export const useCompareOperation = (): CompareOperationHook => {
const [downloadFilename, setDownloadFilename] = useState('');
const [result, setResult] = useState<CompareResultData | null>(null);
const [warnings, setWarnings] = useState<string[]>([]);
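// Holds the id of the long-running toast so every exit path can dismiss it later.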
const longRunningToastIdRef = useRef<string | null>(null);
const ensureWorker = useCallback(() => {
if (!workerRef.current) {
@@ -105,6 +110,10 @@ export const useCompareOperation = (): CompareOperationHook => {
}
case 'success':
cleanup();
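// The worker finished: dismiss the long-running toast if one was shown.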
if (longRunningToastIdRef.current) {
dismissToast(longRunningToastIdRef.current);
longRunningToastIdRef.current = null;
}
resolve({
tokens: collectedTokens,
stats: message.stats,
@@ -116,6 +125,10 @@ export const useCompareOperation = (): CompareOperationHook => {
break;
case 'error': {
cleanup();
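// Dismiss the toast before surfacing the worker error.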
if (longRunningToastIdRef.current) {
dismissToast(longRunningToastIdRef.current);
longRunningToastIdRef.current = null;
}
const error: Error & { code?: 'EMPTY_TEXT' | 'TOO_LARGE' } = new Error(message.message);
error.code = message.code;
reject(error);
@@ -217,6 +230,27 @@ export const useCompareOperation = (): CompareOperationHook => {
const baseFiltered = filterTokensForDiff(baseContent.tokens, baseContent.metadata);
const comparisonFiltered = filterTokensForDiff(comparisonContent.tokens, comparisonContent.metadata);
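// Show a persistent heads-up toast when the combined page count crosses the long-running threshold.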
const combinedPageCount =
(baseContent.pageSizes?.length ?? 0) + (comparisonContent.pageSizes?.length ?? 0);
if (
combinedPageCount >= LONG_RUNNING_PAGE_THRESHOLD &&
!longRunningToastIdRef.current
) {
const toastId = alert({
alertType: 'neutral',
title: t('compare.longJob.title', 'Large comparison in progress'),
body: t(
'compare.longJob.body',
'These PDFs together exceed 2,000 pages. Processing can take several minutes.'
),
location: 'bottom-right' as ToastLocation,
isPersistentPopup: true,
expandable: false,
});
longRunningToastIdRef.current = toastId || null;
}
const { tokens, stats, warnings: workerWarnings } = await runCompareWorker(
baseFiltered.tokens,
comparisonFiltered.tokens,
@@ -349,6 +383,10 @@ export const useCompareOperation = (): CompareOperationHook => {
const duration = performance.now() - operationStart;
setStatus((prev) => (prev ? `${prev} (${Math.round(duration)} ms)` : prev));
setIsLoading(false);
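// The operation has finished, successfully or not: make sure the long-running toast is gone.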
if (longRunningToastIdRef.current) {
dismissToast(longRunningToastIdRef.current);
longRunningToastIdRef.current = null;
}
}
},
[cleanupDownloadUrl, runCompareWorker, selectors, t]
@@ -372,6 +410,10 @@ export const useCompareOperation = (): CompareOperationHook => {
workerRef.current.terminate();
workerRef.current = null;
}
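// Drop any outstanding long-running toast when the effect cleans up.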
if (longRunningToastIdRef.current) {
dismissToast(longRunningToastIdRef.current);
longRunningToastIdRef.current = null;
}
};
}, [cleanupDownloadUrl]);

View File

@@ -93,8 +93,31 @@ const chunkedDiff = (
return;
}
const maxWindow = Math.max(chunkSize * 6, chunkSize + 512);
const minCommit = Math.max(1, Math.floor(chunkSize * 0.1));
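// Dynamic chunk sizing: start from the configured chunk size and let it grow when the diff stops making progress.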
const baseChunkSize = Math.max(1, chunkSize);
let dynamicChunkSize = baseChunkSize;
const baseMaxWindow = Math.max(baseChunkSize * 6, baseChunkSize + 512);
let dynamicMaxWindow = baseMaxWindow;
let dynamicMinCommit = Math.max(1, Math.floor(dynamicChunkSize * 0.1));
let dynamicStep = Math.max(64, Math.floor(dynamicChunkSize * 0.5));
let stallIterations = 0;
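// Grow the chunk size (capped at 8x the base) and recompute the window, commit, and step values derived from it.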
const increaseChunkSizes = () => {
const maxChunkSize = baseChunkSize * 8;
if (dynamicChunkSize >= maxChunkSize) {
return;
}
const nextChunk = Math.min(
maxChunkSize,
Math.max(dynamicChunkSize + dynamicStep, Math.floor(dynamicChunkSize * 1.5))
);
if (nextChunk === dynamicChunkSize) {
return;
}
dynamicChunkSize = nextChunk;
dynamicMaxWindow = Math.max(dynamicMaxWindow, Math.max(dynamicChunkSize * 6, dynamicChunkSize + 512));
dynamicMinCommit = Math.max(1, Math.floor(dynamicChunkSize * 0.1));
dynamicStep = Math.max(64, Math.floor(dynamicChunkSize * 0.5));
};
let index1 = 0;
let index2 = 0;
@@ -124,7 +147,7 @@ const chunkedDiff = (
const remaining1 = Math.max(0, words1.length - index1);
const remaining2 = Math.max(0, words2.length - index2);
let windowSize = Math.max(chunkSize, buffer1.length, buffer2.length);
let windowSize = Math.max(dynamicChunkSize, buffer1.length, buffer2.length);
let window1: string[] = [];
let window2: string[] = [];
let chunkTokens: CompareDiffToken[] = [];
@@ -153,8 +176,8 @@ const chunkedDiff = (
index2 + take2 >= words2.length;
const windowTooLarge =
window1.length >= maxWindow ||
window2.length >= maxWindow;
window1.length >= dynamicMaxWindow ||
window2.length >= dynamicMaxWindow;
if (lastStableIndex >= 0 || reachedEnd || windowTooLarge) {
break;
@@ -168,8 +191,8 @@ const chunkedDiff = (
}
windowSize = Math.min(
maxWindow,
windowSize + Math.max(64, Math.floor(chunkSize * 0.5))
dynamicMaxWindow,
windowSize + dynamicStep
);
}
@@ -178,7 +201,12 @@ const chunkedDiff = (
flushRemainder();
return;
}
windowSize = Math.min(windowSize + Math.max(64, Math.floor(chunkSize * 0.5)), maxWindow);
windowSize = Math.min(windowSize + dynamicStep, dynamicMaxWindow);
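// Widening the window without committing counts as a stall; after three in a row, escalate to a larger chunk size.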
stallIterations += 1;
if (stallIterations >= 3) {
increaseChunkSizes();
stallIterations = 0;
}
continue;
}
@@ -186,7 +214,7 @@ const chunkedDiff = (
if (commitIndex < 0) {
commitIndex = reachedEnd
? chunkTokens.length - 1
: Math.min(chunkTokens.length - 1, minCommit - 1);
: Math.min(chunkTokens.length - 1, dynamicMinCommit - 1);
}
const commitTokens = commitIndex >= 0 ? chunkTokens.slice(0, commitIndex + 1) : [];
@@ -211,7 +239,12 @@ const chunkedDiff = (
break;
}
// Prevent runaway buffers: if we made no progress, forcibly consume one token
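// Commits smaller than the minimum also count as stalls; a full-size commit resets the counter.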
if (commitTokens.length < dynamicMinCommit) {
stallIterations += 1;
} else {
stallIterations = 0;
}
if (commitTokens.length === 0 && buffer1.length + buffer2.length > 0) {
if (buffer1.length > 0 && index1 < words1.length) {
buffer1 = buffer1.slice(1);
@@ -221,6 +254,11 @@ const chunkedDiff = (
index2 += 1;
}
}
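// Three stalled iterations in a row: increase the chunk size and start counting again.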
if (stallIterations >= 3) {
increaseChunkSizes();
stallIterations = 0;
}
}
flushRemainder();