mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-12-30 20:06:30 +01:00
add chunk sizing
This commit is contained in:
parent
7c12d62320
commit
86caa3c327
@ -22,6 +22,10 @@ import {
|
||||
getWorkerErrorCode,
|
||||
filterTokensForDiff,
|
||||
} from './operationUtils';
|
||||
import { alert, dismissToast } from '../../../components/toast';
|
||||
import type { ToastLocation } from '../../../components/toast/types';
|
||||
|
||||
const LONG_RUNNING_PAGE_THRESHOLD = 2000;
|
||||
|
||||
export interface CompareOperationHook extends ToolOperationHook<CompareParameters> {
|
||||
result: CompareResultData | null;
|
||||
@ -44,6 +48,7 @@ export const useCompareOperation = (): CompareOperationHook => {
|
||||
const [downloadFilename, setDownloadFilename] = useState('');
|
||||
const [result, setResult] = useState<CompareResultData | null>(null);
|
||||
const [warnings, setWarnings] = useState<string[]>([]);
|
||||
const longRunningToastIdRef = useRef<string | null>(null);
|
||||
|
||||
const ensureWorker = useCallback(() => {
|
||||
if (!workerRef.current) {
|
||||
@ -105,6 +110,10 @@ export const useCompareOperation = (): CompareOperationHook => {
|
||||
}
|
||||
case 'success':
|
||||
cleanup();
|
||||
if (longRunningToastIdRef.current) {
|
||||
dismissToast(longRunningToastIdRef.current);
|
||||
longRunningToastIdRef.current = null;
|
||||
}
|
||||
resolve({
|
||||
tokens: collectedTokens,
|
||||
stats: message.stats,
|
||||
@ -116,6 +125,10 @@ export const useCompareOperation = (): CompareOperationHook => {
|
||||
break;
|
||||
case 'error': {
|
||||
cleanup();
|
||||
if (longRunningToastIdRef.current) {
|
||||
dismissToast(longRunningToastIdRef.current);
|
||||
longRunningToastIdRef.current = null;
|
||||
}
|
||||
const error: Error & { code?: 'EMPTY_TEXT' | 'TOO_LARGE' } = new Error(message.message);
|
||||
error.code = message.code;
|
||||
reject(error);
|
||||
@ -217,6 +230,27 @@ export const useCompareOperation = (): CompareOperationHook => {
|
||||
const baseFiltered = filterTokensForDiff(baseContent.tokens, baseContent.metadata);
|
||||
const comparisonFiltered = filterTokensForDiff(comparisonContent.tokens, comparisonContent.metadata);
|
||||
|
||||
const combinedPageCount =
|
||||
(baseContent.pageSizes?.length ?? 0) + (comparisonContent.pageSizes?.length ?? 0);
|
||||
|
||||
if (
|
||||
combinedPageCount >= LONG_RUNNING_PAGE_THRESHOLD &&
|
||||
!longRunningToastIdRef.current
|
||||
) {
|
||||
const toastId = alert({
|
||||
alertType: 'neutral',
|
||||
title: t('compare.longJob.title', 'Large comparison in progress'),
|
||||
body: t(
|
||||
'compare.longJob.body',
|
||||
'These PDFs together exceed 2,000 pages. Processing can take several minutes.'
|
||||
),
|
||||
location: 'bottom-right' as ToastLocation,
|
||||
isPersistentPopup: true,
|
||||
expandable: false,
|
||||
});
|
||||
longRunningToastIdRef.current = toastId || null;
|
||||
}
|
||||
|
||||
const { tokens, stats, warnings: workerWarnings } = await runCompareWorker(
|
||||
baseFiltered.tokens,
|
||||
comparisonFiltered.tokens,
|
||||
@ -349,6 +383,10 @@ export const useCompareOperation = (): CompareOperationHook => {
|
||||
const duration = performance.now() - operationStart;
|
||||
setStatus((prev) => (prev ? `${prev} (${Math.round(duration)} ms)` : prev));
|
||||
setIsLoading(false);
|
||||
if (longRunningToastIdRef.current) {
|
||||
dismissToast(longRunningToastIdRef.current);
|
||||
longRunningToastIdRef.current = null;
|
||||
}
|
||||
}
|
||||
},
|
||||
[cleanupDownloadUrl, runCompareWorker, selectors, t]
|
||||
@ -372,6 +410,10 @@ export const useCompareOperation = (): CompareOperationHook => {
|
||||
workerRef.current.terminate();
|
||||
workerRef.current = null;
|
||||
}
|
||||
if (longRunningToastIdRef.current) {
|
||||
dismissToast(longRunningToastIdRef.current);
|
||||
longRunningToastIdRef.current = null;
|
||||
}
|
||||
};
|
||||
}, [cleanupDownloadUrl]);
|
||||
|
||||
|
||||
@ -93,8 +93,31 @@ const chunkedDiff = (
|
||||
return;
|
||||
}
|
||||
|
||||
const maxWindow = Math.max(chunkSize * 6, chunkSize + 512);
|
||||
const minCommit = Math.max(1, Math.floor(chunkSize * 0.1));
|
||||
const baseChunkSize = Math.max(1, chunkSize);
|
||||
let dynamicChunkSize = baseChunkSize;
|
||||
const baseMaxWindow = Math.max(baseChunkSize * 6, baseChunkSize + 512);
|
||||
let dynamicMaxWindow = baseMaxWindow;
|
||||
let dynamicMinCommit = Math.max(1, Math.floor(dynamicChunkSize * 0.1));
|
||||
let dynamicStep = Math.max(64, Math.floor(dynamicChunkSize * 0.5));
|
||||
let stallIterations = 0;
|
||||
|
||||
// Grows the adaptive chunk size and recomputes the values derived from it
// (max window, minimum commit size, window growth step). The surrounding loop
// calls this once stallIterations reaches 3. Growth is capped at 8x the base
// chunk size; once the cap is reached the call is a no-op.
const increaseChunkSizes = () => {
|
||||
// Upper bound for the adaptive chunk size.
const maxChunkSize = baseChunkSize * 8;
|
||||
if (dynamicChunkSize >= maxChunkSize) {
|
||||
return;
|
||||
}
|
||||
// Grow by the larger of one step or 50% of the current size, clamped to the cap.
const nextChunk = Math.min(
|
||||
maxChunkSize,
|
||||
Math.max(dynamicChunkSize + dynamicStep, Math.floor(dynamicChunkSize * 1.5))
|
||||
);
|
||||
// Defensive guard: dynamicStep is always at least 64, so below the cap the
// size should always change and this branch should not trigger.
if (nextChunk === dynamicChunkSize) {
|
||||
return;
|
||||
}
|
||||
dynamicChunkSize = nextChunk;
|
||||
// The max window only ever grows: keep the previous value if it is already larger.
dynamicMaxWindow = Math.max(dynamicMaxWindow, Math.max(dynamicChunkSize * 6, dynamicChunkSize + 512));
|
||||
// Minimum commit is 10% of the chunk size, never below 1.
dynamicMinCommit = Math.max(1, Math.floor(dynamicChunkSize * 0.1));
|
||||
// Window growth step is 50% of the chunk size, never below 64.
dynamicStep = Math.max(64, Math.floor(dynamicChunkSize * 0.5));
|
||||
};
|
||||
|
||||
let index1 = 0;
|
||||
let index2 = 0;
|
||||
@ -124,7 +147,7 @@ const chunkedDiff = (
|
||||
const remaining1 = Math.max(0, words1.length - index1);
|
||||
const remaining2 = Math.max(0, words2.length - index2);
|
||||
|
||||
let windowSize = Math.max(chunkSize, buffer1.length, buffer2.length);
|
||||
let windowSize = Math.max(dynamicChunkSize, buffer1.length, buffer2.length);
|
||||
let window1: string[] = [];
|
||||
let window2: string[] = [];
|
||||
let chunkTokens: CompareDiffToken[] = [];
|
||||
@ -153,8 +176,8 @@ const chunkedDiff = (
|
||||
index2 + take2 >= words2.length;
|
||||
|
||||
const windowTooLarge =
|
||||
window1.length >= maxWindow ||
|
||||
window2.length >= maxWindow;
|
||||
window1.length >= dynamicMaxWindow ||
|
||||
window2.length >= dynamicMaxWindow;
|
||||
|
||||
if (lastStableIndex >= 0 || reachedEnd || windowTooLarge) {
|
||||
break;
|
||||
@ -168,8 +191,8 @@ const chunkedDiff = (
|
||||
}
|
||||
|
||||
windowSize = Math.min(
|
||||
maxWindow,
|
||||
windowSize + Math.max(64, Math.floor(chunkSize * 0.5))
|
||||
dynamicMaxWindow,
|
||||
windowSize + dynamicStep
|
||||
);
|
||||
}
|
||||
|
||||
@ -178,7 +201,12 @@ const chunkedDiff = (
|
||||
flushRemainder();
|
||||
return;
|
||||
}
|
||||
windowSize = Math.min(windowSize + Math.max(64, Math.floor(chunkSize * 0.5)), maxWindow);
|
||||
windowSize = Math.min(windowSize + dynamicStep, dynamicMaxWindow);
|
||||
stallIterations += 1;
|
||||
if (stallIterations >= 3) {
|
||||
increaseChunkSizes();
|
||||
stallIterations = 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -186,7 +214,7 @@ const chunkedDiff = (
|
||||
if (commitIndex < 0) {
|
||||
commitIndex = reachedEnd
|
||||
? chunkTokens.length - 1
|
||||
: Math.min(chunkTokens.length - 1, minCommit - 1);
|
||||
: Math.min(chunkTokens.length - 1, dynamicMinCommit - 1);
|
||||
}
|
||||
|
||||
const commitTokens = commitIndex >= 0 ? chunkTokens.slice(0, commitIndex + 1) : [];
|
||||
@ -211,7 +239,12 @@ const chunkedDiff = (
|
||||
break;
|
||||
}
|
||||
|
||||
// Track stalled iterations (commits smaller than the minimum), then prevent runaway buffers: if nothing was committed, forcibly consume one token
|
||||
if (commitTokens.length < dynamicMinCommit) {
|
||||
stallIterations += 1;
|
||||
} else {
|
||||
stallIterations = 0;
|
||||
}
|
||||
|
||||
if (commitTokens.length === 0 && buffer1.length + buffer2.length > 0) {
|
||||
if (buffer1.length > 0 && index1 < words1.length) {
|
||||
buffer1 = buffer1.slice(1);
|
||||
@ -221,6 +254,11 @@ const chunkedDiff = (
|
||||
index2 += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (stallIterations >= 3) {
|
||||
increaseChunkSizes();
|
||||
stallIterations = 0;
|
||||
}
|
||||
}
|
||||
|
||||
flushRemainder();
|
||||
|
||||
Loading…
Reference in New Issue
Block a user