diff --git a/frontend/src/components/tools/compare/CompareDocumentPane.tsx b/frontend/src/components/tools/compare/CompareDocumentPane.tsx index a3e866b95..c33a5c00a 100644 --- a/frontend/src/components/tools/compare/CompareDocumentPane.tsx +++ b/frontend/src/components/tools/compare/CompareDocumentPane.tsx @@ -34,7 +34,7 @@ interface CompareDocumentPaneProps { pairedPages: PagePreview[]; getRowHeightPx: (pageNumber: number) => number; wordHighlightMap: Map; - tokenIndexToGroupId: Map; + metaIndexToGroupId: Map; documentLabel: string; pageLabel: string; altLabel: string; @@ -103,7 +103,7 @@ const CompareDocumentPane = ({ pairedPages, getRowHeightPx, wordHighlightMap, - tokenIndexToGroupId, + metaIndexToGroupId, documentLabel, pageLabel, altLabel, @@ -181,9 +181,9 @@ const CompareDocumentPane = ({ const wordRects = wordHighlightMap.get(page.pageNumber) ?? []; const groupedRects = new Map(); - for (const { rect, index } of wordRects) { + for (const { rect, metaIndex } of wordRects) { const id = - tokenIndexToGroupId.get(index) ?? `${pane}-token-${index}`; + metaIndexToGroupId.get(metaIndex) ?? `${pane}-token-${metaIndex}`; const current = groupedRects.get(id) ?? []; current.push(rect); groupedRects.set(id, current); diff --git a/frontend/src/components/tools/compare/CompareWorkbenchView.tsx b/frontend/src/components/tools/compare/CompareWorkbenchView.tsx index 85bcb6420..2771bbbed 100644 --- a/frontend/src/components/tools/compare/CompareWorkbenchView.tsx +++ b/frontend/src/components/tools/compare/CompareWorkbenchView.tsx @@ -140,7 +140,7 @@ const CompareWorkbenchView = ({ data }: CompareWorkbenchViewProps) => { const { baseWordChanges, comparisonWordChanges, - tokenIndexToGroupId, + metaIndexToGroupId, wordHighlightMaps, getRowHeightPx, } = useCompareHighlights(result, basePages, comparisonPages); @@ -315,7 +315,7 @@ const CompareWorkbenchView = ({ data }: CompareWorkbenchViewProps) => { pairedPages={comparisonPages} getRowHeightPx={getRowHeightPx} wordHighlightMap={wordHighlightMaps.base} - tokenIndexToGroupId={tokenIndexToGroupId.base} + metaIndexToGroupId={metaIndexToGroupId.base} documentLabel={baseDocumentLabel} pageLabel={pageLabel} altLabel={baseDocumentLabel} @@ -347,7 +347,7 @@ const CompareWorkbenchView = ({ data }: CompareWorkbenchViewProps) => { pairedPages={basePages} getRowHeightPx={getRowHeightPx} wordHighlightMap={wordHighlightMaps.comparison} - tokenIndexToGroupId={tokenIndexToGroupId.comparison} + metaIndexToGroupId={metaIndexToGroupId.comparison} documentLabel={comparisonDocumentLabel} pageLabel={pageLabel} altLabel={comparisonDocumentLabel} diff --git a/frontend/src/components/tools/compare/hooks/useCompareHighlights.ts b/frontend/src/components/tools/compare/hooks/useCompareHighlights.ts index ccc639a7f..ef1f62ff0 100644 --- a/frontend/src/components/tools/compare/hooks/useCompareHighlights.ts +++ b/frontend/src/components/tools/compare/hooks/useCompareHighlights.ts @@ -1,15 +1,13 @@ import { useCallback, useMemo } from 'react'; import type { - CompareDiffToken, + CompareFilteredTokenInfo, CompareResultData, - CompareTokenMetadata, } from '../../../../types/compare'; import type { CompareChangeOption } from '../../../../types/compareWorkbench'; import type { PagePreview } from '../../../../hooks/useProgressivePagePreviews'; import type { WordHighlightEntry } from '../types'; -import { PARAGRAPH_SENTINEL } from '../../../../types/compare'; -interface TokenGroupMap { +interface MetaGroupMap { base: Map; comparison: Map; } @@ -22,101 +20,90 @@ interface WordHighlightMaps { export interface UseCompareHighlightsResult { baseWordChanges: CompareChangeOption[]; comparisonWordChanges: CompareChangeOption[]; - tokenIndexToGroupId: TokenGroupMap; + metaIndexToGroupId: MetaGroupMap; wordHighlightMaps: WordHighlightMaps; getRowHeightPx: (pageNumber: number) => number; } const buildWordChanges = ( - tokens: CompareDiffToken[], - metadata: CompareTokenMetadata[], - targetType: 'added' | 'removed', - tokenIndexToGroupId: Map, + tokens: CompareFilteredTokenInfo[], + metaIndexToGroupId: Map, groupPrefix: string ): CompareChangeOption[] => { - tokenIndexToGroupId.clear(); + metaIndexToGroupId.clear(); if (!tokens.length) return []; const items: CompareChangeOption[] = []; - let metadataIndex = 0; + let currentRun: CompareFilteredTokenInfo[] = []; - for (let i = 0; i < tokens.length; i += 1) { - const token = tokens[i]; - if (token.type === targetType) { - const parts: string[] = []; - const runIndices: number[] = []; - // We'll compute the page number from the first token in the run that has a bbox - let firstPageWithBox: number | null = null; - while (i < tokens.length && tokens[i].type === targetType) { - const t = tokens[i].text; - const isPara = t === PARAGRAPH_SENTINEL || t.startsWith('\uE000') || t.includes('PARA'); - // Skip paragraph sentinel tokens entirely from labels and grouping - if (!isPara) { - parts.push(t); - // Only add to grouping if there is a corresponding metadata index - // AND there is a bounding box to anchor highlights to - const meta = metadata[metadataIndex]; - if (meta) { - if (meta.bbox) { - runIndices.push(metadataIndex); - if (firstPageWithBox == null && typeof meta.page === 'number') { - firstPageWithBox = meta.page; - } - } - } - } - metadataIndex += 1; - i += 1; - } - i -= 1; - const label = parts.join(' ').trim(); - if (label.length > 0 && runIndices.length > 0) { - const startIndexForId = runIndices[0]; - const endIndexForId = runIndices[runIndices.length - 1]; - const groupId = `${groupPrefix}-${startIndexForId}-${endIndexForId}`; - runIndices.forEach((idx) => tokenIndexToGroupId.set(idx, groupId)); - const pageNumber = firstPageWithBox ?? (metadata[startIndexForId]?.page ?? 1); - items.push({ value: groupId, label, pageNumber }); - } - continue; + const flushRun = () => { + if (currentRun.length === 0) return; + const label = currentRun.map((token) => token.token).join(' ').trim(); + if (label.length === 0) { + currentRun = []; + return; } - if (token.type !== (targetType === 'added' ? 'removed' : 'added')) { - metadataIndex += 1; + const first = currentRun[0]; + const last = currentRun[currentRun.length - 1]; + const groupId = `${groupPrefix}-t${first.metaIndex}-t${last.metaIndex}`; + currentRun.forEach((token) => { + metaIndexToGroupId.set(token.metaIndex, groupId); + }); + const pageNumber = first.page ?? last.page ?? 1; + items.push({ value: groupId, label, pageNumber }); + currentRun = []; + }; + + for (const token of tokens) { + if (token.hasHighlight && token.bbox) { + currentRun.push(token); + } else { + flushRun(); } } + flushRun(); return items; }; +const buildHighlightMap = ( + tokens: CompareFilteredTokenInfo[] +): Map => { + const map = new Map(); + for (const token of tokens) { + if (!token.hasHighlight || !token.bbox || token.page == null) continue; + const list = map.get(token.page) ?? []; + list.push({ rect: token.bbox, metaIndex: token.metaIndex }); + map.set(token.page, list); + } + return map; +}; + export const useCompareHighlights = ( result: CompareResultData | null, basePages: PagePreview[], comparisonPages: PagePreview[], ): UseCompareHighlightsResult => { - const baseTokenIndexToGroupId = useMemo(() => new Map(), []); - const comparisonTokenIndexToGroupId = useMemo(() => new Map(), []); + const baseMetaIndexToGroupId = useMemo(() => new Map(), []); + const comparisonMetaIndexToGroupId = useMemo(() => new Map(), []); const baseWordChanges = useMemo(() => { if (!result) return []; return buildWordChanges( - result.tokens, - result.tokenMetadata.base, - 'removed', - baseTokenIndexToGroupId, + result.filteredTokenData.base, + baseMetaIndexToGroupId, 'base-group' ); - }, [baseTokenIndexToGroupId, result]); + }, [baseMetaIndexToGroupId, result]); const comparisonWordChanges = useMemo(() => { if (!result) return []; return buildWordChanges( - result.tokens, - result.tokenMetadata.comparison, - 'added', - comparisonTokenIndexToGroupId, + result.filteredTokenData.comparison, + comparisonMetaIndexToGroupId, 'comparison-group' ); - }, [comparisonTokenIndexToGroupId, result]); + }, [comparisonMetaIndexToGroupId, result]); const wordHighlightMaps = useMemo(() => { if (!result) { @@ -126,35 +113,10 @@ export const useCompareHighlights = ( }; } - const baseMap = new Map(); - const comparisonMap = new Map(); - - let baseIndex = 0; - let comparisonIndex = 0; - for (const token of result.tokens) { - if (token.type === 'removed') { - const meta = result.tokenMetadata.base[baseIndex]; - if (meta?.bbox) { - const list = baseMap.get(meta.page) ?? []; - list.push({ rect: meta.bbox, index: baseIndex }); - baseMap.set(meta.page, list); - } - baseIndex += 1; - } else if (token.type === 'added') { - const meta = result.tokenMetadata.comparison[comparisonIndex]; - if (meta?.bbox) { - const list = comparisonMap.get(meta.page) ?? []; - list.push({ rect: meta.bbox, index: comparisonIndex }); - comparisonMap.set(meta.page, list); - } - comparisonIndex += 1; - } else { - baseIndex += 1; - comparisonIndex += 1; - } - } - - return { base: baseMap, comparison: comparisonMap }; + return { + base: buildHighlightMap(result.filteredTokenData.base), + comparison: buildHighlightMap(result.filteredTokenData.comparison), + }; }, [result]); const getRowHeightPx = useCallback( @@ -172,9 +134,9 @@ export const useCompareHighlights = ( return { baseWordChanges, comparisonWordChanges, - tokenIndexToGroupId: { - base: baseTokenIndexToGroupId, - comparison: comparisonTokenIndexToGroupId, + metaIndexToGroupId: { + base: baseMetaIndexToGroupId, + comparison: comparisonMetaIndexToGroupId, }, wordHighlightMaps, getRowHeightPx, diff --git a/frontend/src/components/tools/compare/types.ts b/frontend/src/components/tools/compare/types.ts index 56ac464c9..7b85ebb77 100644 --- a/frontend/src/components/tools/compare/types.ts +++ b/frontend/src/components/tools/compare/types.ts @@ -10,5 +10,5 @@ export interface PagePreview { export interface WordHighlightEntry { rect: TokenBoundingBox; - index: number; + metaIndex: number; } diff --git a/frontend/src/hooks/tools/compare/useCompareOperation.ts b/frontend/src/hooks/tools/compare/useCompareOperation.ts index 257f4e466..b0b210376 100644 --- a/frontend/src/hooks/tools/compare/useCompareOperation.ts +++ b/frontend/src/hooks/tools/compare/useCompareOperation.ts @@ -3,6 +3,7 @@ import { useTranslation } from 'react-i18next'; import { ADDITION_HIGHLIGHT, CompareDiffToken, + CompareFilteredTokenInfo, CompareResultData, CompareWorkerRequest, CompareWorkerResponse, @@ -217,6 +218,49 @@ export const useCompareOperation = (): CompareOperationHook => { warningMessages ); + const baseHasHighlight = new Array(baseFiltered.tokens.length).fill(false); + const comparisonHasHighlight = new Array(comparisonFiltered.tokens.length).fill(false); + + let baseTokenPointer = 0; + let comparisonTokenPointer = 0; + for (const diffToken of tokens) { + if (diffToken.type === 'removed') { + if (baseTokenPointer < baseHasHighlight.length) { + baseHasHighlight[baseTokenPointer] = true; + } + baseTokenPointer += 1; + } else if (diffToken.type === 'added') { + if (comparisonTokenPointer < comparisonHasHighlight.length) { + comparisonHasHighlight[comparisonTokenPointer] = true; + } + comparisonTokenPointer += 1; + } else { + if (baseTokenPointer < baseHasHighlight.length) { + baseTokenPointer += 1; + } + if (comparisonTokenPointer < comparisonHasHighlight.length) { + comparisonTokenPointer += 1; + } + } + } + + const buildFilteredTokenData = ( + tokensList: typeof baseFiltered.tokens, + metadataList: typeof baseFiltered.metadata, + highlightFlags: boolean[] + ): CompareFilteredTokenInfo[] => + tokensList.map((token, index) => { + const meta = metadataList[index]; + return { + token, + page: meta?.page ?? null, + paragraph: meta?.paragraph ?? null, + bbox: meta?.bbox ?? null, + hasHighlight: highlightFlags[index] ?? false, + metaIndex: index, + }; + }); + const totals = aggregateTotals(tokens); const processedAt = Date.now(); @@ -250,6 +294,14 @@ export const useCompareOperation = (): CompareOperationHook => { base: baseMetadata, comparison: comparisonMetadata, }, + filteredTokenData: { + base: buildFilteredTokenData(baseFiltered.tokens, baseFiltered.metadata, baseHasHighlight), + comparison: buildFilteredTokenData( + comparisonFiltered.tokens, + comparisonFiltered.metadata, + comparisonHasHighlight + ), + }, sourceTokens: { base: baseContent.tokens, comparison: comparisonContent.tokens, diff --git a/frontend/src/types/compare.ts b/frontend/src/types/compare.ts index 360258cfa..d09f7deb1 100644 --- a/frontend/src/types/compare.ts +++ b/frontend/src/types/compare.ts @@ -41,6 +41,15 @@ export interface CompareParagraph { text: string; } +export interface CompareFilteredTokenInfo { + token: string; + page: number | null; + paragraph: number | null; + bbox: TokenBoundingBox | null; + hasHighlight: boolean; + metaIndex: number; +} + export interface CompareChangeSide { text: string; page: number | null; @@ -68,6 +77,10 @@ export interface CompareResultData { base: CompareTokenMetadata[]; comparison: CompareTokenMetadata[]; }; + filteredTokenData: { + base: CompareFilteredTokenInfo[]; + comparison: CompareFilteredTokenInfo[]; + }; sourceTokens: { base: string[]; comparison: string[];