From 3d71735dd89104b60f91b40ded4f317e796a9e5b Mon Sep 17 00:00:00 2001 From: EthanHealy01 Date: Wed, 22 Oct 2025 01:59:40 +0100 Subject: [PATCH] fix bug for rotated page highlighting --- .../tools/compare/useCompareOperation.ts | 120 ++++++++++++++---- 1 file changed, 92 insertions(+), 28 deletions(-) diff --git a/frontend/src/hooks/tools/compare/useCompareOperation.ts b/frontend/src/hooks/tools/compare/useCompareOperation.ts index fefa6a6ec..13f278f90 100644 --- a/frontend/src/hooks/tools/compare/useCompareOperation.ts +++ b/frontend/src/hooks/tools/compare/useCompareOperation.ts @@ -433,31 +433,50 @@ const extractContentFromPdf = async (file: StirlingFile): Promise px); + const ys = transformed.map(([, py]) => py); + const left = Math.min(...xs); + const right = Math.max(...xs); + const top = Math.min(...ys); + const bottom = Math.max(...ys); - const left = Math.min(x1, x2); - const right = Math.max(x1, x2); - const top = Math.min(y1, y2); - const bottom = Math.max(y1, y2); + if ( + !Number.isFinite(left) || + !Number.isFinite(right) || + !Number.isFinite(top) || + !Number.isFinite(bottom) + ) { + prevItem = item; + continue; + } - let normalizedTop = clamp(top / viewport.height); - let normalizedBottom = clamp(bottom / viewport.height); - let height = Math.max(normalizedBottom - normalizedTop, 0); + const [baselineStart, baselineEnd, verticalEnd] = transformed; + const baselineVector: [number, number] = [ + baselineEnd[0] - baselineStart[0], + baselineEnd[1] - baselineStart[1], + ]; + const verticalVector: [number, number] = [ + verticalEnd[0] - baselineStart[0], + verticalEnd[1] - baselineStart[1], + ]; + const baselineMagnitude = Math.hypot(baselineVector[0], baselineVector[1]); + const verticalMagnitude = Math.hypot(verticalVector[0], verticalVector[1]); + const hasOrientationVectors = baselineMagnitude > 1e-6 && verticalMagnitude > 1e-6; - // Tighten vertical box using font ascent/descent when available - const fontName: string | undefined = (item as any).fontName; - const font = fontName ? styles[fontName] : undefined; - const ascent = typeof font?.ascent === 'number' ? Math.max(0.7, Math.min(1.1, font.ascent)) : 0.9; - const descent = typeof font?.descent === 'number' ? Math.max(0.0, Math.min(0.5, Math.abs(font.descent))) : 0.2; - const vFactor = Math.min(1, Math.max(0.75, ascent + descent)); - const shrink = height * (1 - vFactor); - if (shrink > 0) { - normalizedTop += shrink / 2; - height = height * vFactor; - normalizedBottom = normalizedTop + height; - } + // Tighten vertical boxes using font ascent/descent when available + const fontName: string | undefined = (item as any).fontName; + const font = fontName ? styles[fontName] : undefined; + const ascent = typeof font?.ascent === 'number' ? Math.max(0.7, Math.min(1.1, font.ascent)) : 0.9; + const descent = typeof font?.descent === 'number' ? Math.max(0.0, Math.min(0.5, Math.abs(font.descent))) : 0.2; + const verticalScale = Math.min(1, Math.max(0.75, ascent + descent)); const wordRegex = /[A-Za-z0-9]+|[^\sA-Za-z0-9]/g; let match: RegExpExecArray | null; @@ -472,14 +491,59 @@ const extractContentFromPdf = async (file: StirlingFile): Promise = [ + segStart, + [segStart[0] + verticalVector[0], segStart[1] + verticalVector[1]], + [segEnd[0] + verticalVector[0], segEnd[1] + verticalVector[1]], + segEnd, + ]; + const cornerXs = cornerPoints.map(([px]) => px); + const cornerYs = cornerPoints.map(([, py]) => py); + wordLeftAbs = Math.min(...cornerXs); + wordRightAbs = Math.max(...cornerXs); + wordTopAbs = Math.min(...cornerYs); + wordBottomAbs = Math.max(...cornerYs); + } else { + const segLeftAbs = left + (right - left) * relStart; + const segRightAbs = left + (right - left) * relEnd; + wordLeftAbs = Math.min(segLeftAbs, segRightAbs); + wordRightAbs = Math.max(segLeftAbs, segRightAbs); + wordTopAbs = top; + wordBottomAbs = bottom; + } + + let wordLeft = clamp(wordLeftAbs / viewport.width); + let wordRight = clamp(wordRightAbs / viewport.width); + let wordTop = clamp(wordTopAbs / viewport.height); + let wordBottom = clamp(wordBottomAbs / viewport.height); + const wordWidth = Math.max(0, wordRight - wordLeft); + let wordHeight = Math.max(0, wordBottom - wordTop); + + if (wordHeight > 0 && verticalScale < 1) { + const midY = (wordTop + wordBottom) / 2; + const shrunkHeight = Math.max(0, wordHeight * verticalScale); + const half = shrunkHeight / 2; + wordTop = clamp(midY - half); + wordBottom = clamp(midY + half); + wordHeight = Math.max(0, wordBottom - wordTop); + } + + const bbox = adjustBoundingBox(wordLeft, wordTop, wordWidth, wordHeight); tokens.push(normalizedWord); metadata.push({