From 4e7f435016751c097ab09a5126feb96af1587093 Mon Sep 17 00:00:00 2001 From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com> Date: Mon, 20 Apr 2026 12:53:56 +0100 Subject: [PATCH] Swap thumbnail rendering from PDF.js to PDFium (#6135) --- .../services/thumbnailGenerationService.ts | 59 ++--- frontend/src/core/utils/pdfiumPageRender.ts | 124 +++++++++ frontend/src/core/utils/thumbnailUtils.ts | 246 +++++++++--------- 3 files changed, 274 insertions(+), 155 deletions(-) create mode 100644 frontend/src/core/utils/pdfiumPageRender.ts diff --git a/frontend/src/core/services/thumbnailGenerationService.ts b/frontend/src/core/services/thumbnailGenerationService.ts index 6ae9ab9712..0112f507e4 100644 --- a/frontend/src/core/services/thumbnailGenerationService.ts +++ b/frontend/src/core/services/thumbnailGenerationService.ts @@ -3,8 +3,11 @@ */ import { FileId } from "@app/types/file"; -import { pdfWorkerManager } from "@app/services/pdfWorkerManager"; -import { PDFDocumentProxy } from "pdfjs-dist"; +import { + openRawDocumentSafe, + closeRawDocument, +} from "@app/services/pdfiumService"; +import { renderPdfiumPageDataUrl } from "@app/utils/pdfiumPageRender"; interface ThumbnailResult { pageNumber: number; @@ -27,7 +30,7 @@ interface CachedThumbnail { } interface CachedPDFDocument { - pdf: PDFDocumentProxy; + docPtr: number; lastUsed: number; refCount: number; } @@ -50,38 +53,32 @@ export class ThumbnailGenerationService { } /** - * Get or create a cached PDF document + * Get or create a cached PDFium document pointer. */ private async getCachedPDFDocument( fileId: FileId, pdfArrayBuffer: ArrayBuffer, - ): Promise { + ): Promise { const cached = this.pdfDocumentCache.get(fileId); if (cached) { cached.lastUsed = Date.now(); cached.refCount++; - return cached.pdf; + return cached.docPtr; } - // Evict old PDFs if cache is full while (this.pdfDocumentCache.size >= this.maxPdfCacheSize) { this.evictLeastRecentlyUsedPDF(); } - // Use centralized worker manager instead of direct getDocument - const pdf = await pdfWorkerManager.createDocument(pdfArrayBuffer, { - disableAutoFetch: true, - disableStream: true, - stopAtErrors: false, - }); + const docPtr = await openRawDocumentSafe(pdfArrayBuffer); this.pdfDocumentCache.set(fileId, { - pdf, + docPtr, lastUsed: Date.now(), refCount: 1, }); - return pdf; + return docPtr; } /** @@ -110,7 +107,7 @@ export class ThumbnailGenerationService { } if (oldestEntry) { - pdfWorkerManager.destroyDocument(oldestEntry[1].pdf); // Use worker manager for cleanup + void closeRawDocument(oldestEntry[1].docPtr); this.pdfDocumentCache.delete(oldestEntry[0]); } } @@ -169,7 +166,7 @@ export class ThumbnailGenerationService { thumbnails: ThumbnailResult[]; }) => void, ): Promise { - const pdf = await this.getCachedPDFDocument(fileId, pdfArrayBuffer); + const docPtr = await this.getCachedPDFDocument(fileId, pdfArrayBuffer); const allResults: ThumbnailResult[] = []; let completed = 0; @@ -182,21 +179,15 @@ export class ThumbnailGenerationService { // Process batch sequentially (to avoid canvas conflicts) for (const pageNumber of batch) { try { - const page = await pdf.getPage(pageNumber); - const viewport = page.getViewport({ scale, rotation: 0 }); - - const canvas = document.createElement("canvas"); - canvas.width = viewport.width; - canvas.height = viewport.height; - - const context = canvas.getContext("2d"); - if (!context) { - throw new Error("Could not get canvas context"); + const thumbnail = await renderPdfiumPageDataUrl( + docPtr, + pageNumber - 1, + scale, + { applyRotation: false, format: "jpeg", quality }, + ); + if (!thumbnail) { + throw new Error(`Could not render page ${pageNumber}`); } - - await page.render({ canvasContext: context, viewport }).promise; - const thumbnail = canvas.toDataURL("image/jpeg", quality); - allResults.push({ pageNumber, thumbnail, success: true }); } catch (error) { console.error( @@ -304,7 +295,7 @@ export class ThumbnailGenerationService { clearPDFCache(): void { // Destroy all cached PDF documents using worker manager for (const [, cached] of this.pdfDocumentCache) { - pdfWorkerManager.destroyDocument(cached.pdf); + void closeRawDocument(cached.docPtr); } this.pdfDocumentCache.clear(); } @@ -312,7 +303,7 @@ export class ThumbnailGenerationService { clearPDFCacheForFile(fileId: FileId): void { const cached = this.pdfDocumentCache.get(fileId); if (cached) { - pdfWorkerManager.destroyDocument(cached.pdf); + void closeRawDocument(cached.docPtr); this.pdfDocumentCache.delete(fileId); } } @@ -324,7 +315,7 @@ export class ThumbnailGenerationService { cleanupCompletedDocument(fileId: FileId): void { const cached = this.pdfDocumentCache.get(fileId); if (cached && cached.refCount <= 0) { - pdfWorkerManager.destroyDocument(cached.pdf); + void closeRawDocument(cached.docPtr); this.pdfDocumentCache.delete(fileId); } } diff --git a/frontend/src/core/utils/pdfiumPageRender.ts b/frontend/src/core/utils/pdfiumPageRender.ts new file mode 100644 index 0000000000..0577245c98 --- /dev/null +++ b/frontend/src/core/utils/pdfiumPageRender.ts @@ -0,0 +1,124 @@ +/** + * pdfiumPageRender — render a single PDF page from an already-opened PDFium + * document pointer to a canvas data URL. + * + * Shared by the first-page thumbnail path (thumbnailUtils.ts) and the + * per-page thumbnail service (thumbnailGenerationService.ts) so the pixel- + * copy + white-background logic lives in one place. + */ +import { getPdfiumModule } from "@app/services/pdfiumService"; + +/** FPDF_ANNOT (0x01) | FPDF_LCD_TEXT (0x10). */ +const PDFIUM_RENDER_FLAGS = 0x01 | 0x10; + +export interface RenderPdfiumPageOptions { + /** When true (default), bake the page's own rotation into the bitmap. + * When false, render upright so callers can apply CSS rotation. */ + applyRotation?: boolean; + /** Output format; defaults to PNG. */ + format?: "png" | "jpeg"; + /** JPEG quality [0,1]; ignored for PNG. */ + quality?: number; +} + +/** + * Render a single page (0-indexed) of an open PDFium document into a data URL. + * + * The caller is responsible for opening and closing the document pointer. + */ +export async function renderPdfiumPageDataUrl( + docPtr: number, + pageIndex: number, + scale: number, + options: RenderPdfiumPageOptions = {}, +): Promise { + const { applyRotation = true, format = "png", quality } = options; + const m = await getPdfiumModule(); + + const pagePtr = m.FPDF_LoadPage(docPtr, pageIndex); + if (!pagePtr) return null; + + try { + const rawW = m.FPDF_GetPageWidthF(pagePtr); + const rawH = m.FPDF_GetPageHeightF(pagePtr); + // FPDFPage_GetRotation returns 0..3 for 0°/90°/180°/270° CW. + const pageRotQuarters = (m as any).FPDFPage_GetRotation(pagePtr) | 0; + + const isQuarterTurn = pageRotQuarters === 1 || pageRotQuarters === 3; + const outW = applyRotation && isQuarterTurn ? rawH : rawW; + const outH = applyRotation && isQuarterTurn ? rawW : rawH; + const w = Math.max(1, Math.round(outW * scale)); + const h = Math.max(1, Math.round(outH * scale)); + + const bitmapPtr = m.FPDFBitmap_Create(w, h, 1); + try { + // White background — PDF content doesn't encode paper colour, so + // unpainted regions would otherwise be transparent. + m.FPDFBitmap_FillRect(bitmapPtr, 0, 0, w, h, 0xffffffff); + m.FPDF_RenderPageBitmap( + bitmapPtr, + pagePtr, + 0, + 0, + w, + h, + applyRotation ? pageRotQuarters : 0, + PDFIUM_RENDER_FLAGS, + ); + + const bufferPtr = m.FPDFBitmap_GetBuffer(bitmapPtr); + const stride = m.FPDFBitmap_GetStride(bitmapPtr); + const heap = new Uint8Array((m.pdfium.wasmExports as any).memory.buffer); + const pixels = new Uint8ClampedArray(w * h * 4); + + // BGRA → RGBA. Direct HEAPU8 indexing is ~100× faster than + // per-pixel m.pdfium.getValue() calls for large bitmaps. + for (let y = 0; y < h; y++) { + const srcRow = bufferPtr + y * stride; + const dstRow = y * w * 4; + for (let x = 0; x < w; x++) { + const so = srcRow + x * 4; + const dst = dstRow + x * 4; + pixels[dst] = heap[so + 2]; + pixels[dst + 1] = heap[so + 1]; + pixels[dst + 2] = heap[so]; + pixels[dst + 3] = heap[so + 3]; + } + } + + const canvas = document.createElement("canvas"); + canvas.width = w; + canvas.height = h; + const ctx = canvas.getContext("2d"); + if (!ctx) return null; + ctx.putImageData(new ImageData(pixels, w, h), 0, 0); + return format === "jpeg" + ? canvas.toDataURL("image/jpeg", quality ?? 0.8) + : canvas.toDataURL(); + } finally { + m.FPDFBitmap_Destroy(bitmapPtr); + } + } finally { + m.FPDF_ClosePage(pagePtr); + } +} + +/** + * Read raw width/height/rotation for a page without rendering. + */ +export async function readPdfiumPageMetadata( + docPtr: number, + pageIndex: number, +): Promise<{ width: number; height: number; rotation: number } | null> { + const m = await getPdfiumModule(); + const pagePtr = m.FPDF_LoadPage(docPtr, pageIndex); + if (!pagePtr) return null; + try { + const width = m.FPDF_GetPageWidthF(pagePtr); + const height = m.FPDF_GetPageHeightF(pagePtr); + const rotation = (((m as any).FPDFPage_GetRotation(pagePtr) | 0) & 3) * 90; + return { width, height, rotation }; + } finally { + m.FPDF_ClosePage(pagePtr); + } +} diff --git a/frontend/src/core/utils/thumbnailUtils.ts b/frontend/src/core/utils/thumbnailUtils.ts index 3a2866dc98..36ec904521 100644 --- a/frontend/src/core/utils/thumbnailUtils.ts +++ b/frontend/src/core/utils/thumbnailUtils.ts @@ -1,4 +1,12 @@ -import { pdfWorkerManager } from "@app/services/pdfWorkerManager"; +import { + openRawDocumentSafe, + closeRawDocument, + getPdfiumModule, +} from "@app/services/pdfiumService"; +import { + renderPdfiumPageDataUrl, + readPdfiumPageMetadata, +} from "@app/utils/pdfiumPageRender"; export interface ThumbnailWithMetadata { thumbnail: string; // Always returns a thumbnail (placeholder if needed) @@ -519,26 +527,88 @@ function drawLargeLockIcon( ctx.fillRect(keyholeX - 2, keyholeY, 4, 8); } -/** - * Generate standard PDF thumbnail by rendering first page - */ -async function generateStandardPDFThumbnail( - pdf: any, - scale: number, -): Promise { - const page = await pdf.getPage(1); - const viewport = page.getViewport({ scale }); - const canvas = document.createElement("canvas"); - canvas.width = viewport.width; - canvas.height = viewport.height; - const context = canvas.getContext("2d"); +/** PDFium error code 4 = password required (encrypted PDF). */ +const PDFIUM_ERR_PASSWORD = 4; - if (!context) { - throw new Error("Could not get canvas context"); +interface PdfiumRenderResult { + thumbnail: string; + pageCount: number; + pageRotations: number[]; + pageDimensions: Array<{ width: number; height: number }>; + /** Set when the document is password-protected — caller substitutes the + * encrypted placeholder. Thumbnail/metadata fields are empty in that case. */ + isEncrypted?: boolean; +} + +/** + * Open a PDF with PDFium, render page 1 to a data URL, and optionally + * collect rotation + dimensions for every page. Returns `isEncrypted: true` + * (without rendering) when the document is password-protected. + * + * @param applyRotation When true, bakes the page's own rotation into the + * bitmap (static display). When false, renders upright so callers can + * apply rotation via CSS (PageEditor). + * @param collectAllPagesMetadata When true, reads per-page rotation and + * dimensions for all pages. When false (very large files), only the + * first page's metadata is populated. + */ +async function renderPdfThumbnailPdfium( + data: ArrayBuffer, + scale: number, + applyRotation: boolean, + collectAllPagesMetadata: boolean, +): Promise { + const m = await getPdfiumModule(); + let docPtr: number; + try { + docPtr = await openRawDocumentSafe(data); + } catch (error) { + if ( + error instanceof Error && + new RegExp(`error ${PDFIUM_ERR_PASSWORD}`).test(error.message) + ) { + return { + thumbnail: "", + pageCount: 1, + pageRotations: [], + pageDimensions: [], + isEncrypted: true, + }; + } + throw error; } - await page.render({ canvasContext: context, viewport }).promise; - return canvas.toDataURL(); + try { + const pageCount = m.FPDF_GetPageCount(docPtr); + const thumbnail = await renderPdfiumPageDataUrl(docPtr, 0, scale, { + applyRotation, + }); + if (!thumbnail) throw new Error("PDFium: failed to render page 0"); + + // Page 0 metadata is already available via the render, but read it + // directly for consistency with the later per-page loop. + const firstMeta = await readPdfiumPageMetadata(docPtr, 0); + const pageRotations: number[] = [firstMeta?.rotation ?? 0]; + const pageDimensions: Array<{ width: number; height: number }> = [ + { + width: firstMeta?.width ?? 0, + height: firstMeta?.height ?? 0, + }, + ]; + + if (collectAllPagesMetadata) { + for (let i = 1; i < pageCount; i++) { + const meta = await readPdfiumPageMetadata(docPtr, i); + if (!meta) continue; + pageRotations[i] = meta.rotation; + pageDimensions[i] = { width: meta.width, height: meta.height }; + } + } + + return { thumbnail, pageCount, pageRotations, pageDimensions }; + } finally { + await closeRawDocument(docPtr); + } } /** @@ -590,27 +660,16 @@ async function generatePDFThumbnail( file: File, scale: number, ): Promise { - try { - const pdf = await pdfWorkerManager.createDocument(arrayBuffer, { - disableAutoFetch: true, - disableStream: true, - }); - - const thumbnail = await generateStandardPDFThumbnail(pdf, scale); - - // Immediately clean up memory after thumbnail generation using worker manager - pdfWorkerManager.destroyDocument(pdf); - return thumbnail; - } catch (error) { - if ( - error && - typeof error === "object" && - (error as any).name === "PasswordException" - ) { - return generateEncryptedPDFThumbnail(file); - } - throw error; // Not an encryption issue, re-throw + const result = await renderPdfThumbnailPdfium( + arrayBuffer, + scale, + true, + false, + ); + if (result.isEncrypted) { + return generateEncryptedPDFThumbnail(file); } + return result.thumbnail; } /** @@ -643,27 +702,20 @@ export async function generateThumbnailForFile(file: File): Promise { try { return await generatePDFThumbnail(arrayBuffer, file, scale); - } catch (error) { - if (error instanceof Error && error.name === "InvalidPDFException") { + } catch { + // PDFium needs the xref table at the end of the file, so the 2MB + // chunk can fail to open for PDFs larger than that. Retry with the + // full buffer before falling back to a placeholder. + try { + const fullArrayBuffer = await file.arrayBuffer(); + return await generatePDFThumbnail(fullArrayBuffer, file, scale); + } catch (error) { console.warn( - `PDF structure issue for ${file.name} - trying with full file`, + `PDF processing failed for ${file.name} - using placeholder:`, + error, ); - try { - // Try with full file instead of chunk - const fullArrayBuffer = await file.arrayBuffer(); - return await generatePDFThumbnail(fullArrayBuffer, file, scale); - } catch { - console.warn( - `Full file PDF processing also failed for ${file.name} - using placeholder`, - ); - return generatePlaceholderThumbnail(file); - } + return generatePlaceholderThumbnail(file); } - console.warn( - `PDF processing failed for ${file.name} - using placeholder:`, - error, - ); - return generatePlaceholderThumbnail(file); } } @@ -691,76 +743,28 @@ export async function generateThumbnailWithMetadata( try { const arrayBuffer = await file.arrayBuffer(); - const pdf = await pdfWorkerManager.createDocument(arrayBuffer); + const result = await renderPdfThumbnailPdfium( + arrayBuffer, + scale, + applyRotation, + !isVeryLarge, + ); - const pageCount = pdf.numPages; - const page = await pdf.getPage(1); - const pageDimensions: Array<{ width: number; height: number }> = []; - - // If applyRotation is false, render without rotation (for CSS-based rotation) - // If applyRotation is true, let PDF.js apply rotation (for static display) - const viewport = applyRotation - ? page.getViewport({ scale }) - : page.getViewport({ scale, rotation: 0 }); - const baseViewport = page.getViewport({ scale: 1, rotation: 0 }); - pageDimensions[0] = { - width: baseViewport.width, - height: baseViewport.height, - }; - - const canvas = document.createElement("canvas"); - canvas.width = viewport.width; - canvas.height = viewport.height; - const context = canvas.getContext("2d"); - - if (!context) { - pdfWorkerManager.destroyDocument(pdf); - throw new Error("Could not get canvas context"); - } - - await page.render({ canvasContext: context, viewport, canvas }).promise; - const thumbnail = canvas.toDataURL(); - - // For very large files, skip reading rotation/dimensions for all pages (just use first page data) - if (isVeryLarge) { - const rotation = page.rotate || 0; - pdfWorkerManager.destroyDocument(pdf); + if (result.isEncrypted) { return { - thumbnail, - pageCount, - pageRotations: [rotation], - pageDimensions: [pageDimensions[0]], + thumbnail: generateEncryptedPDFThumbnail(file), + pageCount: 1, + isEncrypted: true, }; } - // Read rotation for all pages - const pageRotations: number[] = []; - for (let i = 1; i <= pageCount; i++) { - const p = await pdf.getPage(i); - const rotation = p.rotate || 0; - pageRotations.push(rotation); - if (!pageDimensions[i - 1]) { - const pageViewport = p.getViewport({ scale: 1, rotation: 0 }); - pageDimensions[i - 1] = { - width: pageViewport.width, - height: pageViewport.height, - }; - } - } - - pdfWorkerManager.destroyDocument(pdf); - return { thumbnail, pageCount, pageRotations, pageDimensions }; - } catch (error) { - if ( - error && - typeof error === "object" && - (error as any).name === "PasswordException" - ) { - // Handle encrypted PDFs - const thumbnail = generateEncryptedPDFThumbnail(file); - return { thumbnail, pageCount: 1, isEncrypted: true }; - } - + return { + thumbnail: result.thumbnail, + pageCount: result.pageCount, + pageRotations: result.pageRotations, + pageDimensions: result.pageDimensions, + }; + } catch { const thumbnail = generatePlaceholderThumbnail(file); return { thumbnail, pageCount: 1 }; }