Swap thumbnail rendering from PDF.js to PDFium (#6135)

This commit is contained in:
Anthony Stirling
2026-04-20 12:53:56 +01:00
committed by GitHub
parent b4b196556d
commit 4e7f435016
3 changed files with 274 additions and 155 deletions

View File

@@ -3,8 +3,11 @@
*/
import { FileId } from "@app/types/file";
import { pdfWorkerManager } from "@app/services/pdfWorkerManager";
import { PDFDocumentProxy } from "pdfjs-dist";
import {
openRawDocumentSafe,
closeRawDocument,
} from "@app/services/pdfiumService";
import { renderPdfiumPageDataUrl } from "@app/utils/pdfiumPageRender";
interface ThumbnailResult {
pageNumber: number;
@@ -27,7 +30,7 @@ interface CachedThumbnail {
}
/**
 * Shape of one entry in the service's `pdfDocumentCache` map (keyed by file id).
 */
interface CachedPDFDocument {
// NOTE(review): looks like the field that `docPtr` replaces in this change —
// confirm only one of the two is meant to remain.
pdf: PDFDocumentProxy;
// Document pointer obtained from openRawDocumentSafe; released with closeRawDocument.
docPtr: number;
// Date.now() timestamp, refreshed on insertion and on every cache hit.
lastUsed: number;
// Set to 1 on insertion and incremented on each cache hit.
// cleanupCompletedDocument only closes the document once this is <= 0;
// where it is decremented is not visible here — TODO confirm.
refCount: number;
}
@@ -50,38 +53,32 @@ export class ThumbnailGenerationService {
}
/**
* Get or create a cached PDF document
* Get or create a cached PDFium document pointer.
*/
private async getCachedPDFDocument(
fileId: FileId,
pdfArrayBuffer: ArrayBuffer,
): Promise<any> {
): Promise<number> {
const cached = this.pdfDocumentCache.get(fileId);
if (cached) {
cached.lastUsed = Date.now();
cached.refCount++;
return cached.pdf;
return cached.docPtr;
}
// Evict old PDFs if cache is full
while (this.pdfDocumentCache.size >= this.maxPdfCacheSize) {
this.evictLeastRecentlyUsedPDF();
}
// Use centralized worker manager instead of direct getDocument
const pdf = await pdfWorkerManager.createDocument(pdfArrayBuffer, {
disableAutoFetch: true,
disableStream: true,
stopAtErrors: false,
});
const docPtr = await openRawDocumentSafe(pdfArrayBuffer);
this.pdfDocumentCache.set(fileId, {
pdf,
docPtr,
lastUsed: Date.now(),
refCount: 1,
});
return pdf;
return docPtr;
}
/**
@@ -110,7 +107,7 @@ export class ThumbnailGenerationService {
}
if (oldestEntry) {
pdfWorkerManager.destroyDocument(oldestEntry[1].pdf); // Use worker manager for cleanup
void closeRawDocument(oldestEntry[1].docPtr);
this.pdfDocumentCache.delete(oldestEntry[0]);
}
}
@@ -169,7 +166,7 @@ export class ThumbnailGenerationService {
thumbnails: ThumbnailResult[];
}) => void,
): Promise<ThumbnailResult[]> {
const pdf = await this.getCachedPDFDocument(fileId, pdfArrayBuffer);
const docPtr = await this.getCachedPDFDocument(fileId, pdfArrayBuffer);
const allResults: ThumbnailResult[] = [];
let completed = 0;
@@ -182,21 +179,15 @@ export class ThumbnailGenerationService {
// Process batch sequentially (to avoid canvas conflicts)
for (const pageNumber of batch) {
try {
const page = await pdf.getPage(pageNumber);
const viewport = page.getViewport({ scale, rotation: 0 });
const canvas = document.createElement("canvas");
canvas.width = viewport.width;
canvas.height = viewport.height;
const context = canvas.getContext("2d");
if (!context) {
throw new Error("Could not get canvas context");
const thumbnail = await renderPdfiumPageDataUrl(
docPtr,
pageNumber - 1,
scale,
{ applyRotation: false, format: "jpeg", quality },
);
if (!thumbnail) {
throw new Error(`Could not render page ${pageNumber}`);
}
await page.render({ canvasContext: context, viewport }).promise;
const thumbnail = canvas.toDataURL("image/jpeg", quality);
allResults.push({ pageNumber, thumbnail, success: true });
} catch (error) {
console.error(
@@ -304,7 +295,7 @@ export class ThumbnailGenerationService {
/**
 * Release every document held in `pdfDocumentCache`, then empty the cache.
 *
 * The closeRawDocument call is prefixed with `void`, so its result is not
 * awaited here and this method returns synchronously.
 */
clearPDFCache(): void {
// Release all cached PDF documents.
// NOTE(review): the destroyDocument and closeRawDocument lines look like the
// before/after of the PDF.js → PDFium swap — confirm only one should remain.
for (const [, cached] of this.pdfDocumentCache) {
pdfWorkerManager.destroyDocument(cached.pdf);
void closeRawDocument(cached.docPtr);
}
this.pdfDocumentCache.clear();
}
@@ -312,7 +303,7 @@ export class ThumbnailGenerationService {
/**
 * Release the cached document for a single file, if one exists, and remove
 * its cache entry. Does nothing when `fileId` is not cached.
 *
 * Unlike cleanupCompletedDocument, this does not look at `refCount`.
 *
 * @param fileId Key of the cache entry to drop.
 */
clearPDFCacheForFile(fileId: FileId): void {
const cached = this.pdfDocumentCache.get(fileId);
if (cached) {
// NOTE(review): the destroyDocument and closeRawDocument lines look like the
// before/after of the PDF.js → PDFium swap — confirm only one should remain.
pdfWorkerManager.destroyDocument(cached.pdf);
// `void`: the close result is intentionally not awaited.
void closeRawDocument(cached.docPtr);
this.pdfDocumentCache.delete(fileId);
}
}
@@ -324,7 +315,7 @@ export class ThumbnailGenerationService {
/**
 * Release and evict the cached document for `fileId`, but only when its
 * `refCount` is zero or negative. Entries with a positive `refCount`, and
 * file ids that are not cached, are left untouched.
 *
 * @param fileId Key of the cache entry to check.
 */
cleanupCompletedDocument(fileId: FileId): void {
const cached = this.pdfDocumentCache.get(fileId);
if (cached && cached.refCount <= 0) {
// NOTE(review): the destroyDocument and closeRawDocument lines look like the
// before/after of the PDF.js → PDFium swap — confirm only one should remain.
pdfWorkerManager.destroyDocument(cached.pdf);
// `void`: the close result is intentionally not awaited.
void closeRawDocument(cached.docPtr);
this.pdfDocumentCache.delete(fileId);
}
}

View File

@@ -0,0 +1,124 @@
/**
* pdfiumPageRender — render a single PDF page from an already-opened PDFium
* document pointer to a canvas data URL.
*
* Shared by the first-page thumbnail path (thumbnailUtils.ts) and the
* per-page thumbnail service (thumbnailGenerationService.ts) so the pixel-
* copy + white-background logic lives in one place.
*/
import { getPdfiumModule } from "@app/services/pdfiumService";
/**
 * Flags passed to FPDF_RenderPageBitmap: FPDF_ANNOT (0x01) | FPDF_LCD_TEXT (0x02).
 *
 * Note that 0x10 is FPDF_REVERSE_BYTE_ORDER in PDFium's fpdfview.h, not
 * FPDF_LCD_TEXT. It must stay unset here: with it PDFium writes RGBA, and the
 * BGRA → RGBA swizzle in renderPdfiumPageDataUrl would then swap the red and
 * blue channels of every rendered page.
 */
const PDFIUM_RENDER_FLAGS = 0x01 | 0x02;
/** Optional settings accepted by renderPdfiumPageDataUrl. */
export interface RenderPdfiumPageOptions {
  /**
   * Image type of the returned data URL. PNG is used when omitted.
   */
  format?: "png" | "jpeg";
  /**
   * JPEG quality in the range [0, 1]. Only consulted when `format` is
   * "jpeg"; falls back to 0.8 when omitted.
   */
  quality?: number;
  /**
   * Whether the page's own rotation is baked into the bitmap. Defaults to
   * true. Pass false to get an upright render so the caller can rotate it
   * with CSS instead.
   */
  applyRotation?: boolean;
}
/**
 * Render a single page (0-indexed) of an open PDFium document into a data URL.
 *
 * The caller is responsible for opening and closing the document pointer;
 * the page and bitmap created here are always released before returning.
 *
 * @param docPtr Pointer to an already-opened PDFium document.
 * @param pageIndex Zero-based index of the page to render.
 * @param scale Multiplier applied to the page size reported by PDFium to
 *   obtain the bitmap size in pixels (each side is at least 1 px).
 * @param options Rotation handling and output encoding; see
 *   RenderPdfiumPageOptions.
 * @returns The encoded image as a data URL, or `null` when the page cannot
 *   be loaded, the bitmap cannot be allocated, or no 2D canvas context is
 *   available.
 */
export async function renderPdfiumPageDataUrl(
  docPtr: number,
  pageIndex: number,
  scale: number,
  options: RenderPdfiumPageOptions = {},
): Promise<string | null> {
  const { applyRotation = true, format = "png", quality } = options;
  const m = await getPdfiumModule();
  const pagePtr = m.FPDF_LoadPage(docPtr, pageIndex);
  if (!pagePtr) return null;
  try {
    const rawW = m.FPDF_GetPageWidthF(pagePtr);
    const rawH = m.FPDF_GetPageHeightF(pagePtr);
    // FPDFPage_GetRotation returns 0..3 for 0°/90°/180°/270° CW.
    const pageRotQuarters = (m as any).FPDFPage_GetRotation(pagePtr) | 0;
    const isQuarterTurn = pageRotQuarters === 1 || pageRotQuarters === 3;
    // A baked-in quarter turn swaps the output's width and height.
    const outW = applyRotation && isQuarterTurn ? rawH : rawW;
    const outH = applyRotation && isQuarterTurn ? rawW : rawH;
    const w = Math.max(1, Math.round(outW * scale));
    const h = Math.max(1, Math.round(outH * scale));

    // Third argument is FPDFBitmap_Create's alpha flag (non-zero = use alpha).
    const bitmapPtr = m.FPDFBitmap_Create(w, h, 1);
    // Creation fails (null pointer) on bad dimensions or out-of-memory;
    // bail out instead of rendering into / reading from address 0.
    if (!bitmapPtr) return null;
    try {
      // White background — PDF content doesn't encode paper colour, so
      // unpainted regions would otherwise be transparent.
      m.FPDFBitmap_FillRect(bitmapPtr, 0, 0, w, h, 0xffffffff);

      // The copy loop below assumes PDFium's default BGRA byte order, so
      // FPDF_REVERSE_BYTE_ORDER (0x10) must never reach the renderer.
      const FPDF_REVERSE_BYTE_ORDER = 0x10;
      const renderFlags = PDFIUM_RENDER_FLAGS & ~FPDF_REVERSE_BYTE_ORDER;

      m.FPDF_RenderPageBitmap(
        bitmapPtr,
        pagePtr,
        0,
        0,
        w,
        h,
        applyRotation ? pageRotQuarters : 0,
        renderFlags,
      );

      const bufferPtr = m.FPDFBitmap_GetBuffer(bitmapPtr);
      if (!bufferPtr) return null;
      const stride = m.FPDFBitmap_GetStride(bitmapPtr);
      // Take the heap view only after rendering: the view is created from
      // the memory buffer as it exists at this point.
      const heap = new Uint8Array((m.pdfium.wasmExports as any).memory.buffer);
      const pixels = new Uint8ClampedArray(w * h * 4);

      // BGRA → RGBA. Direct HEAPU8 indexing is ~100× faster than
      // per-pixel m.pdfium.getValue() calls for large bitmaps.
      // Source rows are `stride` bytes apart, which may exceed w * 4.
      for (let y = 0; y < h; y++) {
        const srcRow = bufferPtr + y * stride;
        const dstRow = y * w * 4;
        for (let x = 0; x < w; x++) {
          const so = srcRow + x * 4;
          const dst = dstRow + x * 4;
          pixels[dst] = heap[so + 2];
          pixels[dst + 1] = heap[so + 1];
          pixels[dst + 2] = heap[so];
          pixels[dst + 3] = heap[so + 3];
        }
      }

      const canvas = document.createElement("canvas");
      canvas.width = w;
      canvas.height = h;
      const ctx = canvas.getContext("2d");
      if (!ctx) return null;
      ctx.putImageData(new ImageData(pixels, w, h), 0, 0);
      return format === "jpeg"
        ? canvas.toDataURL("image/jpeg", quality ?? 0.8)
        : canvas.toDataURL();
    } finally {
      m.FPDFBitmap_Destroy(bitmapPtr);
    }
  } finally {
    m.FPDF_ClosePage(pagePtr);
  }
}
/**
 * Look up one page's size and rotation without rasterising it.
 *
 * @param docPtr Pointer to an already-opened PDFium document.
 * @param pageIndex Zero-based index of the page to inspect.
 * @returns The page's width and height as reported by PDFium plus its
 *   rotation in degrees (0, 90, 180 or 270), or `null` when the page cannot
 *   be loaded. The page handle is always closed before returning.
 */
export async function readPdfiumPageMetadata(
  docPtr: number,
  pageIndex: number,
): Promise<{ width: number; height: number; rotation: number } | null> {
  const pdfium = await getPdfiumModule();
  const page = pdfium.FPDF_LoadPage(docPtr, pageIndex);
  if (!page) {
    return null;
  }

  try {
    const pageWidth = pdfium.FPDF_GetPageWidthF(page);
    const pageHeight = pdfium.FPDF_GetPageHeightF(page);
    // PDFium reports rotation in quarter turns; clamp to 0..3 and convert
    // to degrees.
    const quarterTurns = ((pdfium as any).FPDFPage_GetRotation(page) | 0) & 3;
    return {
      width: pageWidth,
      height: pageHeight,
      rotation: quarterTurns * 90,
    };
  } finally {
    pdfium.FPDF_ClosePage(page);
  }
}

View File

@@ -1,4 +1,12 @@
import { pdfWorkerManager } from "@app/services/pdfWorkerManager";
import {
openRawDocumentSafe,
closeRawDocument,
getPdfiumModule,
} from "@app/services/pdfiumService";
import {
renderPdfiumPageDataUrl,
readPdfiumPageMetadata,
} from "@app/utils/pdfiumPageRender";
export interface ThumbnailWithMetadata {
thumbnail: string; // Always returns a thumbnail (placeholder if needed)
@@ -519,26 +527,88 @@ function drawLargeLockIcon(
ctx.fillRect(keyholeX - 2, keyholeY, 4, 8);
}
/**
* Generate standard PDF thumbnail by rendering first page
*/
async function generateStandardPDFThumbnail(
pdf: any,
scale: number,
): Promise<string> {
const page = await pdf.getPage(1);
const viewport = page.getViewport({ scale });
const canvas = document.createElement("canvas");
canvas.width = viewport.width;
canvas.height = viewport.height;
const context = canvas.getContext("2d");
/** PDFium error code 4 = password required (encrypted PDF). */
const PDFIUM_ERR_PASSWORD = 4;
if (!context) {
throw new Error("Could not get canvas context");
interface PdfiumRenderResult {
thumbnail: string;
pageCount: number;
pageRotations: number[];
pageDimensions: Array<{ width: number; height: number }>;
/** Set when the document is password-protected — caller substitutes the
* encrypted placeholder. Thumbnail/metadata fields are empty in that case. */
isEncrypted?: boolean;
}
/**
* Open a PDF with PDFium, render page 1 to a data URL, and optionally
* collect rotation + dimensions for every page. Returns `isEncrypted: true`
* (without rendering) when the document is password-protected.
*
* @param applyRotation When true, bakes the page's own rotation into the
* bitmap (static display). When false, renders upright so callers can
* apply rotation via CSS (PageEditor).
* @param collectAllPagesMetadata When true, reads per-page rotation and
* dimensions for all pages. When false (very large files), only the
* first page's metadata is populated.
*/
async function renderPdfThumbnailPdfium(
data: ArrayBuffer,
scale: number,
applyRotation: boolean,
collectAllPagesMetadata: boolean,
): Promise<PdfiumRenderResult> {
const m = await getPdfiumModule();
let docPtr: number;
try {
docPtr = await openRawDocumentSafe(data);
} catch (error) {
if (
error instanceof Error &&
new RegExp(`error ${PDFIUM_ERR_PASSWORD}`).test(error.message)
) {
return {
thumbnail: "",
pageCount: 1,
pageRotations: [],
pageDimensions: [],
isEncrypted: true,
};
}
throw error;
}
await page.render({ canvasContext: context, viewport }).promise;
return canvas.toDataURL();
try {
const pageCount = m.FPDF_GetPageCount(docPtr);
const thumbnail = await renderPdfiumPageDataUrl(docPtr, 0, scale, {
applyRotation,
});
if (!thumbnail) throw new Error("PDFium: failed to render page 0");
// Page 0 metadata is already available via the render, but read it
// directly for consistency with the later per-page loop.
const firstMeta = await readPdfiumPageMetadata(docPtr, 0);
const pageRotations: number[] = [firstMeta?.rotation ?? 0];
const pageDimensions: Array<{ width: number; height: number }> = [
{
width: firstMeta?.width ?? 0,
height: firstMeta?.height ?? 0,
},
];
if (collectAllPagesMetadata) {
for (let i = 1; i < pageCount; i++) {
const meta = await readPdfiumPageMetadata(docPtr, i);
if (!meta) continue;
pageRotations[i] = meta.rotation;
pageDimensions[i] = { width: meta.width, height: meta.height };
}
}
return { thumbnail, pageCount, pageRotations, pageDimensions };
} finally {
await closeRawDocument(docPtr);
}
}
/**
@@ -590,27 +660,16 @@ async function generatePDFThumbnail(
file: File,
scale: number,
): Promise<string> {
try {
const pdf = await pdfWorkerManager.createDocument(arrayBuffer, {
disableAutoFetch: true,
disableStream: true,
});
const thumbnail = await generateStandardPDFThumbnail(pdf, scale);
// Immediately clean up memory after thumbnail generation using worker manager
pdfWorkerManager.destroyDocument(pdf);
return thumbnail;
} catch (error) {
if (
error &&
typeof error === "object" &&
(error as any).name === "PasswordException"
) {
return generateEncryptedPDFThumbnail(file);
}
throw error; // Not an encryption issue, re-throw
const result = await renderPdfThumbnailPdfium(
arrayBuffer,
scale,
true,
false,
);
if (result.isEncrypted) {
return generateEncryptedPDFThumbnail(file);
}
return result.thumbnail;
}
/**
@@ -643,27 +702,20 @@ export async function generateThumbnailForFile(file: File): Promise<string> {
try {
return await generatePDFThumbnail(arrayBuffer, file, scale);
} catch (error) {
if (error instanceof Error && error.name === "InvalidPDFException") {
} catch {
// PDFium needs the xref table at the end of the file, so the 2MB
// chunk can fail to open for PDFs larger than that. Retry with the
// full buffer before falling back to a placeholder.
try {
const fullArrayBuffer = await file.arrayBuffer();
return await generatePDFThumbnail(fullArrayBuffer, file, scale);
} catch (error) {
console.warn(
`PDF structure issue for ${file.name} - trying with full file`,
`PDF processing failed for ${file.name} - using placeholder:`,
error,
);
try {
// Try with full file instead of chunk
const fullArrayBuffer = await file.arrayBuffer();
return await generatePDFThumbnail(fullArrayBuffer, file, scale);
} catch {
console.warn(
`Full file PDF processing also failed for ${file.name} - using placeholder`,
);
return generatePlaceholderThumbnail(file);
}
return generatePlaceholderThumbnail(file);
}
console.warn(
`PDF processing failed for ${file.name} - using placeholder:`,
error,
);
return generatePlaceholderThumbnail(file);
}
}
@@ -691,76 +743,28 @@ export async function generateThumbnailWithMetadata(
try {
const arrayBuffer = await file.arrayBuffer();
const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
const result = await renderPdfThumbnailPdfium(
arrayBuffer,
scale,
applyRotation,
!isVeryLarge,
);
const pageCount = pdf.numPages;
const page = await pdf.getPage(1);
const pageDimensions: Array<{ width: number; height: number }> = [];
// If applyRotation is false, render without rotation (for CSS-based rotation)
// If applyRotation is true, let PDF.js apply rotation (for static display)
const viewport = applyRotation
? page.getViewport({ scale })
: page.getViewport({ scale, rotation: 0 });
const baseViewport = page.getViewport({ scale: 1, rotation: 0 });
pageDimensions[0] = {
width: baseViewport.width,
height: baseViewport.height,
};
const canvas = document.createElement("canvas");
canvas.width = viewport.width;
canvas.height = viewport.height;
const context = canvas.getContext("2d");
if (!context) {
pdfWorkerManager.destroyDocument(pdf);
throw new Error("Could not get canvas context");
}
await page.render({ canvasContext: context, viewport, canvas }).promise;
const thumbnail = canvas.toDataURL();
// For very large files, skip reading rotation/dimensions for all pages (just use first page data)
if (isVeryLarge) {
const rotation = page.rotate || 0;
pdfWorkerManager.destroyDocument(pdf);
if (result.isEncrypted) {
return {
thumbnail,
pageCount,
pageRotations: [rotation],
pageDimensions: [pageDimensions[0]],
thumbnail: generateEncryptedPDFThumbnail(file),
pageCount: 1,
isEncrypted: true,
};
}
// Read rotation for all pages
const pageRotations: number[] = [];
for (let i = 1; i <= pageCount; i++) {
const p = await pdf.getPage(i);
const rotation = p.rotate || 0;
pageRotations.push(rotation);
if (!pageDimensions[i - 1]) {
const pageViewport = p.getViewport({ scale: 1, rotation: 0 });
pageDimensions[i - 1] = {
width: pageViewport.width,
height: pageViewport.height,
};
}
}
pdfWorkerManager.destroyDocument(pdf);
return { thumbnail, pageCount, pageRotations, pageDimensions };
} catch (error) {
if (
error &&
typeof error === "object" &&
(error as any).name === "PasswordException"
) {
// Handle encrypted PDFs
const thumbnail = generateEncryptedPDFThumbnail(file);
return { thumbnail, pageCount: 1, isEncrypted: true };
}
return {
thumbnail: result.thumbnail,
pageCount: result.pageCount,
pageRotations: result.pageRotations,
pageDimensions: result.pageDimensions,
};
} catch {
const thumbnail = generatePlaceholderThumbnail(file);
return { thumbnail, pageCount: 1 };
}