Clean up of thumbnail generation

This commit is contained in:
Reece 2025-06-27 20:30:47 +01:00
parent 3730429153
commit 61699a08a5
7 changed files with 232 additions and 225 deletions

22
frontend/public/pdf.js Normal file

File diff suppressed because one or more lines are too long

View File

@ -5,24 +5,20 @@ let pdfJsLoaded = false;
// Import PDF.js properly for worker context
try {
console.log('📦 Attempting to load PDF.js from CDN...');
importScripts('https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js');
console.log('📦 Loading PDF.js locally...');
importScripts('/pdf.js');
if (self.pdfjsLib) {
// Set up PDF.js worker
self.pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
self.pdfjsLib.GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
pdfJsLoaded = true;
console.log('✓ PDF.js loaded successfully from CDN');
console.log('✓ PDF.js loaded successfully from local files');
} else {
throw new Error('pdfjsLib not available after import');
}
} catch (error) {
console.warn('⚠️ Failed to load PDF.js from CDN:', error);
console.error('✗ PDF.js CDN loading failed - worker will not be available');
console.error('✗ Failed to load local PDF.js:', error);
pdfJsLoaded = false;
// Note: Local PDF.js fallback removed as pdf.js file is not available
// The main thread fallback will handle thumbnail generation instead
}
// Log the final status
@ -34,71 +30,68 @@ if (pdfJsLoaded) {
self.onmessage = async function(e) {
const { type, data, jobId } = e.data;
try {
// Handle PING for worker health check
if (type === 'PING') {
console.log('🏓 Worker received PING, sending PONG...');
// Check if PDF.js is loaded before responding
if (pdfJsLoaded && self.pdfjsLib) {
self.postMessage({ type: 'PONG', jobId });
console.log('✓ PONG sent - worker is ready for thumbnail generation');
} else {
console.error('✗ PDF.js not loaded - worker not ready');
self.postMessage({
type: 'ERROR',
jobId,
data: { error: 'PDF.js not loaded in worker' }
self.postMessage({
type: 'ERROR',
jobId,
data: { error: 'PDF.js not loaded in worker' }
});
}
return;
}
if (type === 'GENERATE_THUMBNAILS') {
console.log(`🖼️ Worker starting thumbnail generation for ${data.pageNumbers?.length || 0} pages`);
if (!pdfJsLoaded || !self.pdfjsLib) {
throw new Error('PDF.js not available in worker');
}
const { pdfArrayBuffer, pageNumbers, scale = 0.2, quality = 0.8 } = data;
// Load PDF in worker using imported PDF.js
const pdf = await self.pdfjsLib.getDocument({ data: pdfArrayBuffer }).promise;
const thumbnails = [];
// Process pages in smaller batches for smoother UI
const batchSize = 3; // Process 3 pages at once for smoother UI
for (let i = 0; i < pageNumbers.length; i += batchSize) {
const batch = pageNumbers.slice(i, i + batchSize);
const batchPromises = batch.map(async (pageNumber) => {
try {
const page = await pdf.getPage(pageNumber);
const viewport = page.getViewport({ scale });
// Create OffscreenCanvas for better performance
const canvas = new OffscreenCanvas(viewport.width, viewport.height);
const context = canvas.getContext('2d');
await page.render({ canvasContext: context, viewport }).promise;
// Convert to blob then to base64 (more efficient than toDataURL)
const blob = await canvas.convertToBlob({ type: 'image/jpeg', quality });
const arrayBuffer = await blob.arrayBuffer();
const base64 = btoa(String.fromCharCode(...new Uint8Array(arrayBuffer)));
const thumbnail = `data:image/jpeg;base64,${base64}`;
return { pageNumber, thumbnail, success: true };
} catch (error) {
return { pageNumber, error: error.message, success: false };
}
});
const batchResults = await Promise.all(batchPromises);
thumbnails.push(...batchResults);
// Send progress update
self.postMessage({
type: 'PROGRESS',
@ -109,22 +102,22 @@ self.onmessage = async function(e) {
thumbnails: batchResults.filter(r => r.success)
}
});
// Small delay between batches to keep UI smooth
if (i + batchSize < pageNumbers.length) {
await new Promise(resolve => setTimeout(resolve, 100)); // Increased to 100ms pause between batches for smoother scrolling
}
}
// Clean up
pdf.destroy();
self.postMessage({
type: 'COMPLETE',
jobId,
data: { thumbnails: thumbnails.filter(r => r.success) }
});
}
} catch (error) {
self.postMessage({
@ -133,4 +126,4 @@ self.onmessage = async function(e) {
data: { error: error.message }
});
}
};
};

View File

@ -17,7 +17,7 @@ import {
ToggleSplitCommand
} from "../../commands/pageCommands";
import { pdfExportService } from "../../services/pdfExportService";
import { thumbnailGenerationService } from "../../services/thumbnailGenerationService";
import { useThumbnailGeneration } from "../../hooks/useThumbnailGeneration";
import './pageEditor.module.css';
import PageThumbnail from './PageThumbnail';
import BulkSelectionPanel from './BulkSelectionPanel';
@ -198,90 +198,14 @@ const PageEditor = ({
const [sharedPdfInstance, setSharedPdfInstance] = useState<any>(null);
const [thumbnailGenerationStarted, setThumbnailGenerationStarted] = useState(false);
// Session-based thumbnail cache with 1GB limit
const [thumbnailCache, setThumbnailCache] = useState<Map<string, { thumbnail: string; lastUsed: number; sizeBytes: number }>>(new Map());
const maxCacheSizeBytes = 1024 * 1024 * 1024; // 1GB cache limit
const [currentCacheSize, setCurrentCacheSize] = useState(0);
// Cache management functions
const addThumbnailToCache = useCallback((pageId: string, thumbnail: string) => {
const thumbnailSizeBytes = thumbnail.length * 0.75; // Rough base64 size estimate
setThumbnailCache(prev => {
const newCache = new Map(prev);
const now = Date.now();
// Add new thumbnail
newCache.set(pageId, {
thumbnail,
lastUsed: now,
sizeBytes: thumbnailSizeBytes
});
return newCache;
});
setCurrentCacheSize(prev => {
const newSize = prev + thumbnailSizeBytes;
// If we exceed 1GB, trigger cleanup
if (newSize > maxCacheSizeBytes) {
setTimeout(() => cleanupThumbnailCache(), 0);
}
return newSize;
});
console.log(`Cached thumbnail for ${pageId} (${Math.round(thumbnailSizeBytes / 1024)}KB)`);
}, [maxCacheSizeBytes]);
const getThumbnailFromCache = useCallback((pageId: string): string | null => {
const cached = thumbnailCache.get(pageId);
if (!cached) return null;
// Update last used timestamp
setThumbnailCache(prev => {
const newCache = new Map(prev);
const entry = newCache.get(pageId);
if (entry) {
entry.lastUsed = Date.now();
}
return newCache;
});
return cached.thumbnail;
}, [thumbnailCache]);
const cleanupThumbnailCache = useCallback(() => {
setThumbnailCache(prev => {
const entries = Array.from(prev.entries());
// Sort by last used (oldest first)
entries.sort(([, a], [, b]) => a.lastUsed - b.lastUsed);
const newCache = new Map();
let newSize = 0;
const targetSize = maxCacheSizeBytes * 0.8; // Clean to 80% of limit
// Keep most recently used entries until we hit target size
for (let i = entries.length - 1; i >= 0 && newSize < targetSize; i--) {
const [key, value] = entries[i];
newCache.set(key, value);
newSize += value.sizeBytes;
}
setCurrentCacheSize(newSize);
console.log(`Cleaned thumbnail cache: ${prev.size}${newCache.size} entries (${Math.round(newSize / 1024 / 1024)}MB)`);
return newCache;
});
}, [maxCacheSizeBytes]);
const clearThumbnailCache = useCallback(() => {
setThumbnailCache(new Map());
setCurrentCacheSize(0);
console.log('Cleared thumbnail cache');
}, []);
// Thumbnail generation (opt-in for visual tools)
const {
generateThumbnails,
addThumbnailToCache,
getThumbnailFromCache,
stopGeneration,
destroyThumbnails
} = useThumbnailGeneration();
// Start thumbnail generation process (separate from document loading)
const startThumbnailGeneration = useCallback(() => {
@ -290,24 +214,19 @@ const PageEditor = ({
const file = activeFiles[0];
const totalPages = mergedPdfDocument.totalPages;
console.log(`Starting Web Worker thumbnail generation for ${totalPages} pages`);
setThumbnailGenerationStarted(true);
// Run everything asynchronously to avoid blocking the main thread
setTimeout(async () => {
try {
console.log('📖 Loading PDF array buffer...');
// Load PDF array buffer for Web Workers
const arrayBuffer = await file.arrayBuffer();
console.log('✅ PDF array buffer loaded, starting Web Workers...');
// Generate all page numbers
const pageNumbers = Array.from({ length: totalPages }, (_, i) => i + 1);
// Start parallel thumbnail generation WITHOUT blocking the main thread
thumbnailGenerationService.generateThumbnails(
generateThumbnails(
arrayBuffer,
pageNumbers,
{
@ -318,11 +237,6 @@ const PageEditor = ({
},
// Progress callback (throttled for better performance)
(progress) => {
// Reduce console spam - only log every 10 completions
if (progress.completed % 10 === 0) {
console.log(`Thumbnail progress: ${progress.completed}/${progress.total} completed`);
}
// Batch process thumbnails to reduce main thread work
requestAnimationFrame(() => {
progress.thumbnails.forEach(({ pageNumber, thumbnail }) => {
@ -341,10 +255,8 @@ const PageEditor = ({
});
});
}
).then(thumbnails => {
console.log(`🎉 Web Worker thumbnail generation completed: ${thumbnails.length} thumbnails generated`);
}).catch(error => {
console.error('❌ Web Worker thumbnail generation failed:', error);
).catch(error => {
console.error('Web Worker thumbnail generation failed:', error);
setThumbnailGenerationStarted(false);
});
@ -353,8 +265,6 @@ const PageEditor = ({
setThumbnailGenerationStarted(false);
}
}, 0); // setTimeout with 0ms to defer to next tick
console.log('🚀 Thumbnail generation queued - UI remains responsive');
}, [mergedPdfDocument, activeFiles, thumbnailGenerationStarted, getThumbnailFromCache, addThumbnailToCache]);
// Start thumbnail generation after document loads and UI settles
@ -366,7 +276,7 @@ const PageEditor = ({
}
}, [mergedPdfDocument, startThumbnailGeneration, thumbnailGenerationStarted]);
// Cleanup shared PDF instance, workers, and cache when component unmounts or files change
// Cleanup shared PDF instance when files change (but keep thumbnails cached)
useEffect(() => {
return () => {
if (sharedPdfInstance) {
@ -374,12 +284,10 @@ const PageEditor = ({
setSharedPdfInstance(null);
}
setThumbnailGenerationStarted(false);
clearThumbnailCache(); // Clear cache when leaving/changing documents
// Cancel any ongoing Web Worker operations
thumbnailGenerationService.destroy();
// Stop generation but keep cache and workers alive for cross-tool persistence
stopGeneration();
};
}, [activeFiles, clearThumbnailCache]);
}, [activeFiles, stopGeneration]);
// Clear selections when files change
useEffect(() => {
@ -816,7 +724,10 @@ const PageEditor = ({
setActiveFiles([]);
setMergedPdfDocument(null);
setSelectedPages([]);
}, [setActiveFiles]);
// Only destroy thumbnails and workers on explicit PDF close
destroyThumbnails();
}, [setActiveFiles, destroyThumbnails]);
// PageEditorControls needs onExportSelected and onExportAll
const onExportSelected = useCallback(() => showExportPreview(true), [showExportPreview]);

View File

@ -7,7 +7,9 @@ import RotateRightIcon from '@mui/icons-material/RotateRight';
import DeleteIcon from '@mui/icons-material/Delete';
import ContentCutIcon from '@mui/icons-material/ContentCut';
import DragIndicatorIcon from '@mui/icons-material/DragIndicator';
import { PDFPage } from '../../../types/pageEditor';
import { PDFPage, PDFDocument } from '../../../types/pageEditor';
import { RotatePagesCommand, DeletePagesCommand, ToggleSplitCommand } from '../../../commands/pageCommands';
import { Command } from '../../../hooks/useUndoRedo';
import styles from './PageEditor.module.css';
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
@ -36,14 +38,14 @@ interface PageThumbnailProps {
onDrop: (e: React.DragEvent, pageId: string) => void;
onTogglePage: (pageId: string) => void;
onAnimateReorder: (pageId: string, targetIndex: number) => void;
onExecuteCommand: (command: any) => void;
onExecuteCommand: (command: Command) => void;
onSetStatus: (status: string) => void;
onSetMovingPage: (pageId: string | null) => void;
RotatePagesCommand: any;
DeletePagesCommand: any;
ToggleSplitCommand: any;
pdfDocument: any;
setPdfDocument: any;
RotatePagesCommand: typeof RotatePagesCommand;
DeletePagesCommand: typeof DeletePagesCommand;
ToggleSplitCommand: typeof ToggleSplitCommand;
pdfDocument: PDFDocument;
setPdfDocument: (doc: PDFDocument) => void;
}
const PageThumbnail = React.memo(({
@ -83,10 +85,6 @@ const PageThumbnail = React.memo(({
const handleThumbnailReady = (event: CustomEvent) => {
const { pageNumber, thumbnail, pageId } = event.detail;
if (pageNumber === page.pageNumber && pageId === page.id && !thumbnailUrl) {
// Reduce console spam during scrolling
if (pageNumber % 20 === 0) {
console.log(`Received Web Worker thumbnail for page ${page.pageNumber}`);
}
setThumbnailUrl(thumbnail);
}
};
@ -95,47 +93,6 @@ const PageThumbnail = React.memo(({
return () => window.removeEventListener('thumbnailReady', handleThumbnailReady as EventListener);
}, [page.pageNumber, page.id, thumbnailUrl]);
const loadThumbnailFromSharedPdf = async (sharedPdf: any, addThumbnailToCache?: (pageId: string, thumbnail: string) => void) => {
if (isLoadingThumbnail || thumbnailUrl) return;
setIsLoadingThumbnail(true);
try {
const thumbnail = await generateThumbnailFromPdf(sharedPdf);
// Cache the generated thumbnail
if (addThumbnailToCache) {
addThumbnailToCache(page.id, thumbnail);
}
} catch (error) {
console.error(`Failed to load thumbnail for page ${page.pageNumber}:`, error);
} finally {
setIsLoadingThumbnail(false);
}
};
const generateThumbnailFromPdf = async (pdf: any): Promise<string> => {
const pdfPage = await pdf.getPage(page.pageNumber);
const scale = 0.2; // Low quality for page editor
const viewport = pdfPage.getViewport({ scale });
const canvas = document.createElement('canvas');
canvas.width = viewport.width;
canvas.height = viewport.height;
const context = canvas.getContext('2d');
if (!context) {
throw new Error('Could not get canvas context');
}
await pdfPage.render({ canvasContext: context, viewport }).promise;
const thumbnail = canvas.toDataURL('image/jpeg', 0.8);
setThumbnailUrl(thumbnail);
console.log(`Thumbnail generated for page ${page.pageNumber}`);
return thumbnail;
};
// Register this component with pageRefs for animations
const pageElementRef = useCallback((element: HTMLDivElement | null) => {

View File

@ -0,0 +1,56 @@
import { useCallback } from 'react';
import { thumbnailGenerationService } from '../services/thumbnailGenerationService';
/**
 * React hook that exposes the shared `thumbnailGenerationService` to tools
 * which opt in to visual (thumbnail) features.
 *
 * Every returned function is a thin wrapper that forwards its arguments to
 * the service. All wrappers are memoised with an empty dependency list, so
 * their identities stay stable across renders and they are safe to list in
 * effect/callback dependency arrays.
 *
 * @returns An object with `generateThumbnails`, `addThumbnailToCache`,
 *   `getThumbnailFromCache`, `getCacheStats`, `stopGeneration` and
 *   `destroyThumbnails`.
 */
export function useThumbnailGeneration() {
  type GenerationOptions = {
    scale?: number;
    quality?: number;
    batchSize?: number;
    parallelBatches?: number;
  };
  type ProgressListener = (progress: { completed: number; total: number; thumbnails: any[] }) => void;

  // Forwards a generation request to the service and returns its result.
  const generateThumbnails = useCallback(
    async (
      pdfArrayBuffer: ArrayBuffer,
      pageNumbers: number[],
      options: GenerationOptions = {},
      onProgress?: ProgressListener
    ) => thumbnailGenerationService.generateThumbnails(pdfArrayBuffer, pageNumbers, options, onProgress),
    []
  );

  // Stores a thumbnail in the service-level cache under the given page id.
  const addThumbnailToCache = useCallback(
    (pageId: string, thumbnail: string): void =>
      thumbnailGenerationService.addThumbnailToCache(pageId, thumbnail),
    []
  );

  // Looks a thumbnail up in the service-level cache; null when it is absent.
  const getThumbnailFromCache = useCallback(
    (pageId: string): string | null => thumbnailGenerationService.getThumbnailFromCache(pageId),
    []
  );

  // Reports the service's current cache statistics.
  const getCacheStats = useCallback(() => thumbnailGenerationService.getCacheStats(), []);

  // Asks the service to stop generating (delegates to `stopGeneration`).
  const stopGeneration = useCallback((): void => thumbnailGenerationService.stopGeneration(), []);

  // Full teardown (delegates to the service's `destroy`).
  const destroyThumbnails = useCallback((): void => thumbnailGenerationService.destroy(), []);

  return {
    generateThumbnails,
    addThumbnailToCache,
    getThumbnailFromCache,
    getCacheStats,
    stopGeneration,
    destroyThumbnails
  };
}

View File

@ -50,14 +50,12 @@ export class EnhancedPDFProcessingService {
// Check cache first
const cached = this.cache.get(fileKey);
if (cached) {
console.log('Cache hit for:', file.name);
this.updateMetrics('cacheHit');
return cached;
}
// Check if already processing
if (this.processing.has(fileKey)) {
console.log('Already processing:', file.name);
return null;
}
@ -281,11 +279,6 @@ export class EnhancedPDFProcessingService {
state.progress = 100;
this.notifyListeners();
// Queue background processing for remaining pages (only if there are any)
if (priorityCount < totalPages) {
this.queueBackgroundProcessing(file, priorityCount + 1, totalPages);
}
return this.createProcessedFile(file, pages, totalPages);
}
@ -354,11 +347,6 @@ export class EnhancedPDFProcessingService {
state.progress = 100;
this.notifyListeners();
// Queue remaining chunks for background processing (only if there are any)
if (firstChunkEnd < totalPages) {
this.queueChunkedBackgroundProcessing(file, firstChunkEnd + 1, totalPages, chunkSize);
}
return this.createProcessedFile(file, pages, totalPages);
}
@ -433,21 +421,6 @@ export class EnhancedPDFProcessingService {
};
}
/**
* Queue background processing for remaining pages
*/
private queueBackgroundProcessing(file: File, startPage: number, endPage: number): void {
// TODO: Implement background processing queue
console.log(`Queued background processing for ${file.name} pages ${startPage}-${endPage}`);
}
/**
* Queue chunked background processing
*/
private queueChunkedBackgroundProcessing(file: File, startPage: number, endPage: number, chunkSize: number): void {
// TODO: Implement chunked background processing
console.log(`Queued chunked background processing for ${file.name} pages ${startPage}-${endPage} in chunks of ${chunkSize}`);
}
/**
* Generate a unique, collision-resistant cache key

View File

@ -16,11 +16,22 @@ interface ThumbnailGenerationOptions {
parallelBatches?: number;
}
/** One entry of the session thumbnail cache. */
interface CachedThumbnail {
  /** Timestamp (from `Date.now()`) of the most recent write or read of this entry. */
  lastUsed: number;
  /** Estimated size of the thumbnail in bytes, used for cache size accounting. */
  sizeBytes: number;
  /** The cached thumbnail string itself. */
  thumbnail: string;
}
export class ThumbnailGenerationService {
private workers: Worker[] = [];
private activeJobs = new Map<string, { resolve: Function; reject: Function; onProgress?: Function }>();
private jobCounter = 0;
private isGenerating = false;
// Session-based thumbnail cache
private thumbnailCache = new Map<string, CachedThumbnail>();
private maxCacheSizeBytes = 1024 * 1024 * 1024; // 1GB cache limit
private currentCacheSize = 0;
constructor(private maxWorkers: number = 3) {
this.initializeWorkers();
@ -323,13 +334,97 @@ export class ThumbnailGenerationService {
}
/**
* Terminate all workers and stop generation
* Add thumbnail to cache with size management
*/
addThumbnailToCache(pageId: string, thumbnail: string): void {
  // Stores `thumbnail` under `pageId`, keeps the running byte total in sync
  // and triggers an eviction pass once the total exceeds the configured limit.
  //
  // pageId    - cache key for the page.
  // thumbnail - thumbnail string to cache.

  // Rough estimate: base64 text decodes to about 3/4 of its length in bytes.
  const thumbnailSizeBytes = thumbnail.length * 0.75;

  // Map.set overwrites an existing entry, so its size must be taken out of
  // the total first; otherwise re-caching the same page double-counts it and
  // the cache looks fuller than it is, causing premature evictions.
  const replaced = this.thumbnailCache.get(pageId);
  if (replaced) {
    this.currentCacheSize -= replaced.sizeBytes;
  }

  this.thumbnailCache.set(pageId, {
    thumbnail,
    lastUsed: Date.now(),
    sizeBytes: thumbnailSizeBytes
  });
  this.currentCacheSize += thumbnailSizeBytes;

  // Over the limit: evict least recently used entries.
  if (this.currentCacheSize > this.maxCacheSizeBytes) {
    this.cleanupThumbnailCache();
  }
}
/**
 * Look up a cached thumbnail and mark it as freshly used.
 *
 * @param pageId - cache key of the page.
 * @returns The cached thumbnail string, or `null` when nothing is cached
 *   for `pageId`.
 */
getThumbnailFromCache(pageId: string): string | null {
  const entry = this.thumbnailCache.get(pageId);
  if (entry) {
    // Refresh the timestamp so LRU eviction treats this entry as recent.
    entry.lastUsed = Date.now();
    return entry.thumbnail;
  }
  return null;
}
/**
 * Shrink the cache by least-recently-used eviction.
 *
 * The cache is rebuilt from its most recently used entries, adding entries
 * for as long as the running total is still below 80% of the size limit;
 * everything older is dropped.
 */
private cleanupThumbnailCache(): void {
  // Snapshot the entries, oldest first.
  const snapshot = Array.from(this.thumbnailCache.entries());
  snapshot.sort(([, left], [, right]) => left.lastUsed - right.lastUsed);

  const targetSize = this.maxCacheSizeBytes * 0.8; // Clean to 80% of limit

  // Start from an empty cache and re-admit entries, newest first.
  this.thumbnailCache.clear();
  this.currentCacheSize = 0;

  for (const [pageId, entry] of snapshot.reverse()) {
    // The budget is checked before each insertion, so the entry that
    // crosses the target is still kept.
    if (!(this.currentCacheSize < targetSize)) {
      break;
    }
    this.thumbnailCache.set(pageId, entry);
    this.currentCacheSize += entry.sizeBytes;
  }
}
/**
 * Drop every cached thumbnail and reset the size accounting to zero.
 */
clearThumbnailCache(): void {
  this.currentCacheSize = 0;
  this.thumbnailCache.clear();
}
/**
 * Snapshot of the cache's current state.
 *
 * @returns `entries` (number of cached thumbnails), `totalSizeBytes`
 *   (tracked size estimate) and `maxSizeBytes` (configured limit).
 */
getCacheStats() {
  const { thumbnailCache, currentCacheSize, maxCacheSizeBytes } = this;
  return {
    entries: thumbnailCache.size,
    totalSizeBytes: currentCacheSize,
    maxSizeBytes: maxCacheSizeBytes
  };
}
/**
 * Stop generation while leaving the thumbnail cache and the workers
 * untouched: the generating flag is lowered and all tracked jobs are
 * forgotten.
 */
stopGeneration(): void {
  this.isGenerating = false;
  // NOTE(review): each job entry carries resolve/reject callbacks; clearing
  // the map without invoking them appears to leave the corresponding
  // promises unsettled — confirm this is intended.
  this.activeJobs.clear();
}
/**
 * Full teardown, meant for explicit cleanup only: terminates every worker,
 * forgets all tracked jobs, lowers the generating flag and empties the
 * thumbnail cache.
 */
destroy(): void {
  for (const worker of this.workers) {
    worker.terminate();
  }
  // NOTE(review): the worker list is emptied here; verify that workers are
  // re-created before the next generation request if this instance is reused.
  this.workers = [];
  this.activeJobs.clear();
  this.isGenerating = false;
  this.clearThumbnailCache();
}
}