From 42abe83385625befebf964643324214d2dd6d674 Mon Sep 17 00:00:00 2001
From: Reece
Date: Fri, 27 Jun 2025 14:06:16 +0100
Subject: [PATCH] File management overhaul

---
 .../components/fileManagement/FileCard.tsx     |   1 -
 .../src/components/pageEditor/PageEditor.tsx   | 473 +++++++++------
 .../components/pageEditor/PageThumbnail.tsx    | 116 +++-
 .../src/hooks/useEnhancedProcessedFiles.ts     | 288 +++++++++
 frontend/src/hooks/usePDFProcessor.ts          |  16 +-
 frontend/src/hooks/useProcessedFiles.ts        | 125 ++++
 .../services/enhancedPDFProcessingService.ts   | 552 ++++++++++++++++++
 frontend/src/services/fileAnalyzer.ts          | 240 ++++++++
 frontend/src/services/pdfProcessingService.ts  | 188 ++++++
 frontend/src/services/processingCache.ts       | 138 +++++
 .../src/services/processingErrorHandler.ts     | 282 +++++++++
 frontend/src/types/pageEditor.ts               |   4 +-
 frontend/src/types/processing.ts               |  91 +++
 frontend/src/utils/fileHash.ts                 | 127 ++++
 14 files changed, 2439 insertions(+), 202 deletions(-)
 create mode 100644 frontend/src/hooks/useEnhancedProcessedFiles.ts
 create mode 100644 frontend/src/hooks/useProcessedFiles.ts
 create mode 100644 frontend/src/services/enhancedPDFProcessingService.ts
 create mode 100644 frontend/src/services/fileAnalyzer.ts
 create mode 100644 frontend/src/services/pdfProcessingService.ts
 create mode 100644 frontend/src/services/processingCache.ts
 create mode 100644 frontend/src/services/processingErrorHandler.ts
 create mode 100644 frontend/src/types/processing.ts
 create mode 100644 frontend/src/utils/fileHash.ts

diff --git a/frontend/src/components/fileManagement/FileCard.tsx b/frontend/src/components/fileManagement/FileCard.tsx
index 6b275e556..4b090f0d7 100644
--- a/frontend/src/components/fileManagement/FileCard.tsx
+++ b/frontend/src/components/fileManagement/FileCard.tsx
@@ -6,7 +6,6 @@ import StorageIcon from "@mui/icons-material/Storage";
 import VisibilityIcon from "@mui/icons-material/Visibility";
 import EditIcon from "@mui/icons-material/Edit";
-import { FileWithUrl } from "../../types/file";
 import { getFileSize, getFileDate } from "../../utils/fileUtils";
 import { useIndexedDBThumbnail } from "../../hooks/useIndexedDBThumbnail";

diff --git a/frontend/src/components/pageEditor/PageEditor.tsx b/frontend/src/components/pageEditor/PageEditor.tsx
index fc4f93e3d..29ff62bdf 100644
--- a/frontend/src/components/pageEditor/PageEditor.tsx
+++ b/frontend/src/components/pageEditor/PageEditor.tsx
@@ -1,15 +1,13 @@
 import React, { useState, useCallback, useRef, useEffect } from "react";
 import {
   Button, Text, Center, Checkbox, Box, Tooltip, ActionIcon,
-  Notification, TextInput, FileInput, LoadingOverlay, Modal, Alert, Container,
-  Stack, Group, Paper, SimpleGrid
+  Notification, TextInput, LoadingOverlay, Modal, Alert,
+  Stack, Group
 } from "@mantine/core";
 import { useTranslation } from "react-i18next";
-import UploadFileIcon from "@mui/icons-material/UploadFile";
-import { usePDFProcessor } from "../../hooks/usePDFProcessor";
+import { useEnhancedProcessedFiles } from "../../hooks/useEnhancedProcessedFiles";
 import { PDFDocument, PDFPage } from "../../types/pageEditor";
-import { fileStorage } from "../../services/fileStorage";
-import { generateThumbnailForFile } from "../../utils/thumbnailUtils";
+import { ProcessedFile as EnhancedProcessedFile } from "../../types/processing";
 import { useUndoRedo } from "../../hooks/useUndoRedo";
 import {
   RotatePagesCommand,
@@ -19,19 +17,16 @@ import {
   ToggleSplitCommand
 } from "../../commands/pageCommands";
 import { pdfExportService } from
"../../services/pdfExportService"; -import styles from './pageEditor.module.css'; +import './pageEditor.module.css'; import PageThumbnail from './PageThumbnail'; import BulkSelectionPanel from './BulkSelectionPanel'; import DragDropGrid from './DragDropGrid'; -import FilePickerModal from '../shared/FilePickerModal'; -import FileUploadSelector from '../shared/FileUploadSelector'; export interface PageEditorProps { activeFiles: File[]; setActiveFiles: (files: File[]) => void; downloadUrl?: string | null; setDownloadUrl?: (url: string | null) => void; - sharedFiles?: any[]; // For FileUploadSelector when no files loaded // Optional callbacks to expose internal functions for PageEditorControls onFunctionsReady?: (functions: { @@ -55,24 +50,31 @@ export interface PageEditorProps { const PageEditor = ({ activeFiles, setActiveFiles, - downloadUrl, - setDownloadUrl, - sharedFiles = [], onFunctionsReady, }: PageEditorProps) => { const { t } = useTranslation(); - const { processPDFFile, loading: pdfLoading } = usePDFProcessor(); + + // Enhanced processing with intelligent strategies + const { + processedFiles: enhancedProcessedFiles, + processingStates, + isProcessing: globalProcessing, + hasProcessingErrors, + processingProgress, + actions: processingActions + } = useEnhancedProcessedFiles(activeFiles, { + strategy: 'priority_pages', // Process first pages immediately + thumbnailQuality: 'low', // Low quality for page editor navigation + priorityPageCount: 10 + }); // Single merged document state const [mergedPdfDocument, setMergedPdfDocument] = useState(null); - const [processedFiles, setProcessedFiles] = useState>(new Map()); const [filename, setFilename] = useState(""); // Page editor state const [selectedPages, setSelectedPages] = useState([]); const [status, setStatus] = useState(null); - const [loading, setLoading] = useState(false); - const [error, setError] = useState(null); const [csvInput, setCsvInput] = useState(""); const [selectionMode, setSelectionMode] = useState(false); @@ -97,87 +99,19 @@ const PageEditor = ({ // Undo/Redo system const { executeCommand, undo, redo, canUndo, canRedo } = useUndoRedo(); - // Process uploaded file - const handleFileUpload = useCallback(async (uploadedFile: File | any) => { - if (!uploadedFile) { - setError('No file provided'); - return; - } - - let fileToProcess: File; - - // Handle FileWithUrl objects from storage - if (uploadedFile.storedInIndexedDB && uploadedFile.arrayBuffer) { - try { - console.log('Converting FileWithUrl to File:', uploadedFile.name); - const arrayBuffer = await uploadedFile.arrayBuffer(); - const blob = new Blob([arrayBuffer], { type: uploadedFile.type || 'application/pdf' }); - fileToProcess = new File([blob], uploadedFile.name, { - type: uploadedFile.type || 'application/pdf', - lastModified: uploadedFile.lastModified || Date.now() - }); - } catch (error) { - console.error('Error converting FileWithUrl:', error); - setError('Unable to load file from storage'); - return; - } - } else if (uploadedFile instanceof File) { - fileToProcess = uploadedFile; - } else { - setError('Invalid file object'); - console.error('handleFileUpload received unsupported object:', uploadedFile); - return; - } - - if (fileToProcess.type !== 'application/pdf') { - setError('Please upload a valid PDF file'); - return; - } - - const fileKey = `${fileToProcess.name}-${fileToProcess.size}`; - - // Skip processing if already processed - if (processedFiles.has(fileKey)) return; - - setLoading(true); - setError(null); - - try { - const document = await 
processPDFFile(fileToProcess); - - // Store processed document - setProcessedFiles(prev => new Map(prev).set(fileKey, document)); - setFilename(fileToProcess.name.replace(/\.pdf$/i, '')); - setSelectedPages([]); - - - if (document.pages.length > 0) { - // Only store if it's a new file (not from storage) - if (!uploadedFile.storedInIndexedDB) { - const thumbnail = await generateThumbnailForFile(fileToProcess); - await fileStorage.storeFile(fileToProcess, thumbnail); - } - } - - setStatus(`PDF loaded successfully with ${document.totalPages} pages`); - } catch (err) { - const errorMessage = err instanceof Error ? err.message : 'Failed to process PDF'; - setError(errorMessage); - console.error('PDF processing error:', err); - } finally { - setLoading(false); - } - }, [processPDFFile, activeFiles, setActiveFiles, processedFiles]); - - // Process multiple uploaded files - just add them to activeFiles like FileManager does - const handleMultipleFileUpload = useCallback((uploadedFiles: File[]) => { - if (!uploadedFiles || uploadedFiles.length === 0) { - setError('No files provided'); - return; - } - - // Simply set the activeFiles to the selected files (same as FileManager approach) - setActiveFiles(uploadedFiles); + // Convert enhanced processed files to Page Editor format + const convertToPageEditorFormat = useCallback((enhancedFile: EnhancedProcessedFile, fileName: string): PDFDocument => { + return { + id: enhancedFile.id, + name: fileName, + file: null as any, // We don't need the file reference in the converted format + pages: enhancedFile.pages.map(page => ({ + ...page, + // Ensure compatibility with existing page editor types + splitBefore: page.splitBefore || false + })), + totalPages: enhancedFile.totalPages + }; }, []); // Merge multiple PDF documents into one @@ -188,10 +122,10 @@ const PageEditor = ({ } if (activeFiles.length === 1) { - // Single file - use it directly - const fileKey = `${activeFiles[0].name}-${activeFiles[0].size}`; - const pdfDoc = processedFiles.get(fileKey); - if (pdfDoc) { + // Single file - use enhanced processed file + const enhancedFile = enhancedProcessedFiles.get(activeFiles[0]); + if (enhancedFile) { + const pdfDoc = convertToPageEditorFormat(enhancedFile, activeFiles[0].name); setMergedPdfDocument(pdfDoc); setFilename(activeFiles[0].name.replace(/\.pdf$/i, '')); } @@ -202,71 +136,230 @@ const PageEditor = ({ const filenames: string[] = []; activeFiles.forEach((file, fileIndex) => { - const fileKey = `${file.name}-${file.size}`; - const pdfDoc = processedFiles.get(fileKey); - if (pdfDoc) { + const enhancedFile = enhancedProcessedFiles.get(file); + if (enhancedFile) { filenames.push(file.name.replace(/\.pdf$/i, '')); - pdfDoc.pages.forEach((page, pageIndex) => { + enhancedFile.pages.forEach((page, pageIndex) => { // Create new page with updated IDs and page numbers for merged document const newPage: PDFPage = { ...page, id: `${fileIndex}-${page.id}`, // Unique ID across all files pageNumber: totalPages + pageIndex + 1, - sourceFile: file.name // Track which file this page came from + splitBefore: page.splitBefore || false }; allPages.push(newPage); }); - totalPages += pdfDoc.pages.length; + totalPages += enhancedFile.pages.length; } }); - const mergedDocument: PDFDocument = { - pages: allPages, - totalPages: totalPages, - title: filenames.join(' + '), - metadata: { - title: filenames.join(' + '), - createdAt: new Date().toISOString(), - modifiedAt: new Date().toISOString(), - } - }; + if (allPages.length > 0) { + const mergedDocument: PDFDocument = { + id: 
`merged-${Date.now()}`, + name: filenames.join(' + '), + file: null as any, + pages: allPages, + totalPages: totalPages + }; - setMergedPdfDocument(mergedDocument); - setFilename(filenames.join('_')); - } - }, [activeFiles, processedFiles]); - - // Auto-process files from activeFiles - useEffect(() => { - console.log('Auto-processing effect triggered:', { - activeFilesCount: activeFiles.length, - processedFilesCount: processedFiles.size, - activeFileNames: activeFiles.map(f => f.name) - }); - - activeFiles.forEach(file => { - const fileKey = `${file.name}-${file.size}`; - console.log(`Checking file ${file.name}: processed =`, processedFiles.has(fileKey)); - if (!processedFiles.has(fileKey)) { - console.log('Processing file:', file.name); - handleFileUpload(file); + setMergedPdfDocument(mergedDocument); + setFilename(filenames.join('_')); } - }); - }, [activeFiles, processedFiles, handleFileUpload]); + } + }, [activeFiles, enhancedProcessedFiles, convertToPageEditorFormat]); - // Merge multiple PDF documents into one when all files are processed + // Handle file upload from FileUploadSelector + const handleMultipleFileUpload = useCallback((uploadedFiles: File[]) => { + if (!uploadedFiles || uploadedFiles.length === 0) { + setStatus('No files provided'); + return; + } + + // Simply set the activeFiles to the selected files (same as existing approach) + setActiveFiles(uploadedFiles); + setStatus(`Added ${uploadedFiles.length} file(s) for processing`); + }, [setActiveFiles]); + + // Auto-merge documents when enhanced processing completes useEffect(() => { if (activeFiles.length > 0) { - const allProcessed = activeFiles.every(file => { - const fileKey = `${file.name}-${file.size}`; - return processedFiles.has(fileKey); - }); + const allProcessed = activeFiles.every(file => enhancedProcessedFiles.has(file)); - if (allProcessed && activeFiles.length > 0) { + if (allProcessed) { mergeAllPDFs(); } + } else { + setMergedPdfDocument(null); } - }, [activeFiles, processedFiles, mergeAllPDFs]); + }, [activeFiles, enhancedProcessedFiles, mergeAllPDFs]); + + // Shared PDF instance for thumbnail generation + const [sharedPdfInstance, setSharedPdfInstance] = useState(null); + const [thumbnailGenerationStarted, setThumbnailGenerationStarted] = useState(false); + + // Session-based thumbnail cache with 1GB limit + const [thumbnailCache, setThumbnailCache] = useState>(new Map()); + const maxCacheSizeBytes = 1024 * 1024 * 1024; // 1GB cache limit + const [currentCacheSize, setCurrentCacheSize] = useState(0); + + // Cache management functions + const addThumbnailToCache = useCallback((pageId: string, thumbnail: string) => { + const thumbnailSizeBytes = thumbnail.length * 0.75; // Rough base64 size estimate + + setThumbnailCache(prev => { + const newCache = new Map(prev); + const now = Date.now(); + + // Add new thumbnail + newCache.set(pageId, { + thumbnail, + lastUsed: now, + sizeBytes: thumbnailSizeBytes + }); + + return newCache; + }); + + setCurrentCacheSize(prev => { + const newSize = prev + thumbnailSizeBytes; + + // If we exceed 1GB, trigger cleanup + if (newSize > maxCacheSizeBytes) { + setTimeout(() => cleanupThumbnailCache(), 0); + } + + return newSize; + }); + + console.log(`Cached thumbnail for ${pageId} (${Math.round(thumbnailSizeBytes / 1024)}KB)`); + }, [maxCacheSizeBytes]); + + const getThumbnailFromCache = useCallback((pageId: string): string | null => { + const cached = thumbnailCache.get(pageId); + if (!cached) return null; + + // Update last used timestamp + setThumbnailCache(prev => { + 
const newCache = new Map(prev); + const entry = newCache.get(pageId); + if (entry) { + entry.lastUsed = Date.now(); + } + return newCache; + }); + + return cached.thumbnail; + }, [thumbnailCache]); + + const cleanupThumbnailCache = useCallback(() => { + setThumbnailCache(prev => { + const entries = Array.from(prev.entries()); + + // Sort by last used (oldest first) + entries.sort(([, a], [, b]) => a.lastUsed - b.lastUsed); + + const newCache = new Map(); + let newSize = 0; + const targetSize = maxCacheSizeBytes * 0.8; // Clean to 80% of limit + + // Keep most recently used entries until we hit target size + for (let i = entries.length - 1; i >= 0 && newSize < targetSize; i--) { + const [key, value] = entries[i]; + newCache.set(key, value); + newSize += value.sizeBytes; + } + + setCurrentCacheSize(newSize); + console.log(`Cleaned thumbnail cache: ${prev.size} → ${newCache.size} entries (${Math.round(newSize / 1024 / 1024)}MB)`); + + return newCache; + }); + }, [maxCacheSizeBytes]); + + const clearThumbnailCache = useCallback(() => { + setThumbnailCache(new Map()); + setCurrentCacheSize(0); + console.log('Cleared thumbnail cache'); + }, []); + + // Start thumbnail generation process (separate from document loading) + const startThumbnailGeneration = useCallback(async () => { + if (!mergedPdfDocument || activeFiles.length !== 1 || thumbnailGenerationStarted) return; + + const file = activeFiles[0]; + const totalPages = mergedPdfDocument.totalPages; + + console.log(`Starting thumbnail generation for ${totalPages} pages`); + setThumbnailGenerationStarted(true); + + try { + // Load PDF ONCE for thumbnail generation (separate from document structure loading) + const arrayBuffer = await file.arrayBuffer(); + const { getDocument } = await import('pdfjs-dist'); + const pdf = await getDocument({ data: arrayBuffer }).promise; + setSharedPdfInstance(pdf); + + console.log('Shared PDF loaded, starting progressive thumbnail generation'); + + // Process pages in batches + let currentPage = 1; + const batchSize = totalPages > 500 ? 1 : 2; // Slower for massive files + const batchDelay = totalPages > 500 ? 
300 : 200; // More delay for massive files + + const processBatch = async () => { + const endPage = Math.min(currentPage + batchSize - 1, totalPages); + console.log(`Generating thumbnails for pages ${currentPage}-${endPage}`); + + for (let i = currentPage; i <= endPage; i++) { + // Send the shared PDF instance and cache functions to components + window.dispatchEvent(new CustomEvent('generateThumbnail', { + detail: { + pageNumber: i, + sharedPdf: pdf, + getThumbnailFromCache, + addThumbnailToCache + } + })); + } + + currentPage += batchSize; + + if (currentPage <= totalPages) { + setTimeout(processBatch, batchDelay); + } else { + console.log('Progressive thumbnail generation completed'); + } + }; + + // Start generating thumbnails immediately + processBatch(); + + } catch (error) { + console.error('Failed to start thumbnail generation:', error); + setThumbnailGenerationStarted(false); + } + }, [mergedPdfDocument, activeFiles, thumbnailGenerationStarted]); + + // Start thumbnail generation after document loads and UI settles + useEffect(() => { + if (mergedPdfDocument && !thumbnailGenerationStarted) { + // Small delay to let document render, then start thumbnail generation + const timer = setTimeout(startThumbnailGeneration, 1000); + return () => clearTimeout(timer); + } + }, [mergedPdfDocument, startThumbnailGeneration, thumbnailGenerationStarted]); + + // Cleanup shared PDF instance and cache when component unmounts or files change + useEffect(() => { + return () => { + if (sharedPdfInstance) { + sharedPdfInstance.destroy(); + setSharedPdfInstance(null); + } + setThumbnailGenerationStarted(false); + clearThumbnailCache(); // Clear cache when leaving/changing documents + }; + }, [activeFiles, clearThumbnailCache]); // Clear selections when files change useEffect(() => { @@ -275,7 +368,6 @@ const PageEditor = ({ setSelectionMode(false); }, [activeFiles]); - // Global drag cleanup to handle drops outside valid areas useEffect(() => { const handleGlobalDragEnd = () => { // Clean up drag state when drag operation ends anywhere @@ -286,7 +378,7 @@ const PageEditor = ({ }; const handleGlobalDrop = (e: DragEvent) => { - // Prevent default to avoid browser navigation on invalid drops + // Prevent default to handle invalid drops e.preventDefault(); }; @@ -702,7 +794,6 @@ const PageEditor = ({ const closePdf = useCallback(() => { setActiveFiles([]); - setProcessedFiles(new Map()); setMergedPdfDocument(null); setSelectedPages([]); }, [setActiveFiles]); @@ -749,31 +840,66 @@ const PageEditor = ({ closePdf ]); + // Return early if no merged document - Homepage handles file selection if (!mergedPdfDocument) { return ( - - - - - - - +
+      <Center>
+        {globalProcessing ? (
+          <Text>Processing PDF files...</Text>
+        ) : (
+          <Text>Waiting for PDF files...</Text>
+        )}
+      </Center>
); } return ( - + + {/* Enhanced Processing Status */} + {(globalProcessing || hasProcessingErrors) && ( + + {globalProcessing && ( + + Processing files... + {Math.round(processingProgress.overall)}% + + )} + + {Array.from(processingStates.values()).map(state => ( + + {state.fileName} + + {state.progress}% + {state.error && ( + + )} + + + ))} + + {hasProcessingErrors && ( + + Some files failed to process. Check individual file status above. + + )} + + )} + - file && handleFileUpload(file)} - style={{ display: 'none' }} - /> {status && ( )} - - {error && ( - setError(null)} - style={{ position: 'fixed', bottom: 70, right: 20, zIndex: 1000 }} - > - {error} - - )} - ); }; diff --git a/frontend/src/components/pageEditor/PageThumbnail.tsx b/frontend/src/components/pageEditor/PageThumbnail.tsx index 53626fd05..fa1f9d5cf 100644 --- a/frontend/src/components/pageEditor/PageThumbnail.tsx +++ b/frontend/src/components/pageEditor/PageThumbnail.tsx @@ -1,5 +1,5 @@ -import React, { useCallback } from 'react'; -import { Text, Checkbox, Tooltip, ActionIcon } from '@mantine/core'; +import React, { useCallback, useState, useEffect, useRef } from 'react'; +import { Text, Checkbox, Tooltip, ActionIcon, Loader } from '@mantine/core'; import ArrowBackIcon from '@mui/icons-material/ArrowBack'; import ArrowForwardIcon from '@mui/icons-material/ArrowForward'; import RotateLeftIcon from '@mui/icons-material/RotateLeft'; @@ -9,11 +9,18 @@ import ContentCutIcon from '@mui/icons-material/ContentCut'; import DragIndicatorIcon from '@mui/icons-material/DragIndicator'; import { PDFPage } from '../../../types/pageEditor'; import styles from './PageEditor.module.css'; +import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist'; + +// Ensure PDF.js worker is available +if (!GlobalWorkerOptions.workerSrc) { + GlobalWorkerOptions.workerSrc = '/pdf.worker.js'; +} interface PageThumbnailProps { page: PDFPage; index: number; totalPages: number; + originalFile?: File; // For lazy thumbnail generation selectedPages: string[]; selectionMode: boolean; draggedPage: string | null; @@ -43,6 +50,7 @@ const PageThumbnail = ({ page, index, totalPages, + originalFile, selectedPages, selectionMode, draggedPage, @@ -67,6 +75,74 @@ const PageThumbnail = ({ pdfDocument, setPdfDocument, }: PageThumbnailProps) => { + const [thumbnailUrl, setThumbnailUrl] = useState(page.thumbnail); + const [isLoadingThumbnail, setIsLoadingThumbnail] = useState(false); + + // Listen for progressive thumbnail generation events + useEffect(() => { + const handleThumbnailGeneration = (event: CustomEvent) => { + const { pageNumber, sharedPdf, getThumbnailFromCache, addThumbnailToCache } = event.detail; + if (pageNumber === page.pageNumber && !thumbnailUrl && !isLoadingThumbnail) { + + // Check cache first + const cachedThumbnail = getThumbnailFromCache(page.id); + if (cachedThumbnail) { + console.log(`Using cached thumbnail for page ${page.pageNumber}`); + setThumbnailUrl(cachedThumbnail); + return; + } + + // Generate new thumbnail and cache it + loadThumbnailFromSharedPdf(sharedPdf, addThumbnailToCache); + } + }; + + window.addEventListener('generateThumbnail', handleThumbnailGeneration as EventListener); + return () => window.removeEventListener('generateThumbnail', handleThumbnailGeneration as EventListener); + }, [page.pageNumber, page.id, thumbnailUrl, isLoadingThumbnail]); + + const loadThumbnailFromSharedPdf = async (sharedPdf: any, addThumbnailToCache?: (pageId: string, thumbnail: string) => void) => { + if (isLoadingThumbnail || thumbnailUrl) return; + 
+ setIsLoadingThumbnail(true); + try { + const thumbnail = await generateThumbnailFromPdf(sharedPdf); + + // Cache the generated thumbnail + if (addThumbnailToCache) { + addThumbnailToCache(page.id, thumbnail); + } + + } catch (error) { + console.error(`Failed to load thumbnail for page ${page.pageNumber}:`, error); + } finally { + setIsLoadingThumbnail(false); + } + }; + + const generateThumbnailFromPdf = async (pdf: any): Promise => { + const pdfPage = await pdf.getPage(page.pageNumber); + const scale = 0.2; // Low quality for page editor + const viewport = pdfPage.getViewport({ scale }); + + const canvas = document.createElement('canvas'); + canvas.width = viewport.width; + canvas.height = viewport.height; + + const context = canvas.getContext('2d'); + if (!context) { + throw new Error('Could not get canvas context'); + } + + await pdfPage.render({ canvasContext: context, viewport }).promise; + const thumbnail = canvas.toDataURL('image/jpeg', 0.8); + + setThumbnailUrl(thumbnail); + console.log(`Thumbnail generated for page ${page.pageNumber}`); + + return thumbnail; + }; + // Register this component with pageRefs for animations const pageElementRef = useCallback((element: HTMLDivElement | null) => { if (element) { @@ -162,18 +238,30 @@ const PageThumbnail = ({ justifyContent: 'center' }} > - {`Page + {thumbnailUrl ? ( + {`Page + ) : isLoadingThumbnail ? ( +
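+            /* Interim state: PageEditor's batch loop has dispatched this page's
+               event but the shared-instance render has not resolved yet. */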
+            <>
+              <Loader />
+              <Text>Loading...</Text>
+            </>
+          ) : (
+            <>
+              <Text>📄</Text>
+              <Text>Page {page.pageNumber}</Text>
+            </>
+ )} ; + processingStates: Map; + isProcessing: boolean; + hasProcessingErrors: boolean; + processingProgress: { + overall: number; + fileProgress: Map; + estimatedTimeRemaining: number; + }; + cacheStats: { + entries: number; + totalSizeBytes: number; + maxSizeBytes: number; + }; + metrics: { + totalFiles: number; + completedFiles: number; + failedFiles: number; + averageProcessingTime: number; + cacheHitRate: number; + }; + actions: { + cancelProcessing: (fileKey: string) => void; + retryProcessing: (file: File) => void; + clearCache: () => void; + }; +} + +export function useEnhancedProcessedFiles( + activeFiles: File[], + config?: Partial +): UseEnhancedProcessedFilesResult { + const [processedFiles, setProcessedFiles] = useState>(new Map()); + const [processingStates, setProcessingStates] = useState>(new Map()); + + // Subscribe to processing state changes once + useEffect(() => { + const unsubscribe = enhancedPDFProcessingService.onProcessingChange(setProcessingStates); + return unsubscribe; + }, []); + + // Process files when activeFiles changes + useEffect(() => { + if (activeFiles.length === 0) { + setProcessedFiles(new Map()); + return; + } + + const processFiles = async () => { + const newProcessedFiles = new Map(); + + for (const file of activeFiles) { + // Check if we already have this file processed + const existing = processedFiles.get(file); + if (existing) { + newProcessedFiles.set(file, existing); + continue; + } + + try { + // Generate proper file key matching the service + const fileKey = await FileHasher.generateHybridHash(file); + console.log('Processing file:', file.name); + + const processed = await enhancedPDFProcessingService.processFile(file, config); + if (processed) { + console.log('Got processed file for:', file.name); + newProcessedFiles.set(file, processed); + } else { + console.log('Processing started for:', file.name, '- waiting for completion'); + } + } catch (error) { + console.error(`Failed to start processing for ${file.name}:`, error); + } + } + + // Update processed files if we have any + if (newProcessedFiles.size > 0) { + setProcessedFiles(newProcessedFiles); + } + }; + + processFiles(); + }, [activeFiles]); + + // Listen for processing completion + useEffect(() => { + const checkForCompletedFiles = async () => { + let hasNewFiles = false; + const updatedFiles = new Map(processedFiles); + + // Generate file keys for all files first + const fileKeyPromises = activeFiles.map(async (file) => ({ + file, + key: await FileHasher.generateHybridHash(file) + })); + + const fileKeyPairs = await Promise.all(fileKeyPromises); + + for (const { file, key } of fileKeyPairs) { + // Only check files that don't have processed results yet + if (!updatedFiles.has(file)) { + const processingState = processingStates.get(key); + + // Check for both processing and recently completed files + // This ensures we catch completed files before they're cleaned up + if (processingState?.status === 'processing' || processingState?.status === 'completed') { + try { + const processed = await enhancedPDFProcessingService.processFile(file, config); + if (processed) { + console.log('Processing completed for:', file.name); + updatedFiles.set(file, processed); + hasNewFiles = true; + } + } catch (error) { + // Ignore errors in completion check + } + } + } + } + + if (hasNewFiles) { + setProcessedFiles(updatedFiles); + } + }; + + // Check every 500ms for completed processing + const interval = setInterval(checkForCompletedFiles, 500); + return () => clearInterval(interval); + }, 
[activeFiles, processingStates]); + + + // Cleanup when activeFiles changes + useEffect(() => { + const currentFiles = new Set(activeFiles); + const previousFiles = Array.from(processedFiles.keys()); + const removedFiles = previousFiles.filter(file => !currentFiles.has(file)); + + if (removedFiles.length > 0) { + // Clean up processing service cache + enhancedPDFProcessingService.cleanup(removedFiles); + + // Update local state + setProcessedFiles(prev => { + const updated = new Map(); + for (const [file, processed] of prev) { + if (currentFiles.has(file)) { + updated.set(file, processed); + } + } + return updated; + }); + } + }, [activeFiles]); + + // Calculate derived state + const isProcessing = processingStates.size > 0; + const hasProcessingErrors = Array.from(processingStates.values()).some(state => state.status === 'error'); + + // Calculate overall progress + const processingProgress = calculateProcessingProgress(processingStates); + + // Get cache stats and metrics + const cacheStats = enhancedPDFProcessingService.getCacheStats(); + const metrics = enhancedPDFProcessingService.getMetrics(); + + // Action handlers + const actions = { + cancelProcessing: (fileKey: string) => { + enhancedPDFProcessingService.cancelProcessing(fileKey); + }, + + retryProcessing: async (file: File) => { + try { + await enhancedPDFProcessingService.processFile(file, config); + } catch (error) { + console.error(`Failed to retry processing for ${file.name}:`, error); + } + }, + + clearCache: () => { + enhancedPDFProcessingService.clearAll(); + } + }; + + return { + processedFiles, + processingStates, + isProcessing, + hasProcessingErrors, + processingProgress, + cacheStats, + metrics, + actions + }; +} + +/** + * Calculate overall processing progress from individual file states + */ +function calculateProcessingProgress(states: Map): { + overall: number; + fileProgress: Map; + estimatedTimeRemaining: number; +} { + if (states.size === 0) { + return { + overall: 100, + fileProgress: new Map(), + estimatedTimeRemaining: 0 + }; + } + + const fileProgress = new Map(); + let totalProgress = 0; + let totalEstimatedTime = 0; + + for (const [fileKey, state] of states) { + fileProgress.set(fileKey, state.progress); + totalProgress += state.progress; + totalEstimatedTime += state.estimatedTimeRemaining || 0; + } + + const overall = totalProgress / states.size; + const estimatedTimeRemaining = totalEstimatedTime; + + return { + overall, + fileProgress, + estimatedTimeRemaining + }; +} + +/** + * Hook for getting a single processed file with enhanced features + */ +export function useEnhancedProcessedFile( + file: File | null, + config?: Partial +): { + processedFile: ProcessedFile | null; + isProcessing: boolean; + processingState: ProcessingState | null; + error: string | null; + canRetry: boolean; + actions: { + cancel: () => void; + retry: () => void; + }; +} { + const result = useEnhancedProcessedFiles(file ? [file] : [], config); + + const processedFile = file ? result.processedFiles.get(file) || null : null; + // Note: This is async but we can't await in hook return - consider refactoring if needed + const fileKey = file ? '' : ''; // TODO: Handle async file key generation + const processingState = fileKey ? 
result.processingStates.get(fileKey) || null : null; + const isProcessing = !!processingState; + const error = processingState?.error?.message || null; + const canRetry = processingState?.error?.recoverable || false; + + const actions = { + cancel: () => { + if (fileKey) { + result.actions.cancelProcessing(fileKey); + } + }, + retry: () => { + if (file) { + result.actions.retryProcessing(file); + } + } + }; + + return { + processedFile, + isProcessing, + processingState, + error, + canRetry, + actions + }; +} \ No newline at end of file diff --git a/frontend/src/hooks/usePDFProcessor.ts b/frontend/src/hooks/usePDFProcessor.ts index 7b1cc0c4b..0a717a3a9 100644 --- a/frontend/src/hooks/usePDFProcessor.ts +++ b/frontend/src/hooks/usePDFProcessor.ts @@ -50,18 +50,28 @@ export function usePDFProcessor() { const pages: PDFPage[] = []; - // Generate thumbnails for all pages + // Create pages without thumbnails initially - load them lazily for (let i = 1; i <= totalPages; i++) { - const thumbnail = await generatePageThumbnail(file, i); pages.push({ id: `${file.name}-page-${i}`, pageNumber: i, - thumbnail, + thumbnail: null, // Will be loaded lazily rotation: 0, selected: false }); } + // Generate thumbnails for first 10 pages immediately for better UX + const priorityPages = Math.min(10, totalPages); + for (let i = 1; i <= priorityPages; i++) { + try { + const thumbnail = await generatePageThumbnail(file, i); + pages[i - 1].thumbnail = thumbnail; + } catch (error) { + console.warn(`Failed to generate thumbnail for page ${i}:`, error); + } + } + // Clean up pdf.destroy(); diff --git a/frontend/src/hooks/useProcessedFiles.ts b/frontend/src/hooks/useProcessedFiles.ts new file mode 100644 index 000000000..a7db9b07e --- /dev/null +++ b/frontend/src/hooks/useProcessedFiles.ts @@ -0,0 +1,125 @@ +import { useState, useEffect } from 'react'; +import { ProcessedFile, ProcessingState } from '../types/processing'; +import { pdfProcessingService } from '../services/pdfProcessingService'; + +interface UseProcessedFilesResult { + processedFiles: Map; + processingStates: Map; + isProcessing: boolean; + hasProcessingErrors: boolean; + cacheStats: { + entries: number; + totalSizeBytes: number; + maxSizeBytes: number; + }; +} + +export function useProcessedFiles(activeFiles: File[]): UseProcessedFilesResult { + const [processedFiles, setProcessedFiles] = useState>(new Map()); + const [processingStates, setProcessingStates] = useState>(new Map()); + + useEffect(() => { + // Subscribe to processing state changes + const unsubscribe = pdfProcessingService.onProcessingChange(setProcessingStates); + + // Check/start processing for each active file + const checkProcessing = async () => { + const newProcessedFiles = new Map(); + + for (const file of activeFiles) { + const processed = await pdfProcessingService.getProcessedFile(file); + if (processed) { + newProcessedFiles.set(file, processed); + } + } + + setProcessedFiles(newProcessedFiles); + }; + + checkProcessing(); + + return unsubscribe; + }, [activeFiles]); + + // Listen for processing completion and update processed files + useEffect(() => { + const updateProcessedFiles = async () => { + const updated = new Map(); + + for (const file of activeFiles) { + const existing = processedFiles.get(file); + if (existing) { + updated.set(file, existing); + } else { + // Check if processing just completed + const processed = await pdfProcessingService.getProcessedFile(file); + if (processed) { + updated.set(file, processed); + } + } + } + + setProcessedFiles(updated); + }; + + 
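+    // getProcessedFile() resolves from the service cache once processing finishes,
+    // so re-running this check on every state change picks up files whose
+    // background processing completed since the last render.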
// Small delay to allow processing state to settle + const timeoutId = setTimeout(updateProcessedFiles, 100); + return () => clearTimeout(timeoutId); + }, [processingStates, activeFiles]); + + // Cleanup when activeFiles changes + useEffect(() => { + const currentFiles = new Set(activeFiles); + const previousFiles = Array.from(processedFiles.keys()); + const removedFiles = previousFiles.filter(file => !currentFiles.has(file)); + + if (removedFiles.length > 0) { + // Clean up processing service cache + pdfProcessingService.cleanup(removedFiles); + + // Update local state + setProcessedFiles(prev => { + const updated = new Map(); + for (const [file, processed] of prev) { + if (currentFiles.has(file)) { + updated.set(file, processed); + } + } + return updated; + }); + } + }, [activeFiles]); + + // Derived state + const isProcessing = processingStates.size > 0; + const hasProcessingErrors = Array.from(processingStates.values()).some(state => state.status === 'error'); + const cacheStats = pdfProcessingService.getCacheStats(); + + return { + processedFiles, + processingStates, + isProcessing, + hasProcessingErrors, + cacheStats + }; +} + +// Hook for getting a single processed file +export function useProcessedFile(file: File | null): { + processedFile: ProcessedFile | null; + isProcessing: boolean; + processingState: ProcessingState | null; +} { + const result = useProcessedFiles(file ? [file] : []); + + const processedFile = file ? result.processedFiles.get(file) || null : null; + const fileKey = file ? pdfProcessingService.generateFileKey(file) : ''; + const processingState = fileKey ? result.processingStates.get(fileKey) || null : null; + const isProcessing = !!processingState; + + return { + processedFile, + isProcessing, + processingState + }; +} \ No newline at end of file diff --git a/frontend/src/services/enhancedPDFProcessingService.ts b/frontend/src/services/enhancedPDFProcessingService.ts new file mode 100644 index 000000000..b3627fa82 --- /dev/null +++ b/frontend/src/services/enhancedPDFProcessingService.ts @@ -0,0 +1,552 @@ +import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist'; +import { ProcessedFile, ProcessingState, PDFPage, ProcessingStrategy, ProcessingConfig, ProcessingMetrics } from '../types/processing'; +import { ProcessingCache } from './processingCache'; +import { FileHasher } from '../utils/fileHash'; +import { FileAnalyzer } from './fileAnalyzer'; +import { ProcessingErrorHandler } from './processingErrorHandler'; + +// Set up PDF.js worker +GlobalWorkerOptions.workerSrc = '/pdf.worker.js'; + +export class EnhancedPDFProcessingService { + private static instance: EnhancedPDFProcessingService; + private cache = new ProcessingCache(); + private processing = new Map(); + private processingListeners = new Set<(states: Map) => void>(); + private metrics: ProcessingMetrics = { + totalFiles: 0, + completedFiles: 0, + failedFiles: 0, + averageProcessingTime: 0, + cacheHitRate: 0, + memoryUsage: 0 + }; + + private defaultConfig: ProcessingConfig = { + strategy: 'immediate_full', + chunkSize: 20, + thumbnailQuality: 'medium', + priorityPageCount: 10, + useWebWorker: false, + maxRetries: 3, + timeoutMs: 300000 // 5 minutes + }; + + private constructor() {} + + static getInstance(): EnhancedPDFProcessingService { + if (!EnhancedPDFProcessingService.instance) { + EnhancedPDFProcessingService.instance = new EnhancedPDFProcessingService(); + } + return EnhancedPDFProcessingService.instance; + } + + /** + * Process a file with intelligent strategy selection + */ + async 
processFile(file: File, customConfig?: Partial): Promise { + const fileKey = await this.generateFileKey(file); + + // Check cache first + const cached = this.cache.get(fileKey); + if (cached) { + console.log('Cache hit for:', file.name); + this.updateMetrics('cacheHit'); + return cached; + } + + // Check if already processing + if (this.processing.has(fileKey)) { + console.log('Already processing:', file.name); + return null; + } + + // Analyze file to determine optimal strategy + const analysis = await FileAnalyzer.analyzeFile(file); + if (analysis.isCorrupted) { + throw new Error(`File ${file.name} appears to be corrupted`); + } + + // Create processing config + const config: ProcessingConfig = { + ...this.defaultConfig, + strategy: analysis.recommendedStrategy, + ...customConfig + }; + + // Start processing + this.startProcessing(file, fileKey, config, analysis.estimatedProcessingTime); + return null; + } + + /** + * Start processing a file with the specified configuration + */ + private async startProcessing( + file: File, + fileKey: string, + config: ProcessingConfig, + estimatedTime: number + ): Promise { + // Create cancellation token + const cancellationToken = ProcessingErrorHandler.createTimeoutController(config.timeoutMs); + + // Set initial state + const state: ProcessingState = { + fileKey, + fileName: file.name, + status: 'processing', + progress: 0, + strategy: config.strategy, + startedAt: Date.now(), + estimatedTimeRemaining: estimatedTime, + cancellationToken + }; + + this.processing.set(fileKey, state); + this.notifyListeners(); + this.updateMetrics('started'); + + try { + // Execute processing with retry logic + const processedFile = await ProcessingErrorHandler.executeWithRetry( + () => this.executeProcessingStrategy(file, config, state), + (error) => { + state.error = error; + this.notifyListeners(); + }, + config.maxRetries + ); + + // Cache the result + this.cache.set(fileKey, processedFile); + + // Update state to completed + state.status = 'completed'; + state.progress = 100; + state.completedAt = Date.now(); + this.notifyListeners(); + this.updateMetrics('completed', Date.now() - state.startedAt); + + // Remove from processing map after brief delay + setTimeout(() => { + this.processing.delete(fileKey); + this.notifyListeners(); + }, 2000); + + } catch (error) { + console.error('Processing failed for', file.name, ':', error); + + const processingError = ProcessingErrorHandler.createProcessingError(error); + state.status = 'error'; + state.error = processingError; + this.notifyListeners(); + this.updateMetrics('failed'); + + // Remove failed processing after delay + setTimeout(() => { + this.processing.delete(fileKey); + this.notifyListeners(); + }, 10000); + } + } + + /** + * Execute the actual processing based on strategy + */ + private async executeProcessingStrategy( + file: File, + config: ProcessingConfig, + state: ProcessingState + ): Promise { + switch (config.strategy) { + case 'immediate_full': + return this.processImmediateFull(file, config, state); + + case 'priority_pages': + return this.processPriorityPages(file, config, state); + + case 'progressive_chunked': + return this.processProgressiveChunked(file, config, state); + + case 'metadata_only': + return this.processMetadataOnly(file, config, state); + + default: + return this.processImmediateFull(file, config, state); + } + } + + /** + * Process all pages immediately (for small files) + */ + private async processImmediateFull( + file: File, + config: ProcessingConfig, + state: ProcessingState + ): 
Promise { + const arrayBuffer = await file.arrayBuffer(); + const pdf = await getDocument({ data: arrayBuffer }).promise; + const totalPages = pdf.numPages; + + state.progress = 10; + this.notifyListeners(); + + const pages: PDFPage[] = []; + + for (let i = 1; i <= totalPages; i++) { + // Check for cancellation + if (state.cancellationToken?.signal.aborted) { + pdf.destroy(); + throw new Error('Processing cancelled'); + } + + const page = await pdf.getPage(i); + const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality); + + pages.push({ + id: `${file.name}-page-${i}`, + pageNumber: i, + thumbnail, + rotation: 0, + selected: false + }); + + // Update progress + state.progress = 10 + (i / totalPages) * 85; + state.currentPage = i; + this.notifyListeners(); + } + + pdf.destroy(); + state.progress = 100; + this.notifyListeners(); + + return this.createProcessedFile(file, pages, totalPages); + } + + /** + * Process priority pages first, then queue the rest + */ + private async processPriorityPages( + file: File, + config: ProcessingConfig, + state: ProcessingState + ): Promise { + const arrayBuffer = await file.arrayBuffer(); + const pdf = await getDocument({ data: arrayBuffer }).promise; + const totalPages = pdf.numPages; + + state.progress = 10; + this.notifyListeners(); + + const pages: PDFPage[] = []; + const priorityCount = Math.min(config.priorityPageCount, totalPages); + + // Process priority pages first + for (let i = 1; i <= priorityCount; i++) { + if (state.cancellationToken?.signal.aborted) { + pdf.destroy(); + throw new Error('Processing cancelled'); + } + + const page = await pdf.getPage(i); + const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality); + + pages.push({ + id: `${file.name}-page-${i}`, + pageNumber: i, + thumbnail, + rotation: 0, + selected: false + }); + + state.progress = 10 + (i / priorityCount) * 60; + state.currentPage = i; + this.notifyListeners(); + } + + // Create placeholder pages for remaining pages + for (let i = priorityCount + 1; i <= totalPages; i++) { + pages.push({ + id: `${file.name}-page-${i}`, + pageNumber: i, + thumbnail: null, // Will be loaded lazily + rotation: 0, + selected: false + }); + } + + pdf.destroy(); + state.progress = 100; + this.notifyListeners(); + + // Queue background processing for remaining pages (only if there are any) + if (priorityCount < totalPages) { + this.queueBackgroundProcessing(file, priorityCount + 1, totalPages); + } + + return this.createProcessedFile(file, pages, totalPages); + } + + /** + * Process in chunks with breaks between chunks + */ + private async processProgressiveChunked( + file: File, + config: ProcessingConfig, + state: ProcessingState + ): Promise { + const arrayBuffer = await file.arrayBuffer(); + const pdf = await getDocument({ data: arrayBuffer }).promise; + const totalPages = pdf.numPages; + + state.progress = 10; + this.notifyListeners(); + + const pages: PDFPage[] = []; + const chunkSize = config.chunkSize; + let processedPages = 0; + + // Process first chunk immediately + const firstChunkEnd = Math.min(chunkSize, totalPages); + + for (let i = 1; i <= firstChunkEnd; i++) { + if (state.cancellationToken?.signal.aborted) { + pdf.destroy(); + throw new Error('Processing cancelled'); + } + + const page = await pdf.getPage(i); + const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality); + + pages.push({ + id: `${file.name}-page-${i}`, + pageNumber: i, + thumbnail, + rotation: 0, + selected: false + }); + + processedPages++; + 
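+      // Progress budget: 10% covers the initial document load, and each page rendered
+      // in this first chunk adds its share of the next 70% (measured against the
+      // full page count); the bar jumps to 100% once placeholders are created below.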
state.progress = 10 + (processedPages / totalPages) * 70; + state.currentPage = i; + this.notifyListeners(); + + // Small delay to prevent UI blocking + if (i % 5 === 0) { + await new Promise(resolve => setTimeout(resolve, 10)); + } + } + + // Create placeholders for remaining pages + for (let i = firstChunkEnd + 1; i <= totalPages; i++) { + pages.push({ + id: `${file.name}-page-${i}`, + pageNumber: i, + thumbnail: null, + rotation: 0, + selected: false + }); + } + + pdf.destroy(); + state.progress = 100; + this.notifyListeners(); + + // Queue remaining chunks for background processing (only if there are any) + if (firstChunkEnd < totalPages) { + this.queueChunkedBackgroundProcessing(file, firstChunkEnd + 1, totalPages, chunkSize); + } + + return this.createProcessedFile(file, pages, totalPages); + } + + /** + * Process metadata only (for very large files) + */ + private async processMetadataOnly( + file: File, + config: ProcessingConfig, + state: ProcessingState + ): Promise { + const arrayBuffer = await file.arrayBuffer(); + const pdf = await getDocument({ data: arrayBuffer }).promise; + const totalPages = pdf.numPages; + + state.progress = 50; + this.notifyListeners(); + + // Create placeholder pages without thumbnails + const pages: PDFPage[] = []; + for (let i = 1; i <= totalPages; i++) { + pages.push({ + id: `${file.name}-page-${i}`, + pageNumber: i, + thumbnail: null, + rotation: 0, + selected: false + }); + } + + pdf.destroy(); + state.progress = 100; + this.notifyListeners(); + + return this.createProcessedFile(file, pages, totalPages); + } + + /** + * Render a page thumbnail with specified quality + */ + private async renderPageThumbnail(page: any, quality: 'low' | 'medium' | 'high'): Promise { + const scales = { low: 0.2, medium: 0.5, high: 0.8 }; // Reduced low quality for page editor + const scale = scales[quality]; + + const viewport = page.getViewport({ scale }); + const canvas = document.createElement('canvas'); + canvas.width = viewport.width; + canvas.height = viewport.height; + + const context = canvas.getContext('2d'); + if (!context) { + throw new Error('Could not get canvas context'); + } + + await page.render({ canvasContext: context, viewport }).promise; + return canvas.toDataURL('image/jpeg', 0.8); // Use JPEG for better compression + } + + /** + * Create a ProcessedFile object + */ + private createProcessedFile(file: File, pages: PDFPage[], totalPages: number): ProcessedFile { + return { + id: `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`, + pages, + totalPages, + metadata: { + title: file.name, + createdAt: new Date().toISOString(), + modifiedAt: new Date().toISOString() + } + }; + } + + /** + * Queue background processing for remaining pages + */ + private queueBackgroundProcessing(file: File, startPage: number, endPage: number): void { + // TODO: Implement background processing queue + console.log(`Queued background processing for ${file.name} pages ${startPage}-${endPage}`); + } + + /** + * Queue chunked background processing + */ + private queueChunkedBackgroundProcessing(file: File, startPage: number, endPage: number, chunkSize: number): void { + // TODO: Implement chunked background processing + console.log(`Queued chunked background processing for ${file.name} pages ${startPage}-${endPage} in chunks of ${chunkSize}`); + } + + /** + * Generate a unique, collision-resistant cache key + */ + private async generateFileKey(file: File): Promise { + return await FileHasher.generateHybridHash(file); + } + + /** + * Cancel processing for a specific 
file + */ + cancelProcessing(fileKey: string): void { + const state = this.processing.get(fileKey); + if (state && state.cancellationToken) { + state.cancellationToken.abort(); + state.status = 'cancelled'; + this.notifyListeners(); + } + } + + /** + * Update processing metrics + */ + private updateMetrics(event: 'started' | 'completed' | 'failed' | 'cacheHit', processingTime?: number): void { + switch (event) { + case 'started': + this.metrics.totalFiles++; + break; + case 'completed': + this.metrics.completedFiles++; + if (processingTime) { + // Update rolling average + const totalProcessingTime = this.metrics.averageProcessingTime * (this.metrics.completedFiles - 1) + processingTime; + this.metrics.averageProcessingTime = totalProcessingTime / this.metrics.completedFiles; + } + break; + case 'failed': + this.metrics.failedFiles++; + break; + case 'cacheHit': + // Update cache hit rate + const totalAttempts = this.metrics.totalFiles + 1; + this.metrics.cacheHitRate = (this.metrics.cacheHitRate * this.metrics.totalFiles + 1) / totalAttempts; + break; + } + } + + /** + * Get processing metrics + */ + getMetrics(): ProcessingMetrics { + return { ...this.metrics }; + } + + /** + * State subscription for components + */ + onProcessingChange(callback: (states: Map) => void): () => void { + this.processingListeners.add(callback); + return () => this.processingListeners.delete(callback); + } + + getProcessingStates(): Map { + return new Map(this.processing); + } + + private notifyListeners(): void { + this.processingListeners.forEach(callback => callback(this.processing)); + } + + /** + * Cleanup method for removed files + */ + cleanup(removedFiles: File[]): void { + removedFiles.forEach(async (file) => { + const key = await this.generateFileKey(file); + this.cache.delete(key); + this.cancelProcessing(key); + this.processing.delete(key); + }); + this.notifyListeners(); + } + + /** + * Get cache statistics + */ + getCacheStats() { + return this.cache.getStats(); + } + + /** + * Clear all cache and processing + */ + clearAll(): void { + this.cache.clear(); + this.processing.clear(); + this.notifyListeners(); + } +} + +// Export singleton instance +export const enhancedPDFProcessingService = EnhancedPDFProcessingService.getInstance(); \ No newline at end of file diff --git a/frontend/src/services/fileAnalyzer.ts b/frontend/src/services/fileAnalyzer.ts new file mode 100644 index 000000000..2a9f15cff --- /dev/null +++ b/frontend/src/services/fileAnalyzer.ts @@ -0,0 +1,240 @@ +import { getDocument } from 'pdfjs-dist'; +import { FileAnalysis, ProcessingStrategy } from '../types/processing'; + +export class FileAnalyzer { + private static readonly SIZE_THRESHOLDS = { + SMALL: 10 * 1024 * 1024, // 10MB + MEDIUM: 50 * 1024 * 1024, // 50MB + LARGE: 200 * 1024 * 1024, // 200MB + }; + + private static readonly PAGE_THRESHOLDS = { + FEW: 10, // < 10 pages - immediate full processing + MANY: 50, // < 50 pages - priority pages + MASSIVE: 100, // < 100 pages - progressive chunked + // >100 pages = metadata only + }; + + /** + * Analyze a file to determine optimal processing strategy + */ + static async analyzeFile(file: File): Promise { + const analysis: FileAnalysis = { + fileSize: file.size, + isEncrypted: false, + isCorrupted: false, + recommendedStrategy: 'metadata_only', + estimatedProcessingTime: 0, + }; + + try { + // Quick validation and page count estimation + const quickAnalysis = await this.quickPDFAnalysis(file); + analysis.estimatedPageCount = quickAnalysis.pageCount; + analysis.isEncrypted = 
quickAnalysis.isEncrypted; + analysis.isCorrupted = quickAnalysis.isCorrupted; + + // Determine strategy based on file characteristics + analysis.recommendedStrategy = this.determineStrategy(file.size, quickAnalysis.pageCount); + + // Estimate processing time + analysis.estimatedProcessingTime = this.estimateProcessingTime( + file.size, + quickAnalysis.pageCount, + analysis.recommendedStrategy + ); + + } catch (error) { + console.error('File analysis failed:', error); + analysis.isCorrupted = true; + analysis.recommendedStrategy = 'metadata_only'; + } + + return analysis; + } + + /** + * Quick PDF analysis without full processing + */ + private static async quickPDFAnalysis(file: File): Promise<{ + pageCount: number; + isEncrypted: boolean; + isCorrupted: boolean; + }> { + try { + // For small files, read the whole file + // For large files, try the whole file first (PDF.js needs the complete structure) + const arrayBuffer = await file.arrayBuffer(); + + const pdf = await getDocument({ + data: arrayBuffer, + stopAtErrors: false, // Don't stop at minor errors + verbosity: 0 // Suppress PDF.js warnings + }).promise; + + const pageCount = pdf.numPages; + const isEncrypted = pdf.isEncrypted; + + // Clean up + pdf.destroy(); + + return { + pageCount, + isEncrypted, + isCorrupted: false + }; + + } catch (error) { + // Try to determine if it's corruption vs encryption + const errorMessage = error instanceof Error ? error.message.toLowerCase() : ''; + const isEncrypted = errorMessage.includes('password') || errorMessage.includes('encrypted'); + + return { + pageCount: 0, + isEncrypted, + isCorrupted: !isEncrypted // If not encrypted, probably corrupted + }; + } + } + + /** + * Determine the best processing strategy based on file characteristics + */ + private static determineStrategy(fileSize: number, pageCount?: number): ProcessingStrategy { + // Handle corrupted or encrypted files + if (!pageCount || pageCount === 0) { + return 'metadata_only'; + } + + // Small files with few pages - process everything immediately + if (fileSize <= this.SIZE_THRESHOLDS.SMALL && pageCount <= this.PAGE_THRESHOLDS.FEW) { + return 'immediate_full'; + } + + // Medium files or many pages - priority pages first, then progressive + if (fileSize <= this.SIZE_THRESHOLDS.MEDIUM && pageCount <= this.PAGE_THRESHOLDS.MANY) { + return 'priority_pages'; + } + + // Large files or massive page counts - chunked processing + if (fileSize <= this.SIZE_THRESHOLDS.LARGE && pageCount <= this.PAGE_THRESHOLDS.MASSIVE) { + return 'progressive_chunked'; + } + + // Very large files - metadata only + return 'metadata_only'; + } + + /** + * Estimate processing time based on file characteristics and strategy + */ + private static estimateProcessingTime( + fileSize: number, + pageCount: number = 0, + strategy: ProcessingStrategy + ): number { + const baseTimes = { + immediate_full: 200, // 200ms per page + priority_pages: 150, // 150ms per page (optimized) + progressive_chunked: 100, // 100ms per page (chunked) + metadata_only: 50 // 50ms total + }; + + const baseTime = baseTimes[strategy]; + + switch (strategy) { + case 'metadata_only': + return baseTime; + + case 'immediate_full': + return pageCount * baseTime; + + case 'priority_pages': + // Estimate time for priority pages (first 10) + const priorityPages = Math.min(pageCount, 10); + return priorityPages * baseTime; + + case 'progressive_chunked': + // Estimate time for first chunk (20 pages) + const firstChunk = Math.min(pageCount, 20); + return firstChunk * baseTime; + + default: + return 
pageCount * baseTime; + } + } + + /** + * Get processing recommendations for a set of files + */ + static async analyzeMultipleFiles(files: File[]): Promise<{ + analyses: Map; + recommendations: { + totalEstimatedTime: number; + suggestedBatchSize: number; + shouldUseWebWorker: boolean; + memoryWarning: boolean; + }; + }> { + const analyses = new Map(); + let totalEstimatedTime = 0; + let totalSize = 0; + let totalPages = 0; + + // Analyze each file + for (const file of files) { + const analysis = await this.analyzeFile(file); + analyses.set(file, analysis); + totalEstimatedTime += analysis.estimatedProcessingTime; + totalSize += file.size; + totalPages += analysis.estimatedPageCount || 0; + } + + // Generate recommendations + const recommendations = { + totalEstimatedTime, + suggestedBatchSize: this.calculateBatchSize(files.length, totalSize), + shouldUseWebWorker: totalPages > 100 || totalSize > this.SIZE_THRESHOLDS.MEDIUM, + memoryWarning: totalSize > this.SIZE_THRESHOLDS.LARGE || totalPages > this.PAGE_THRESHOLDS.MASSIVE + }; + + return { analyses, recommendations }; + } + + /** + * Calculate optimal batch size for processing multiple files + */ + private static calculateBatchSize(fileCount: number, totalSize: number): number { + // Process small batches for large total sizes + if (totalSize > this.SIZE_THRESHOLDS.LARGE) { + return Math.max(1, Math.floor(fileCount / 4)); + } + + if (totalSize > this.SIZE_THRESHOLDS.MEDIUM) { + return Math.max(2, Math.floor(fileCount / 2)); + } + + // Process all at once for smaller total sizes + return fileCount; + } + + /** + * Check if a file appears to be a valid PDF + */ + static async isValidPDF(file: File): Promise { + if (file.type !== 'application/pdf' && !file.name.toLowerCase().endsWith('.pdf')) { + return false; + } + + try { + // Read first few bytes to check PDF header + const header = file.slice(0, 8); + const headerBytes = new Uint8Array(await header.arrayBuffer()); + const headerString = String.fromCharCode(...headerBytes); + + return headerString.startsWith('%PDF-'); + } catch (error) { + return false; + } + } +} \ No newline at end of file diff --git a/frontend/src/services/pdfProcessingService.ts b/frontend/src/services/pdfProcessingService.ts new file mode 100644 index 000000000..5bb6f2ce3 --- /dev/null +++ b/frontend/src/services/pdfProcessingService.ts @@ -0,0 +1,188 @@ +import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist'; +import { ProcessedFile, ProcessingState, PDFPage } from '../types/processing'; +import { ProcessingCache } from './processingCache'; + +// Set up PDF.js worker +GlobalWorkerOptions.workerSrc = '/pdf.worker.js'; + +export class PDFProcessingService { + private static instance: PDFProcessingService; + private cache = new ProcessingCache(); + private processing = new Map(); + private processingListeners = new Set<(states: Map) => void>(); + + private constructor() {} + + static getInstance(): PDFProcessingService { + if (!PDFProcessingService.instance) { + PDFProcessingService.instance = new PDFProcessingService(); + } + return PDFProcessingService.instance; + } + + async getProcessedFile(file: File): Promise { + const fileKey = this.generateFileKey(file); + + // Check cache first + const cached = this.cache.get(fileKey); + if (cached) { + console.log('Cache hit for:', file.name); + return cached; + } + + // Check if already processing + if (this.processing.has(fileKey)) { + console.log('Already processing:', file.name); + return null; // Will be available when processing completes + } + + // Start 
diff --git a/frontend/src/services/pdfProcessingService.ts b/frontend/src/services/pdfProcessingService.ts
new file mode 100644
index 000000000..5bb6f2ce3
--- /dev/null
+++ b/frontend/src/services/pdfProcessingService.ts
@@ -0,0 +1,188 @@
+import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
+import { ProcessedFile, ProcessingState, PDFPage } from '../types/processing';
+import { ProcessingCache } from './processingCache';
+
+// Set up PDF.js worker
+GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
+
+export class PDFProcessingService {
+  private static instance: PDFProcessingService;
+  private cache = new ProcessingCache();
+  private processing = new Map<string, ProcessingState>();
+  private processingListeners = new Set<(states: Map<string, ProcessingState>) => void>();
+
+  private constructor() {}
+
+  static getInstance(): PDFProcessingService {
+    if (!PDFProcessingService.instance) {
+      PDFProcessingService.instance = new PDFProcessingService();
+    }
+    return PDFProcessingService.instance;
+  }
+
+  async getProcessedFile(file: File): Promise<ProcessedFile | null> {
+    const fileKey = this.generateFileKey(file);
+
+    // Check cache first
+    const cached = this.cache.get(fileKey);
+    if (cached) {
+      console.log('Cache hit for:', file.name);
+      return cached;
+    }
+
+    // Check if already processing
+    if (this.processing.has(fileKey)) {
+      console.log('Already processing:', file.name);
+      return null; // Will be available when processing completes
+    }
+
+    // Start processing
+    this.startProcessing(file, fileKey);
+    return null;
+  }
+
+  private async startProcessing(file: File, fileKey: string): Promise<void> {
+    // Set initial state
+    const state: ProcessingState = {
+      fileKey,
+      fileName: file.name,
+      status: 'processing',
+      progress: 0,
+      strategy: 'immediate_full', // this baseline service always renders every page
+      startedAt: Date.now()
+    };
+
+    this.processing.set(fileKey, state);
+    this.notifyListeners();
+
+    try {
+      // Process the file with progress updates
+      const processedFile = await this.processFileWithProgress(file, (progress) => {
+        state.progress = progress;
+        this.notifyListeners();
+      });
+
+      // Cache the result
+      this.cache.set(fileKey, processedFile);
+
+      // Update state to completed
+      state.status = 'completed';
+      state.progress = 100;
+      state.completedAt = Date.now();
+      this.notifyListeners();
+
+      // Remove from processing map after brief delay
+      setTimeout(() => {
+        this.processing.delete(fileKey);
+        this.notifyListeners();
+      }, 2000);
+
+    } catch (error) {
+      console.error('Processing failed for', file.name, ':', error);
+      state.status = 'error';
+      // Wrap the raw error in the ProcessingError shape expected by the state type
+      state.error = {
+        type: 'parsing',
+        message: error instanceof Error ? error.message : 'Unknown error',
+        recoverable: false,
+        retryCount: 0,
+        maxRetries: 0,
+        originalError: error instanceof Error ? error : undefined
+      };
+      this.notifyListeners();
+
+      // Remove failed processing after delay
+      setTimeout(() => {
+        this.processing.delete(fileKey);
+        this.notifyListeners();
+      }, 5000);
+    }
+  }
+
+  private async processFileWithProgress(
+    file: File,
+    onProgress: (progress: number) => void
+  ): Promise<ProcessedFile> {
+    const arrayBuffer = await file.arrayBuffer();
+    const pdf = await getDocument({ data: arrayBuffer }).promise;
+    const totalPages = pdf.numPages;
+
+    onProgress(10); // PDF loaded
+
+    const pages: PDFPage[] = [];
+
+    for (let i = 1; i <= totalPages; i++) {
+      const page = await pdf.getPage(i);
+      const viewport = page.getViewport({ scale: 0.5 });
+      const canvas = document.createElement('canvas');
+      canvas.width = viewport.width;
+      canvas.height = viewport.height;
+
+      const context = canvas.getContext('2d');
+      if (context) {
+        await page.render({ canvasContext: context, viewport }).promise;
+        const thumbnail = canvas.toDataURL();
+
+        pages.push({
+          id: `${file.name}-page-${i}`,
+          pageNumber: i,
+          thumbnail,
+          rotation: 0,
+          selected: false
+        });
+      }
+
+      // Update progress
+      const progress = 10 + (i / totalPages) * 85; // 10-95%
+      onProgress(progress);
+    }
+
+    pdf.destroy();
+    onProgress(100);
+
+    return {
+      id: `${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
+      pages,
+      totalPages,
+      metadata: {
+        title: file.name,
+        createdAt: new Date().toISOString(),
+        modifiedAt: new Date().toISOString()
+      }
+    };
+  }
+
+  // State subscription for components
+  onProcessingChange(callback: (states: Map<string, ProcessingState>) => void): () => void {
+    this.processingListeners.add(callback);
+    return () => this.processingListeners.delete(callback);
+  }
+
+  getProcessingStates(): Map<string, ProcessingState> {
+    return new Map(this.processing);
+  }
+
+  private notifyListeners(): void {
+    this.processingListeners.forEach(callback => callback(this.processing));
+  }
+
+  generateFileKey(file: File): string {
+    return `${file.name}-${file.size}-${file.lastModified}`;
+  }
+
+  // Cleanup method for activeFiles changes
+  cleanup(removedFiles: File[]): void {
+    removedFiles.forEach(file => {
+      const key = this.generateFileKey(file);
+      this.cache.delete(key);
+      this.processing.delete(key);
+    });
+    this.notifyListeners();
+  }
+
+  // Get cache stats (for debugging)
+  getCacheStats() {
+    return this.cache.getStats();
+  }
+
+  // Clear all cache and processing
+  clearAll(): void {
+    this.cache.clear();
+    this.processing.clear();
+    this.notifyListeners();
+  }
+}
+
+// Export singleton instance
+export const pdfProcessingService = PDFProcessingService.getInstance();
\ No newline at end of file
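The subscription contract above is designed to drop straight into React: onProcessingChange returns its own unsubscribe function, which fits useEffect's cleanup slot directly. A sketch, assuming a hypothetical useProcessingStates hook (the real consumer in this patch is useEnhancedProcessedFiles):

import { useEffect, useState } from 'react';
import { pdfProcessingService } from '../services/pdfProcessingService';
import { ProcessingState } from '../types/processing';

// Hypothetical hook: mirrors the service's processing map into React state.
export function useProcessingStates(): Map<string, ProcessingState> {
  const [states, setStates] = useState(() => pdfProcessingService.getProcessingStates());

  useEffect(() => {
    // The returned unsubscribe function doubles as the effect cleanup.
    return pdfProcessingService.onProcessingChange(current => {
      setStates(new Map(current)); // copy so React sees a new reference
    });
  }, []);

  return states;
}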
diff --git a/frontend/src/services/processingCache.ts b/frontend/src/services/processingCache.ts
new file mode 100644
index 000000000..820cfcbef
--- /dev/null
+++ b/frontend/src/services/processingCache.ts
@@ -0,0 +1,138 @@
+import { ProcessedFile, CacheConfig, CacheEntry, CacheStats } from '../types/processing';
+
+export class ProcessingCache {
+  private cache = new Map<string, CacheEntry>();
+  private totalSize = 0;
+
+  constructor(private config: CacheConfig = {
+    maxFiles: 20,
+    maxSizeBytes: 2 * 1024 * 1024 * 1024, // 2GB
+    ttlMs: 30 * 60 * 1000 // 30 minutes
+  }) {}
+
+  set(key: string, data: ProcessedFile): void {
+    // Remove expired entries first
+    this.cleanup();
+
+    // Calculate entry size (rough estimate)
+    const size = this.calculateSize(data);
+
+    // Make room if needed
+    this.makeRoom(size);
+
+    this.cache.set(key, {
+      data,
+      size,
+      lastAccessed: Date.now(),
+      createdAt: Date.now()
+    });
+
+    this.totalSize += size;
+  }
+
+  get(key: string): ProcessedFile | null {
+    const entry = this.cache.get(key);
+    if (!entry) return null;
+
+    // Check TTL
+    if (Date.now() - entry.createdAt > this.config.ttlMs) {
+      this.delete(key);
+      return null;
+    }
+
+    // Update last accessed
+    entry.lastAccessed = Date.now();
+    return entry.data;
+  }
+
+  has(key: string): boolean {
+    const entry = this.cache.get(key);
+    if (!entry) return false;
+
+    // Check TTL
+    if (Date.now() - entry.createdAt > this.config.ttlMs) {
+      this.delete(key);
+      return false;
+    }
+
+    return true;
+  }
+
+  private makeRoom(neededSize: number): void {
+    // Remove oldest entries until we have space
+    while (
+      this.cache.size >= this.config.maxFiles ||
+      this.totalSize + neededSize > this.config.maxSizeBytes
+    ) {
+      const oldestKey = this.findOldestEntry();
+      if (oldestKey) {
+        this.delete(oldestKey);
+      } else break;
+    }
+  }
+
+  private findOldestEntry(): string | null {
+    let oldest: { key: string; lastAccessed: number } | null = null;
+
+    for (const [key, entry] of this.cache) {
+      if (!oldest || entry.lastAccessed < oldest.lastAccessed) {
+        oldest = { key, lastAccessed: entry.lastAccessed };
+      }
+    }
+
+    return oldest?.key || null;
+  }
+
+  private cleanup(): void {
+    const now = Date.now();
+    for (const [key, entry] of this.cache) {
+      if (now - entry.createdAt > this.config.ttlMs) {
+        this.delete(key);
+      }
+    }
+  }
+
+  private calculateSize(data: ProcessedFile): number {
+    // Rough size estimation
+    let size = 0;
+
+    // Estimate size of thumbnails (main memory consumer)
+    data.pages.forEach(page => {
+      if (page.thumbnail) {
+        // Base64 thumbnail is roughly 50KB each
+        size += 50 * 1024;
+      }
+    });
+
+    // Add some overhead for other data
+    size += 10 * 1024; // 10KB overhead
+
+    return size;
+  }
+
+  delete(key: string): void {
+    const entry = this.cache.get(key);
+    if (entry) {
+      this.totalSize -= entry.size;
+      this.cache.delete(key);
+    }
+  }
+
+  clear(): void {
+    this.cache.clear();
+    this.totalSize = 0;
+  }
+
+  getStats(): CacheStats {
+    return {
+      entries: this.cache.size,
+      totalSizeBytes: this.totalSize,
+      maxSizeBytes: this.config.maxSizeBytes
+    };
+  }
+
+  // Get all cached keys (for debugging and cleanup)
+  getKeys(): string[] {
+    return Array.from(this.cache.keys());
+  }
+}
\ No newline at end of file
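To make the eviction rules concrete, a self-contained sketch with deliberately tiny limits (production code relies on the constructor defaults above). get() refreshes recency, so the least recently used entry is the one evicted:

import { ProcessingCache } from '../services/processingCache';
import { ProcessedFile } from '../types/processing';

// Minimal stand-in for a real processing result.
const makeFile = (id: string): ProcessedFile => ({
  id,
  pages: [],
  totalPages: 0,
  metadata: { title: id, createdAt: new Date().toISOString(), modifiedAt: new Date().toISOString() }
});

const cache = new ProcessingCache({ maxFiles: 2, maxSizeBytes: 1024 * 1024, ttlMs: 60 * 1000 });

cache.set('a', makeFile('a'));
cache.set('b', makeFile('b'));
cache.get('a');                // refreshes 'a', making 'b' the LRU entry
cache.set('c', makeFile('c')); // at capacity, so 'b' is evicted
console.log(cache.has('b'));   // false
console.log(cache.getStats()); // { entries: 2, totalSizeBytes: ..., maxSizeBytes: 1048576 }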
diff --git a/frontend/src/services/processingErrorHandler.ts b/frontend/src/services/processingErrorHandler.ts
new file mode 100644
index 000000000..f6871008d
--- /dev/null
+++ b/frontend/src/services/processingErrorHandler.ts
@@ -0,0 +1,282 @@
+import { ProcessingError } from '../types/processing';
+
+export class ProcessingErrorHandler {
+  private static readonly DEFAULT_MAX_RETRIES = 3;
+  private static readonly RETRY_DELAYS = [1000, 2000, 4000]; // Progressive backoff in ms
+
+  /**
+   * Create a ProcessingError from an unknown error
+   */
+  static createProcessingError(
+    error: unknown,
+    retryCount: number = 0,
+    maxRetries: number = this.DEFAULT_MAX_RETRIES
+  ): ProcessingError {
+    const originalError = error instanceof Error ? error : new Error(String(error));
+    const message = originalError.message;
+
+    // Determine error type based on error message and properties
+    const errorType = this.determineErrorType(originalError, message);
+
+    // Determine if error is recoverable
+    const recoverable = this.isRecoverable(errorType, retryCount, maxRetries);
+
+    return {
+      type: errorType,
+      message: this.formatErrorMessage(errorType, message),
+      recoverable,
+      retryCount,
+      maxRetries,
+      originalError
+    };
+  }
+
+  /**
+   * Determine the type of error based on error characteristics
+   */
+  private static determineErrorType(error: Error, message: string): ProcessingError['type'] {
+    const lowerMessage = message.toLowerCase();
+
+    // Network-related errors
+    if (lowerMessage.includes('network') ||
+        lowerMessage.includes('fetch') ||
+        lowerMessage.includes('connection')) {
+      return 'network';
+    }
+
+    // Memory-related errors
+    if (lowerMessage.includes('memory') ||
+        lowerMessage.includes('quota') ||
+        lowerMessage.includes('allocation') ||
+        error.name === 'QuotaExceededError') {
+      return 'memory';
+    }
+
+    // Timeout errors
+    if (lowerMessage.includes('timeout') ||
+        lowerMessage.includes('aborted') ||
+        error.name === 'AbortError') {
+      return 'timeout';
+    }
+
+    // Cancellation (an AbortError has already been classified as a timeout
+    // above, so only explicit cancellation messages are matched here)
+    if (lowerMessage.includes('cancel')) {
+      return 'cancelled';
+    }
+
+    // PDF corruption/parsing errors
+    if (lowerMessage.includes('pdf') ||
+        lowerMessage.includes('parse') ||
+        lowerMessage.includes('invalid') ||
+        lowerMessage.includes('corrupt') ||
+        lowerMessage.includes('malformed')) {
+      return 'corruption';
+    }
+
+    // Default to parsing error
+    return 'parsing';
+  }
+
+  /**
+   * Determine if an error is recoverable based on type and retry count
+   */
+  private static isRecoverable(
+    errorType: ProcessingError['type'],
+    retryCount: number,
+    maxRetries: number
+  ): boolean {
+    // Never recoverable
+    if (errorType === 'cancelled' || errorType === 'corruption') {
+      return false;
+    }
+
+    // Recoverable if we haven't exceeded retry count
+    if (retryCount >= maxRetries) {
+      return false;
+    }
+
+    // Memory errors are usually not recoverable
+    if (errorType === 'memory') {
+      return retryCount < 1; // Only one retry for memory errors
+    }
+
+    // Network and timeout errors are usually recoverable
+    return errorType === 'network' || errorType === 'timeout' || errorType === 'parsing';
+  }
+
+  /**
+   * Format error message for user display
+   */
+  private static formatErrorMessage(errorType: ProcessingError['type'], originalMessage: string): string {
+    switch (errorType) {
+      case 'network':
+        return 'Network connection failed. Please check your internet connection and try again.';
+
+      case 'memory':
+        return 'Insufficient memory to process this file. Try closing other applications or processing a smaller file.';
+
+      case 'timeout':
+        return 'Processing timed out. This file may be too large or complex to process.';
+
+      case 'cancelled':
+        return 'Processing was cancelled by user.';
+
+      case 'corruption':
+        return 'This PDF file appears to be corrupted or encrypted. Please try a different file.';
+
+      case 'parsing':
+        return `Failed to process PDF: ${originalMessage}`;
+
+      default:
+        return `Processing failed: ${originalMessage}`;
+    }
+  }
+
+  /**
+   * Execute an operation with automatic retry logic
+   */
+  static async executeWithRetry<T>(
+    operation: () => Promise<T>,
+    onError?: (error: ProcessingError) => void,
+    maxRetries: number = this.DEFAULT_MAX_RETRIES
+  ): Promise<T> {
+    let lastError: ProcessingError | null = null;
+
+    for (let attempt = 0; attempt <= maxRetries; attempt++) {
+      try {
+        return await operation();
+      } catch (error) {
+        lastError = this.createProcessingError(error, attempt, maxRetries);
+
+        // Notify error handler
+        if (onError) {
+          onError(lastError);
+        }
+
+        // Don't retry if not recoverable
+        if (!lastError.recoverable) {
+          break;
+        }
+
+        // Don't retry on last attempt
+        if (attempt === maxRetries) {
+          break;
+        }
+
+        // Wait before retry with progressive backoff
+        const delay = this.RETRY_DELAYS[Math.min(attempt, this.RETRY_DELAYS.length - 1)];
+        await this.delay(delay);
+
+        console.log(`Retrying operation (attempt ${attempt + 2}/${maxRetries + 1}) after ${delay}ms delay`);
+      }
+    }
+
+    // All retries exhausted
+    throw lastError || new Error('Operation failed after all retries');
+  }
+
+  /**
+   * Create a timeout wrapper for operations
+   */
+  static withTimeout<T>(
+    operation: () => Promise<T>,
+    timeoutMs: number,
+    timeoutMessage: string = 'Operation timed out'
+  ): Promise<T> {
+    return new Promise<T>((resolve, reject) => {
+      const timeoutId = setTimeout(() => {
+        reject(new Error(timeoutMessage));
+      }, timeoutMs);
+
+      operation()
+        .then(result => {
+          clearTimeout(timeoutId);
+          resolve(result);
+        })
+        .catch(error => {
+          clearTimeout(timeoutId);
+          reject(error);
+        });
+    });
+  }
+
+  /**
+   * Create an AbortController that times out after specified duration
+   */
+  static createTimeoutController(timeoutMs: number): AbortController {
+    const controller = new AbortController();
+
+    setTimeout(() => {
+      controller.abort();
+    }, timeoutMs);
+
+    return controller;
+  }
+
+  /**
+   * Check if an error indicates the operation should be retried
+   */
+  static shouldRetry(error: ProcessingError): boolean {
+    return error.recoverable && error.retryCount < error.maxRetries;
+  }
+
+  /**
+   * Get user-friendly suggestions based on error type
+   */
+  static getErrorSuggestions(error: ProcessingError): string[] {
+    switch (error.type) {
+      case 'network':
+        return [
+          'Check your internet connection',
+          'Try refreshing the page',
+          'Try again in a few moments'
+        ];
+
+      case 'memory':
+        return [
+          'Close other browser tabs or applications',
+          'Try processing a smaller file',
+          'Restart your browser',
+          'Use a device with more memory'
+        ];
+
+      case 'timeout':
+        return [
+          'Try processing a smaller file',
+          'Break large files into smaller sections',
+          'Check your internet connection speed'
+        ];
+
+      case 'corruption':
+        return [
+          'Verify the PDF file opens in other applications',
+          'Try re-downloading the file',
+          'Try a different PDF file',
+          'Contact the file creator if it appears corrupted'
+        ];
+
+      case 'parsing':
+        return [
+          'Verify this is a valid PDF file',
+          'Try a different PDF file',
+          'Contact support if the problem persists'
+        ];
+
+      default:
+        return [
+          'Try refreshing the page',
+          'Try again in a few moments',
+          'Contact support if the problem persists'
+        ];
+    }
+  }
+
+  /**
+   * Utility function for delays
+   */
+  private static delay(ms: number): Promise<void> {
+    return new Promise(resolve => setTimeout(resolve, ms));
+  }
+}
\ No newline at end of file
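A sketch of how the two wrappers compose, assuming a hypothetical renderAllPages step: withTimeout bounds each individual attempt, while executeWithRetry retries recoverable failures with progressive backoff and surfaces a ProcessingError when everything is exhausted.

import { ProcessingErrorHandler } from '../services/processingErrorHandler';
import { ProcessingError } from '../types/processing';

declare function renderAllPages(file: File): Promise<void>; // placeholder for the real work

async function processWithSafety(file: File): Promise<void> {
  try {
    await ProcessingErrorHandler.executeWithRetry(
      // Each attempt gets its own 30-second budget.
      () => ProcessingErrorHandler.withTimeout(
        () => renderAllPages(file),
        30_000,
        `Timed out processing ${file.name}`
      ),
      (error: ProcessingError) => console.warn('Attempt failed:', error.type, error.message)
    );
  } catch (error) {
    const processingError = error as ProcessingError;
    console.error(processingError.message);
    ProcessingErrorHandler.getErrorSuggestions(processingError)
      .forEach(suggestion => console.error('-', suggestion));
  }
}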
diff --git a/frontend/src/types/pageEditor.ts b/frontend/src/types/pageEditor.ts
index c4fc19bdd..ba26d1d75 100644
--- a/frontend/src/types/pageEditor.ts
+++ b/frontend/src/types/pageEditor.ts
@@ -1,7 +1,7 @@
 export interface PDFPage {
   id: string;
   pageNumber: number;
-  thumbnail: string;
+  thumbnail: string | null;
   rotation: number;
   selected: boolean;
   splitBefore?: boolean;
@@ -24,4 +24,4 @@ export interface PageOperation {
 export interface UndoRedoState {
   operations: PageOperation[];
   currentIndex: number;
-}
\ No newline at end of file
+}
diff --git a/frontend/src/types/processing.ts b/frontend/src/types/processing.ts
new file mode 100644
index 000000000..65b996d7f
--- /dev/null
+++ b/frontend/src/types/processing.ts
@@ -0,0 +1,91 @@
+export interface ProcessingError {
+  type: 'network' | 'parsing' | 'memory' | 'corruption' | 'timeout' | 'cancelled';
+  message: string;
+  recoverable: boolean;
+  retryCount: number;
+  maxRetries: number;
+  originalError?: Error;
+}
+
+export interface ProcessingState {
+  fileKey: string;
+  fileName: string;
+  status: 'pending' | 'processing' | 'completed' | 'error' | 'cancelled';
+  progress: number; // 0-100
+  strategy: ProcessingStrategy;
+  error?: ProcessingError;
+  startedAt: number;
+  completedAt?: number;
+  estimatedTimeRemaining?: number;
+  currentPage?: number;
+  cancellationToken?: AbortController;
+}
+
+export interface ProcessedFile {
+  id: string;
+  pages: PDFPage[];
+  totalPages: number;
+  metadata: {
+    title: string;
+    createdAt: string;
+    modifiedAt: string;
+  };
+}
+
+export interface PDFPage {
+  id: string;
+  pageNumber: number;
+  thumbnail: string | null;
+  rotation: number;
+  selected: boolean;
+  splitBefore?: boolean;
+}
+
+export interface CacheConfig {
+  maxFiles: number;
+  maxSizeBytes: number;
+  ttlMs: number;
+}
+
+export interface CacheEntry {
+  data: ProcessedFile;
+  size: number;
+  lastAccessed: number;
+  createdAt: number;
+}
+
+export interface CacheStats {
+  entries: number;
+  totalSizeBytes: number;
+  maxSizeBytes: number;
+}
+
+export type ProcessingStrategy = 'immediate_full' | 'progressive_chunked' | 'metadata_only' | 'priority_pages';
+
+export interface ProcessingConfig {
+  strategy: ProcessingStrategy;
+  chunkSize: number; // Pages per chunk
+  thumbnailQuality: 'low' | 'medium' | 'high';
+  priorityPageCount: number; // Number of priority pages to process first
+  useWebWorker: boolean;
+  maxRetries: number;
+  timeoutMs: number;
+}
+
+export interface FileAnalysis {
+  fileSize: number;
+  estimatedPageCount?: number;
+  isEncrypted: boolean;
+  isCorrupted: boolean;
+  recommendedStrategy: ProcessingStrategy;
+  estimatedProcessingTime: number; // milliseconds
+}
+
+export interface ProcessingMetrics {
+  totalFiles: number;
+  completedFiles: number;
+  failedFiles: number;
+  averageProcessingTime: number;
+  cacheHitRate: number;
+  memoryUsage: number;
+}
\ No newline at end of file
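These types are consumed together: a FileAnalysis feeds a ProcessingConfig. A sketch of that mapping, where every concrete number is an illustrative assumption rather than a value mandated by the types:

import { FileAnalysis, ProcessingConfig } from '../types/processing';

// Hypothetical helper translating an analysis into a full config.
function configForAnalysis(analysis: FileAnalysis): ProcessingConfig {
  return {
    strategy: analysis.recommendedStrategy,
    chunkSize: analysis.recommendedStrategy === 'progressive_chunked' ? 20 : 10,
    thumbnailQuality: analysis.fileSize > 100 * 1024 * 1024 ? 'low' : 'medium',
    priorityPageCount: 10,
    useWebWorker: analysis.estimatedProcessingTime > 5_000,
    maxRetries: analysis.isCorrupted ? 0 : 3,
    timeoutMs: Math.max(30_000, analysis.estimatedProcessingTime * 4)
  };
}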
diff --git a/frontend/src/utils/fileHash.ts b/frontend/src/utils/fileHash.ts
new file mode 100644
index 000000000..3ff911a56
--- /dev/null
+++ b/frontend/src/utils/fileHash.ts
@@ -0,0 +1,127 @@
+/**
+ * File hashing utilities for cache key generation
+ */
+
+export class FileHasher {
+  private static readonly CHUNK_SIZE = 64 * 1024; // 64KB chunks for hashing
+
+  /**
+   * Generate a content-based hash for a file
+   * Uses first + last + middle chunks to create a reasonably unique hash
+   * without reading the entire file (which would be expensive for large files)
+   */
+  static async generateContentHash(file: File): Promise<string> {
+    const chunks = await this.getFileChunks(file);
+    const combined = await this.combineChunks(chunks);
+    return await this.hashArrayBuffer(combined);
+  }
+
+  /**
+   * Generate a fast hash based on file metadata
+   * Faster but less collision-resistant than content hash
+   */
+  static generateMetadataHash(file: File): string {
+    const data = `${file.name}-${file.size}-${file.lastModified}-${file.type}`;
+    return this.simpleHash(data);
+  }
+
+  /**
+   * Generate a hybrid hash that balances speed and uniqueness
+   * Uses metadata + small content sample
+   */
+  static async generateHybridHash(file: File): Promise<string> {
+    const metadataHash = this.generateMetadataHash(file);
+
+    // For small files, use full content hash
+    if (file.size <= 1024 * 1024) { // 1MB
+      const contentHash = await this.generateContentHash(file);
+      return `${metadataHash}-${contentHash}`;
+    }
+
+    // For large files, use first chunk only
+    const firstChunk = file.slice(0, this.CHUNK_SIZE);
+    const firstChunkBuffer = await firstChunk.arrayBuffer();
+    const firstChunkHash = await this.hashArrayBuffer(firstChunkBuffer);
+
+    return `${metadataHash}-${firstChunkHash}`;
+  }
+
+  private static async getFileChunks(file: File): Promise<ArrayBuffer[]> {
+    const chunks: ArrayBuffer[] = [];
+
+    // First chunk
+    if (file.size > 0) {
+      const firstChunk = file.slice(0, Math.min(this.CHUNK_SIZE, file.size));
+      chunks.push(await firstChunk.arrayBuffer());
+    }
+
+    // Middle chunk (if file is large enough)
+    if (file.size > this.CHUNK_SIZE * 2) {
+      const middleStart = Math.floor(file.size / 2) - Math.floor(this.CHUNK_SIZE / 2);
+      const middleEnd = middleStart + this.CHUNK_SIZE;
+      const middleChunk = file.slice(middleStart, middleEnd);
+      chunks.push(await middleChunk.arrayBuffer());
+    }
+
+    // Last chunk (if file is large enough and different from first)
+    if (file.size > this.CHUNK_SIZE) {
+      const lastStart = Math.max(file.size - this.CHUNK_SIZE, this.CHUNK_SIZE);
+      const lastChunk = file.slice(lastStart);
+      chunks.push(await lastChunk.arrayBuffer());
+    }
+
+    return chunks;
+  }
+
+  private static async combineChunks(chunks: ArrayBuffer[]): Promise<ArrayBuffer> {
+    const totalLength = chunks.reduce((sum, chunk) => sum + chunk.byteLength, 0);
+    const combined = new Uint8Array(totalLength);
+
+    let offset = 0;
+    for (const chunk of chunks) {
+      combined.set(new Uint8Array(chunk), offset);
+      offset += chunk.byteLength;
+    }
+
+    return combined.buffer;
+  }
+
+  private static async hashArrayBuffer(buffer: ArrayBuffer): Promise<string> {
+    // Use Web Crypto API for proper hashing
+    if (crypto.subtle) {
+      const hashBuffer = await crypto.subtle.digest('SHA-256', buffer);
+      const hashArray = Array.from(new Uint8Array(hashBuffer));
+      return hashArray.map(b => b.toString(16).padStart(2, '0')).join('');
+    }
+
+    // Fallback for environments without crypto.subtle
+    return this.simpleHash(Array.from(new Uint8Array(buffer)).join(''));
+  }
+
+  private static simpleHash(str: string): string {
+    let hash = 0;
+    if (str.length === 0) return hash.toString();
+
+    for (let i = 0; i < str.length; i++) {
+      const char = str.charCodeAt(i);
+      hash = ((hash << 5) - hash) + char;
+      hash = hash & hash; // Convert to 32-bit integer
+    }
+
+    return Math.abs(hash).toString(16);
+  }
+
+  /**
+   * Validate that a file matches its expected hash
+   * Useful for detecting file corruption or changes
+   */
+  static async validateFileHash(file: File, expectedHash: string): Promise<boolean> {
+    try {
+      const actualHash = await this.generateHybridHash(file);
+      return actualHash === expectedHash;
+    } catch (error) {
+      console.error('Hash validation failed:', error);
+      return false;
+    }
+  }
+}
\ No newline at end of file
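A usage sketch for the hasher. Keying the cache on the hybrid hash is an assumption here (the service above currently keys on name-size-lastModified), but it shows the intended trade-off: metadata for speed plus a small content sample for uniqueness.

import { FileHasher } from '../utils/fileHash';

async function example(file: File): Promise<void> {
  // Hybrid hash: metadata plus a 64KB content sample, so two different
  // files with identical name/size/mtime still get distinct keys.
  const key = await FileHasher.generateHybridHash(file);
  console.log('cache key:', key);

  // Later: cheap check that the file still matches the stored key.
  const unchanged = await FileHasher.validateFileHash(file, key);
  console.log('file unchanged:', unchanged);
}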