File management overhaul

Reece 2025-06-27 14:06:16 +01:00
parent 09758ea2b8
commit 42abe83385
14 changed files with 2439 additions and 202 deletions

View File

@@ -6,7 +6,6 @@ import StorageIcon from "@mui/icons-material/Storage";
import VisibilityIcon from "@mui/icons-material/Visibility";
import EditIcon from "@mui/icons-material/Edit";
import { FileWithUrl } from "../../types/file";
import { getFileSize, getFileDate } from "../../utils/fileUtils";
import { useIndexedDBThumbnail } from "../../hooks/useIndexedDBThumbnail";

View File

@@ -1,15 +1,13 @@
import React, { useState, useCallback, useRef, useEffect } from "react";
import {
Button, Text, Center, Checkbox, Box, Tooltip, ActionIcon,
Notification, TextInput, FileInput, LoadingOverlay, Modal, Alert, Container,
Stack, Group, Paper, SimpleGrid
Notification, TextInput, LoadingOverlay, Modal, Alert,
Stack, Group
} from "@mantine/core";
import { useTranslation } from "react-i18next";
import UploadFileIcon from "@mui/icons-material/UploadFile";
import { usePDFProcessor } from "../../hooks/usePDFProcessor";
import { useEnhancedProcessedFiles } from "../../hooks/useEnhancedProcessedFiles";
import { PDFDocument, PDFPage } from "../../types/pageEditor";
import { fileStorage } from "../../services/fileStorage";
import { generateThumbnailForFile } from "../../utils/thumbnailUtils";
import { ProcessedFile as EnhancedProcessedFile } from "../../types/processing";
import { useUndoRedo } from "../../hooks/useUndoRedo";
import {
RotatePagesCommand,
@@ -19,19 +17,16 @@ import {
ToggleSplitCommand
} from "../../commands/pageCommands";
import { pdfExportService } from "../../services/pdfExportService";
import styles from './pageEditor.module.css';
import './pageEditor.module.css';
import PageThumbnail from './PageThumbnail';
import BulkSelectionPanel from './BulkSelectionPanel';
import DragDropGrid from './DragDropGrid';
import FilePickerModal from '../shared/FilePickerModal';
import FileUploadSelector from '../shared/FileUploadSelector';
export interface PageEditorProps {
activeFiles: File[];
setActiveFiles: (files: File[]) => void;
downloadUrl?: string | null;
setDownloadUrl?: (url: string | null) => void;
sharedFiles?: any[]; // For FileUploadSelector when no files loaded
// Optional callbacks to expose internal functions for PageEditorControls
onFunctionsReady?: (functions: {
@@ -55,24 +50,31 @@ export interface PageEditorProps {
const PageEditor = ({
activeFiles,
setActiveFiles,
downloadUrl,
setDownloadUrl,
sharedFiles = [],
onFunctionsReady,
}: PageEditorProps) => {
const { t } = useTranslation();
const { processPDFFile, loading: pdfLoading } = usePDFProcessor();
// Enhanced processing with intelligent strategies
const {
processedFiles: enhancedProcessedFiles,
processingStates,
isProcessing: globalProcessing,
hasProcessingErrors,
processingProgress,
actions: processingActions
} = useEnhancedProcessedFiles(activeFiles, {
strategy: 'priority_pages', // Process first pages immediately
thumbnailQuality: 'low', // Low quality for page editor navigation
priorityPageCount: 10
});
// Single merged document state
const [mergedPdfDocument, setMergedPdfDocument] = useState<PDFDocument | null>(null);
const [processedFiles, setProcessedFiles] = useState<Map<string, PDFDocument>>(new Map());
const [filename, setFilename] = useState<string>("");
// Page editor state
const [selectedPages, setSelectedPages] = useState<string[]>([]);
const [status, setStatus] = useState<string | null>(null);
const [loading, setLoading] = useState(false);
const [error, setError] = useState<string | null>(null);
const [csvInput, setCsvInput] = useState<string>("");
const [selectionMode, setSelectionMode] = useState(false);
@@ -97,87 +99,19 @@ const PageEditor = ({
// Undo/Redo system
const { executeCommand, undo, redo, canUndo, canRedo } = useUndoRedo();
// Process uploaded file
const handleFileUpload = useCallback(async (uploadedFile: File | any) => {
if (!uploadedFile) {
setError('No file provided');
return;
}
let fileToProcess: File;
// Handle FileWithUrl objects from storage
if (uploadedFile.storedInIndexedDB && uploadedFile.arrayBuffer) {
try {
console.log('Converting FileWithUrl to File:', uploadedFile.name);
const arrayBuffer = await uploadedFile.arrayBuffer();
const blob = new Blob([arrayBuffer], { type: uploadedFile.type || 'application/pdf' });
fileToProcess = new File([blob], uploadedFile.name, {
type: uploadedFile.type || 'application/pdf',
lastModified: uploadedFile.lastModified || Date.now()
});
} catch (error) {
console.error('Error converting FileWithUrl:', error);
setError('Unable to load file from storage');
return;
}
} else if (uploadedFile instanceof File) {
fileToProcess = uploadedFile;
} else {
setError('Invalid file object');
console.error('handleFileUpload received unsupported object:', uploadedFile);
return;
}
if (fileToProcess.type !== 'application/pdf') {
setError('Please upload a valid PDF file');
return;
}
const fileKey = `${fileToProcess.name}-${fileToProcess.size}`;
// Skip processing if already processed
if (processedFiles.has(fileKey)) return;
setLoading(true);
setError(null);
try {
const document = await processPDFFile(fileToProcess);
// Store processed document
setProcessedFiles(prev => new Map(prev).set(fileKey, document));
setFilename(fileToProcess.name.replace(/\.pdf$/i, ''));
setSelectedPages([]);
if (document.pages.length > 0) {
// Only store if it's a new file (not from storage)
if (!uploadedFile.storedInIndexedDB) {
const thumbnail = await generateThumbnailForFile(fileToProcess);
await fileStorage.storeFile(fileToProcess, thumbnail);
}
}
setStatus(`PDF loaded successfully with ${document.totalPages} pages`);
} catch (err) {
const errorMessage = err instanceof Error ? err.message : 'Failed to process PDF';
setError(errorMessage);
console.error('PDF processing error:', err);
} finally {
setLoading(false);
}
}, [processPDFFile, activeFiles, setActiveFiles, processedFiles]);
// Process multiple uploaded files - just add them to activeFiles like FileManager does
const handleMultipleFileUpload = useCallback((uploadedFiles: File[]) => {
if (!uploadedFiles || uploadedFiles.length === 0) {
setError('No files provided');
return;
}
// Simply set the activeFiles to the selected files (same as FileManager approach)
setActiveFiles(uploadedFiles);
// Convert enhanced processed files to Page Editor format
const convertToPageEditorFormat = useCallback((enhancedFile: EnhancedProcessedFile, fileName: string): PDFDocument => {
return {
id: enhancedFile.id,
name: fileName,
file: null as any, // We don't need the file reference in the converted format
pages: enhancedFile.pages.map(page => ({
...page,
// Ensure compatibility with existing page editor types
splitBefore: page.splitBefore || false
})),
totalPages: enhancedFile.totalPages
};
}, []);
// Merge multiple PDF documents into one
@@ -188,10 +122,10 @@ const PageEditor = ({
}
if (activeFiles.length === 1) {
// Single file - use it directly
const fileKey = `${activeFiles[0].name}-${activeFiles[0].size}`;
const pdfDoc = processedFiles.get(fileKey);
if (pdfDoc) {
// Single file - use enhanced processed file
const enhancedFile = enhancedProcessedFiles.get(activeFiles[0]);
if (enhancedFile) {
const pdfDoc = convertToPageEditorFormat(enhancedFile, activeFiles[0].name);
setMergedPdfDocument(pdfDoc);
setFilename(activeFiles[0].name.replace(/\.pdf$/i, ''));
}
@@ -202,71 +136,230 @@ const PageEditor = ({
const filenames: string[] = [];
activeFiles.forEach((file, fileIndex) => {
const fileKey = `${file.name}-${file.size}`;
const pdfDoc = processedFiles.get(fileKey);
if (pdfDoc) {
const enhancedFile = enhancedProcessedFiles.get(file);
if (enhancedFile) {
filenames.push(file.name.replace(/\.pdf$/i, ''));
pdfDoc.pages.forEach((page, pageIndex) => {
enhancedFile.pages.forEach((page, pageIndex) => {
// Create new page with updated IDs and page numbers for merged document
const newPage: PDFPage = {
...page,
id: `${fileIndex}-${page.id}`, // Unique ID across all files
pageNumber: totalPages + pageIndex + 1,
sourceFile: file.name // Track which file this page came from
splitBefore: page.splitBefore || false
};
allPages.push(newPage);
});
totalPages += pdfDoc.pages.length;
totalPages += enhancedFile.pages.length;
}
});
const mergedDocument: PDFDocument = {
pages: allPages,
totalPages: totalPages,
title: filenames.join(' + '),
metadata: {
title: filenames.join(' + '),
createdAt: new Date().toISOString(),
modifiedAt: new Date().toISOString(),
}
};
if (allPages.length > 0) {
const mergedDocument: PDFDocument = {
id: `merged-${Date.now()}`,
name: filenames.join(' + '),
file: null as any,
pages: allPages,
totalPages: totalPages
};
setMergedPdfDocument(mergedDocument);
setFilename(filenames.join('_'));
}
}, [activeFiles, processedFiles]);
// Auto-process files from activeFiles
useEffect(() => {
console.log('Auto-processing effect triggered:', {
activeFilesCount: activeFiles.length,
processedFilesCount: processedFiles.size,
activeFileNames: activeFiles.map(f => f.name)
});
activeFiles.forEach(file => {
const fileKey = `${file.name}-${file.size}`;
console.log(`Checking file ${file.name}: processed =`, processedFiles.has(fileKey));
if (!processedFiles.has(fileKey)) {
console.log('Processing file:', file.name);
handleFileUpload(file);
setMergedPdfDocument(mergedDocument);
setFilename(filenames.join('_'));
}
});
}, [activeFiles, processedFiles, handleFileUpload]);
}
}, [activeFiles, enhancedProcessedFiles, convertToPageEditorFormat]);
// Merge multiple PDF documents into one when all files are processed
// Handle file upload from FileUploadSelector
const handleMultipleFileUpload = useCallback((uploadedFiles: File[]) => {
if (!uploadedFiles || uploadedFiles.length === 0) {
setStatus('No files provided');
return;
}
// Simply set the activeFiles to the selected files (same as existing approach)
setActiveFiles(uploadedFiles);
setStatus(`Added ${uploadedFiles.length} file(s) for processing`);
}, [setActiveFiles]);
// Auto-merge documents when enhanced processing completes
useEffect(() => {
if (activeFiles.length > 0) {
const allProcessed = activeFiles.every(file => {
const fileKey = `${file.name}-${file.size}`;
return processedFiles.has(fileKey);
});
const allProcessed = activeFiles.every(file => enhancedProcessedFiles.has(file));
if (allProcessed && activeFiles.length > 0) {
if (allProcessed) {
mergeAllPDFs();
}
} else {
setMergedPdfDocument(null);
}
}, [activeFiles, processedFiles, mergeAllPDFs]);
}, [activeFiles, enhancedProcessedFiles, mergeAllPDFs]);
// Shared PDF instance for thumbnail generation
const [sharedPdfInstance, setSharedPdfInstance] = useState<any>(null);
const [thumbnailGenerationStarted, setThumbnailGenerationStarted] = useState(false);
// Session-based thumbnail cache with 1GB limit
const [thumbnailCache, setThumbnailCache] = useState<Map<string, { thumbnail: string; lastUsed: number; sizeBytes: number }>>(new Map());
const maxCacheSizeBytes = 1024 * 1024 * 1024; // 1GB cache limit
const [currentCacheSize, setCurrentCacheSize] = useState(0);
// Cache management functions
const addThumbnailToCache = useCallback((pageId: string, thumbnail: string) => {
const thumbnailSizeBytes = thumbnail.length * 0.75; // Base64 stores 3 bytes per 4 chars, so decoded size ≈ length * 0.75
setThumbnailCache(prev => {
const newCache = new Map(prev);
const now = Date.now();
// Add new thumbnail
newCache.set(pageId, {
thumbnail,
lastUsed: now,
sizeBytes: thumbnailSizeBytes
});
return newCache;
});
setCurrentCacheSize(prev => {
const newSize = prev + thumbnailSizeBytes;
// If we exceed 1GB, trigger cleanup
if (newSize > maxCacheSizeBytes) {
setTimeout(() => cleanupThumbnailCache(), 0);
}
return newSize;
});
console.log(`Cached thumbnail for ${pageId} (${Math.round(thumbnailSizeBytes / 1024)}KB)`);
}, [maxCacheSizeBytes]);
const getThumbnailFromCache = useCallback((pageId: string): string | null => {
const cached = thumbnailCache.get(pageId);
if (!cached) return null;
// Update last used timestamp
setThumbnailCache(prev => {
const newCache = new Map(prev);
const entry = newCache.get(pageId);
if (entry) {
entry.lastUsed = Date.now();
}
return newCache;
});
return cached.thumbnail;
}, [thumbnailCache]);
const cleanupThumbnailCache = useCallback(() => {
setThumbnailCache(prev => {
const entries = Array.from(prev.entries());
// Sort by last used (oldest first)
entries.sort(([, a], [, b]) => a.lastUsed - b.lastUsed);
const newCache = new Map();
let newSize = 0;
const targetSize = maxCacheSizeBytes * 0.8; // Clean to 80% of limit
// Keep most recently used entries until we hit target size
for (let i = entries.length - 1; i >= 0 && newSize < targetSize; i--) {
const [key, value] = entries[i];
newCache.set(key, value);
newSize += value.sizeBytes;
}
setCurrentCacheSize(newSize);
console.log(`Cleaned thumbnail cache: ${prev.size} → ${newCache.size} entries (${Math.round(newSize / 1024 / 1024)}MB)`);
return newCache;
});
}, [maxCacheSizeBytes]);
const clearThumbnailCache = useCallback(() => {
setThumbnailCache(new Map());
setCurrentCacheSize(0);
console.log('Cleared thumbnail cache');
}, []);
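// Worked example of the budget above (illustrative, not committed code):
// a 200KB base64 data URL is ~204,800 chars, counted as ~150KB
// (length * 0.75) against the 1GB limit, so roughly 7,000 thumbnails
// fit before cleanupThumbnailCache evicts least-recently-used entries
// back down to ~800MB (80% of the limit).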
// Start thumbnail generation process (separate from document loading)
const startThumbnailGeneration = useCallback(async () => {
if (!mergedPdfDocument || activeFiles.length !== 1 || thumbnailGenerationStarted) return;
const file = activeFiles[0];
const totalPages = mergedPdfDocument.totalPages;
console.log(`Starting thumbnail generation for ${totalPages} pages`);
setThumbnailGenerationStarted(true);
try {
// Load PDF ONCE for thumbnail generation (separate from document structure loading)
const arrayBuffer = await file.arrayBuffer();
const { getDocument } = await import('pdfjs-dist');
const pdf = await getDocument({ data: arrayBuffer }).promise;
setSharedPdfInstance(pdf);
console.log('Shared PDF loaded, starting progressive thumbnail generation');
// Process pages in batches
let currentPage = 1;
const batchSize = totalPages > 500 ? 1 : 2; // Slower for massive files
const batchDelay = totalPages > 500 ? 300 : 200; // More delay for massive files
const processBatch = async () => {
const endPage = Math.min(currentPage + batchSize - 1, totalPages);
console.log(`Generating thumbnails for pages ${currentPage}-${endPage}`);
for (let i = currentPage; i <= endPage; i++) {
// Send the shared PDF instance and cache functions to components
window.dispatchEvent(new CustomEvent('generateThumbnail', {
detail: {
pageNumber: i,
sharedPdf: pdf,
getThumbnailFromCache,
addThumbnailToCache
}
}));
}
currentPage += batchSize;
if (currentPage <= totalPages) {
setTimeout(processBatch, batchDelay);
} else {
console.log('Progressive thumbnail generation completed');
}
};
// Start generating thumbnails immediately
processBatch();
} catch (error) {
console.error('Failed to start thumbnail generation:', error);
setThumbnailGenerationStarted(false);
}
}, [mergedPdfDocument, activeFiles, thumbnailGenerationStarted]);
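// Illustrative math for the batch settings above: past 500 pages,
// batchSize is 1 and batchDelay is 300ms, so a 600-page document emits
// one generateThumbnail event per page and the dispatch pass takes
// ~600 x 300ms = 3 minutes; a 100-page document (batchSize 2, 200ms)
// finishes in ~50 x 200ms = 10 seconds.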
// Start thumbnail generation after document loads and UI settles
useEffect(() => {
if (mergedPdfDocument && !thumbnailGenerationStarted) {
// Small delay to let document render, then start thumbnail generation
const timer = setTimeout(startThumbnailGeneration, 1000);
return () => clearTimeout(timer);
}
}, [mergedPdfDocument, startThumbnailGeneration, thumbnailGenerationStarted]);
// Cleanup shared PDF instance and cache when component unmounts or files change
useEffect(() => {
return () => {
if (sharedPdfInstance) {
sharedPdfInstance.destroy();
setSharedPdfInstance(null);
}
setThumbnailGenerationStarted(false);
clearThumbnailCache(); // Clear cache when leaving/changing documents
};
}, [activeFiles, clearThumbnailCache]);
// Clear selections when files change
useEffect(() => {
@@ -275,7 +368,6 @@ const PageEditor = ({
setSelectionMode(false);
}, [activeFiles]);
// Global drag cleanup to handle drops outside valid areas
useEffect(() => {
const handleGlobalDragEnd = () => {
// Clean up drag state when drag operation ends anywhere
@@ -286,7 +378,7 @@ const PageEditor = ({
};
const handleGlobalDrop = (e: DragEvent) => {
// Prevent default to avoid browser navigation on invalid drops
// Prevent default to handle invalid drops
e.preventDefault();
};
@@ -702,7 +794,6 @@ const PageEditor = ({
const closePdf = useCallback(() => {
setActiveFiles([]);
setProcessedFiles(new Map());
setMergedPdfDocument(null);
setSelectedPages([]);
}, [setActiveFiles]);
@@ -749,31 +840,66 @@ const PageEditor = ({
closePdf
]);
// Return early if no merged document - Homepage handles file selection
if (!mergedPdfDocument) {
return (
<Box pos="relative" h="100vh" style={{ overflow: 'auto' }}>
<LoadingOverlay visible={loading || pdfLoading} />
<Container size="lg" p="xl" h="100%" style={{ display: 'flex', alignItems: 'center', justifyContent: 'center' }}>
<FileUploadSelector
title="Select PDFs to edit"
subtitle="Choose files from storage or upload PDFs - multiple files will be merged"
sharedFiles={sharedFiles}
onFilesSelect={handleMultipleFileUpload}
accept={["application/pdf"]}
loading={loading || pdfLoading}
/>
</Container>
</Box>
<Center h="100vh">
<LoadingOverlay visible={globalProcessing} />
{globalProcessing ? (
<Text c="dimmed">Processing PDF files...</Text>
) : (
<Text c="dimmed">Waiting for PDF files...</Text>
)}
</Center>
);
}
return (
<Box pos="relative" h="100vh" style={{ overflow: 'auto' }}>
<LoadingOverlay visible={loading || pdfLoading} />
<LoadingOverlay visible={globalProcessing && !mergedPdfDocument} />
<Box p="md" pt="xl">
{/* Enhanced Processing Status */}
{(globalProcessing || hasProcessingErrors) && (
<Box mb="md" p="sm" style={{ backgroundColor: 'var(--mantine-color-blue-0)', borderRadius: 8 }}>
{globalProcessing && (
<Group justify="space-between" mb="xs">
<Text size="sm" fw={500}>Processing files...</Text>
<Text size="sm" c="dimmed">{Math.round(processingProgress.overall)}%</Text>
</Group>
)}
{Array.from(processingStates.values()).map(state => (
<Group key={state.fileKey} justify="space-between" mb={4}>
<Text size="xs">{state.fileName}</Text>
<Group gap="xs">
<Text size="xs" c="dimmed">{state.progress}%</Text>
{state.error && (
<Button
size="xs"
variant="light"
color="red"
onClick={() => {
// Show error details or retry
console.log('Processing error:', state.error);
}}
>
Error
</Button>
)}
</Group>
</Group>
))}
{hasProcessingErrors && (
<Text size="xs" c="red" mt="xs">
Some files failed to process. Check individual file status above.
</Text>
)}
</Box>
)}
<Group mb="md">
<TextInput
value={filename}
@@ -834,6 +960,7 @@ const PageEditor = ({
page={page}
index={index}
totalPages={mergedPdfDocument.pages.length}
originalFile={activeFiles.length === 1 ? activeFiles[0] : undefined}
selectedPages={selectedPages}
selectionMode={selectionMode}
draggedPage={draggedPage}
@@ -930,12 +1057,6 @@ const PageEditor = ({
)}
</Modal>
<FileInput
ref={fileInputRef}
accept="application/pdf"
onChange={(file) => file && handleFileUpload(file)}
style={{ display: 'none' }}
/>
{status && (
<Notification
@@ -947,18 +1068,6 @@ const PageEditor = ({
{status}
</Notification>
)}
{error && (
<Notification
color="red"
mt="md"
onClose={() => setError(null)}
style={{ position: 'fixed', bottom: 70, right: 20, zIndex: 1000 }}
>
{error}
</Notification>
)}
</Box>
);
};

View File

@@ -1,5 +1,5 @@
import React, { useCallback } from 'react';
import { Text, Checkbox, Tooltip, ActionIcon } from '@mantine/core';
import React, { useCallback, useState, useEffect, useRef } from 'react';
import { Text, Checkbox, Tooltip, ActionIcon, Loader } from '@mantine/core';
import ArrowBackIcon from '@mui/icons-material/ArrowBack';
import ArrowForwardIcon from '@mui/icons-material/ArrowForward';
import RotateLeftIcon from '@mui/icons-material/RotateLeft';
@@ -9,11 +9,18 @@ import ContentCutIcon from '@mui/icons-material/ContentCut';
import DragIndicatorIcon from '@mui/icons-material/DragIndicator';
import { PDFPage } from '../../../types/pageEditor';
import styles from './PageEditor.module.css';
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
// Ensure PDF.js worker is available
if (!GlobalWorkerOptions.workerSrc) {
GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
}
interface PageThumbnailProps {
page: PDFPage;
index: number;
totalPages: number;
originalFile?: File; // For lazy thumbnail generation
selectedPages: string[];
selectionMode: boolean;
draggedPage: string | null;
@@ -43,6 +50,7 @@ const PageThumbnail = ({
page,
index,
totalPages,
originalFile,
selectedPages,
selectionMode,
draggedPage,
@@ -67,6 +75,74 @@ const PageThumbnail = ({
pdfDocument,
setPdfDocument,
}: PageThumbnailProps) => {
const [thumbnailUrl, setThumbnailUrl] = useState<string | null>(page.thumbnail);
const [isLoadingThumbnail, setIsLoadingThumbnail] = useState(false);
// Listen for progressive thumbnail generation events
useEffect(() => {
const handleThumbnailGeneration = (event: CustomEvent) => {
const { pageNumber, sharedPdf, getThumbnailFromCache, addThumbnailToCache } = event.detail;
if (pageNumber === page.pageNumber && !thumbnailUrl && !isLoadingThumbnail) {
// Check cache first
const cachedThumbnail = getThumbnailFromCache(page.id);
if (cachedThumbnail) {
console.log(`Using cached thumbnail for page ${page.pageNumber}`);
setThumbnailUrl(cachedThumbnail);
return;
}
// Generate new thumbnail and cache it
loadThumbnailFromSharedPdf(sharedPdf, addThumbnailToCache);
}
};
window.addEventListener('generateThumbnail', handleThumbnailGeneration as EventListener);
return () => window.removeEventListener('generateThumbnail', handleThumbnailGeneration as EventListener);
}, [page.pageNumber, page.id, thumbnailUrl, isLoadingThumbnail]);
const loadThumbnailFromSharedPdf = async (sharedPdf: any, addThumbnailToCache?: (pageId: string, thumbnail: string) => void) => {
if (isLoadingThumbnail || thumbnailUrl) return;
setIsLoadingThumbnail(true);
try {
const thumbnail = await generateThumbnailFromPdf(sharedPdf);
// Cache the generated thumbnail
if (addThumbnailToCache) {
addThumbnailToCache(page.id, thumbnail);
}
} catch (error) {
console.error(`Failed to load thumbnail for page ${page.pageNumber}:`, error);
} finally {
setIsLoadingThumbnail(false);
}
};
const generateThumbnailFromPdf = async (pdf: any): Promise<string> => {
const pdfPage = await pdf.getPage(page.pageNumber);
const scale = 0.2; // Low quality for page editor
const viewport = pdfPage.getViewport({ scale });
const canvas = document.createElement('canvas');
canvas.width = viewport.width;
canvas.height = viewport.height;
const context = canvas.getContext('2d');
if (!context) {
throw new Error('Could not get canvas context');
}
await pdfPage.render({ canvasContext: context, viewport }).promise;
const thumbnail = canvas.toDataURL('image/jpeg', 0.8);
setThumbnailUrl(thumbnail);
console.log(`Thumbnail generated for page ${page.pageNumber}`);
return thumbnail;
};
// Register this component with pageRefs for animations
const pageElementRef = useCallback((element: HTMLDivElement | null) => {
if (element) {
@@ -162,18 +238,30 @@ const PageThumbnail = ({
justifyContent: 'center'
}}
>
<img
src={page.thumbnail}
alt={`Page ${page.pageNumber}`}
style={{
maxWidth: '100%',
maxHeight: '100%',
objectFit: 'contain',
borderRadius: 2,
transform: `rotate(${page.rotation}deg)`,
transition: 'transform 0.3s ease-in-out'
}}
/>
{thumbnailUrl ? (
<img
src={thumbnailUrl}
alt={`Page ${page.pageNumber}`}
style={{
maxWidth: '100%',
maxHeight: '100%',
objectFit: 'contain',
borderRadius: 2,
transform: `rotate(${page.rotation}deg)`,
transition: 'transform 0.3s ease-in-out'
}}
/>
) : isLoadingThumbnail ? (
<div style={{ textAlign: 'center' }}>
<Loader size="sm" />
<Text size="xs" c="dimmed" mt={4}>Loading...</Text>
</div>
) : (
<div style={{ textAlign: 'center' }}>
<Text size="lg" c="dimmed">📄</Text>
<Text size="xs" c="dimmed" mt={4}>Page {page.pageNumber}</Text>
</div>
)}
</div>
<Text

View File

@@ -0,0 +1,288 @@
import { useState, useEffect } from 'react';
import { ProcessedFile, ProcessingState, ProcessingConfig } from '../types/processing';
import { enhancedPDFProcessingService } from '../services/enhancedPDFProcessingService';
import { FileHasher } from '../utils/fileHash';
interface UseEnhancedProcessedFilesResult {
processedFiles: Map<File, ProcessedFile>;
processingStates: Map<string, ProcessingState>;
isProcessing: boolean;
hasProcessingErrors: boolean;
processingProgress: {
overall: number;
fileProgress: Map<string, number>;
estimatedTimeRemaining: number;
};
cacheStats: {
entries: number;
totalSizeBytes: number;
maxSizeBytes: number;
};
metrics: {
totalFiles: number;
completedFiles: number;
failedFiles: number;
averageProcessingTime: number;
cacheHitRate: number;
};
actions: {
cancelProcessing: (fileKey: string) => void;
retryProcessing: (file: File) => void;
clearCache: () => void;
};
}
export function useEnhancedProcessedFiles(
activeFiles: File[],
config?: Partial<ProcessingConfig>
): UseEnhancedProcessedFilesResult {
const [processedFiles, setProcessedFiles] = useState<Map<File, ProcessedFile>>(new Map());
const [processingStates, setProcessingStates] = useState<Map<string, ProcessingState>>(new Map());
// Subscribe to processing state changes once
useEffect(() => {
const unsubscribe = enhancedPDFProcessingService.onProcessingChange(setProcessingStates);
return unsubscribe;
}, []);
// Process files when activeFiles changes
useEffect(() => {
if (activeFiles.length === 0) {
setProcessedFiles(new Map());
return;
}
const processFiles = async () => {
const newProcessedFiles = new Map<File, ProcessedFile>();
for (const file of activeFiles) {
// Check if we already have this file processed
const existing = processedFiles.get(file);
if (existing) {
newProcessedFiles.set(file, existing);
continue;
}
try {
// Generate proper file key matching the service
const fileKey = await FileHasher.generateHybridHash(file);
console.log('Processing file:', file.name);
const processed = await enhancedPDFProcessingService.processFile(file, config);
if (processed) {
console.log('Got processed file for:', file.name);
newProcessedFiles.set(file, processed);
} else {
console.log('Processing started for:', file.name, '- waiting for completion');
}
} catch (error) {
console.error(`Failed to start processing for ${file.name}:`, error);
}
}
// Update processed files if we have any
if (newProcessedFiles.size > 0) {
setProcessedFiles(newProcessedFiles);
}
};
processFiles();
}, [activeFiles]);
// Listen for processing completion
useEffect(() => {
const checkForCompletedFiles = async () => {
let hasNewFiles = false;
const updatedFiles = new Map(processedFiles);
// Generate file keys for all files first
const fileKeyPromises = activeFiles.map(async (file) => ({
file,
key: await FileHasher.generateHybridHash(file)
}));
const fileKeyPairs = await Promise.all(fileKeyPromises);
for (const { file, key } of fileKeyPairs) {
// Only check files that don't have processed results yet
if (!updatedFiles.has(file)) {
const processingState = processingStates.get(key);
// Check for both processing and recently completed files
// This ensures we catch completed files before they're cleaned up
if (processingState?.status === 'processing' || processingState?.status === 'completed') {
try {
const processed = await enhancedPDFProcessingService.processFile(file, config);
if (processed) {
console.log('Processing completed for:', file.name);
updatedFiles.set(file, processed);
hasNewFiles = true;
}
} catch (error) {
// Ignore errors in completion check
}
}
}
}
if (hasNewFiles) {
setProcessedFiles(updatedFiles);
}
};
// Check every 500ms for completed processing
const interval = setInterval(checkForCompletedFiles, 500);
return () => clearInterval(interval);
}, [activeFiles, processingStates]);
// Cleanup when activeFiles changes
useEffect(() => {
const currentFiles = new Set(activeFiles);
const previousFiles = Array.from(processedFiles.keys());
const removedFiles = previousFiles.filter(file => !currentFiles.has(file));
if (removedFiles.length > 0) {
// Clean up processing service cache
enhancedPDFProcessingService.cleanup(removedFiles);
// Update local state
setProcessedFiles(prev => {
const updated = new Map();
for (const [file, processed] of prev) {
if (currentFiles.has(file)) {
updated.set(file, processed);
}
}
return updated;
});
}
}, [activeFiles]);
// Calculate derived state
const isProcessing = processingStates.size > 0;
const hasProcessingErrors = Array.from(processingStates.values()).some(state => state.status === 'error');
// Calculate overall progress
const processingProgress = calculateProcessingProgress(processingStates);
// Get cache stats and metrics
const cacheStats = enhancedPDFProcessingService.getCacheStats();
const metrics = enhancedPDFProcessingService.getMetrics();
// Action handlers
const actions = {
cancelProcessing: (fileKey: string) => {
enhancedPDFProcessingService.cancelProcessing(fileKey);
},
retryProcessing: async (file: File) => {
try {
await enhancedPDFProcessingService.processFile(file, config);
} catch (error) {
console.error(`Failed to retry processing for ${file.name}:`, error);
}
},
clearCache: () => {
enhancedPDFProcessingService.clearAll();
}
};
return {
processedFiles,
processingStates,
isProcessing,
hasProcessingErrors,
processingProgress,
cacheStats,
metrics,
actions
};
}
/**
* Calculate overall processing progress from individual file states
*/
function calculateProcessingProgress(states: Map<string, ProcessingState>): {
overall: number;
fileProgress: Map<string, number>;
estimatedTimeRemaining: number;
} {
if (states.size === 0) {
return {
overall: 100,
fileProgress: new Map(),
estimatedTimeRemaining: 0
};
}
const fileProgress = new Map<string, number>();
let totalProgress = 0;
let totalEstimatedTime = 0;
for (const [fileKey, state] of states) {
fileProgress.set(fileKey, state.progress);
totalProgress += state.progress;
totalEstimatedTime += state.estimatedTimeRemaining || 0;
}
const overall = totalProgress / states.size;
const estimatedTimeRemaining = totalEstimatedTime;
return {
overall,
fileProgress,
estimatedTimeRemaining
};
}
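// Example: two files at 40% and 80% progress yield overall = (40 + 80) / 2 = 60,
// while estimatedTimeRemaining is simply the sum of the per-file estimates.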
/**
* Hook for getting a single processed file with enhanced features
*/
export function useEnhancedProcessedFile(
file: File | null,
config?: Partial<ProcessingConfig>
): {
processedFile: ProcessedFile | null;
isProcessing: boolean;
processingState: ProcessingState | null;
error: string | null;
canRetry: boolean;
actions: {
cancel: () => void;
retry: () => void;
};
} {
const result = useEnhancedProcessedFiles(file ? [file] : [], config);
const processedFile = file ? result.processedFiles.get(file) || null : null;
// Note: This is async but we can't await in hook return - consider refactoring if needed
const fileKey = file ? '' : ''; // TODO: Handle async file key generation
const processingState = fileKey ? result.processingStates.get(fileKey) || null : null;
const isProcessing = !!processingState;
const error = processingState?.error?.message || null;
const canRetry = processingState?.error?.recoverable || false;
const actions = {
cancel: () => {
if (fileKey) {
result.actions.cancelProcessing(fileKey);
}
},
retry: () => {
if (file) {
result.actions.retryProcessing(file);
}
}
};
return {
processedFile,
isProcessing,
processingState,
error,
canRetry,
actions
};
}
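A minimal consumer sketch for the hook above (the component and markup are illustrative, not part of this commit):

import React from 'react';
import { useEnhancedProcessedFiles } from '../hooks/useEnhancedProcessedFiles';

// Hypothetical component: shows overall progress, then per-file page counts.
function ProcessingSummary({ files }: { files: File[] }) {
  const { processedFiles, isProcessing, processingProgress } =
    useEnhancedProcessedFiles(files, { strategy: 'priority_pages', priorityPageCount: 10 });

  if (isProcessing) {
    return <span>Processing... {Math.round(processingProgress.overall)}%</span>;
  }
  return (
    <ul>
      {files.map(file => (
        <li key={file.name}>
          {file.name}: {processedFiles.get(file)?.totalPages ?? '?'} pages
        </li>
      ))}
    </ul>
  );
}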

View File

@@ -50,18 +50,28 @@ export function usePDFProcessor() {
const pages: PDFPage[] = [];
// Generate thumbnails for all pages
// Create pages without thumbnails initially - load them lazily
for (let i = 1; i <= totalPages; i++) {
const thumbnail = await generatePageThumbnail(file, i);
pages.push({
id: `${file.name}-page-${i}`,
pageNumber: i,
thumbnail,
thumbnail: null, // Will be loaded lazily
rotation: 0,
selected: false
});
}
// Generate thumbnails for first 10 pages immediately for better UX
const priorityPages = Math.min(10, totalPages);
for (let i = 1; i <= priorityPages; i++) {
try {
const thumbnail = await generatePageThumbnail(file, i);
pages[i - 1].thumbnail = thumbnail;
} catch (error) {
console.warn(`Failed to generate thumbnail for page ${i}:`, error);
}
}
// Clean up
pdf.destroy();

View File

@@ -0,0 +1,125 @@
import { useState, useEffect } from 'react';
import { ProcessedFile, ProcessingState } from '../types/processing';
import { pdfProcessingService } from '../services/pdfProcessingService';
interface UseProcessedFilesResult {
processedFiles: Map<File, ProcessedFile>;
processingStates: Map<string, ProcessingState>;
isProcessing: boolean;
hasProcessingErrors: boolean;
cacheStats: {
entries: number;
totalSizeBytes: number;
maxSizeBytes: number;
};
}
export function useProcessedFiles(activeFiles: File[]): UseProcessedFilesResult {
const [processedFiles, setProcessedFiles] = useState<Map<File, ProcessedFile>>(new Map());
const [processingStates, setProcessingStates] = useState<Map<string, ProcessingState>>(new Map());
useEffect(() => {
// Subscribe to processing state changes
const unsubscribe = pdfProcessingService.onProcessingChange(setProcessingStates);
// Check/start processing for each active file
const checkProcessing = async () => {
const newProcessedFiles = new Map<File, ProcessedFile>();
for (const file of activeFiles) {
const processed = await pdfProcessingService.getProcessedFile(file);
if (processed) {
newProcessedFiles.set(file, processed);
}
}
setProcessedFiles(newProcessedFiles);
};
checkProcessing();
return unsubscribe;
}, [activeFiles]);
// Listen for processing completion and update processed files
useEffect(() => {
const updateProcessedFiles = async () => {
const updated = new Map<File, ProcessedFile>();
for (const file of activeFiles) {
const existing = processedFiles.get(file);
if (existing) {
updated.set(file, existing);
} else {
// Check if processing just completed
const processed = await pdfProcessingService.getProcessedFile(file);
if (processed) {
updated.set(file, processed);
}
}
}
setProcessedFiles(updated);
};
// Small delay to allow processing state to settle
const timeoutId = setTimeout(updateProcessedFiles, 100);
return () => clearTimeout(timeoutId);
}, [processingStates, activeFiles]);
// Cleanup when activeFiles changes
useEffect(() => {
const currentFiles = new Set(activeFiles);
const previousFiles = Array.from(processedFiles.keys());
const removedFiles = previousFiles.filter(file => !currentFiles.has(file));
if (removedFiles.length > 0) {
// Clean up processing service cache
pdfProcessingService.cleanup(removedFiles);
// Update local state
setProcessedFiles(prev => {
const updated = new Map();
for (const [file, processed] of prev) {
if (currentFiles.has(file)) {
updated.set(file, processed);
}
}
return updated;
});
}
}, [activeFiles]);
// Derived state
const isProcessing = processingStates.size > 0;
const hasProcessingErrors = Array.from(processingStates.values()).some(state => state.status === 'error');
const cacheStats = pdfProcessingService.getCacheStats();
return {
processedFiles,
processingStates,
isProcessing,
hasProcessingErrors,
cacheStats
};
}
// Hook for getting a single processed file
export function useProcessedFile(file: File | null): {
processedFile: ProcessedFile | null;
isProcessing: boolean;
processingState: ProcessingState | null;
} {
const result = useProcessedFiles(file ? [file] : []);
const processedFile = file ? result.processedFiles.get(file) || null : null;
const fileKey = file ? pdfProcessingService.generateFileKey(file) : '';
const processingState = fileKey ? result.processingStates.get(fileKey) || null : null;
const isProcessing = !!processingState;
return {
processedFile,
isProcessing,
processingState
};
}

View File

@@ -0,0 +1,552 @@
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
import { ProcessedFile, ProcessingState, PDFPage, ProcessingStrategy, ProcessingConfig, ProcessingMetrics } from '../types/processing';
import { ProcessingCache } from './processingCache';
import { FileHasher } from '../utils/fileHash';
import { FileAnalyzer } from './fileAnalyzer';
import { ProcessingErrorHandler } from './processingErrorHandler';
// Set up PDF.js worker
GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
export class EnhancedPDFProcessingService {
private static instance: EnhancedPDFProcessingService;
private cache = new ProcessingCache();
private processing = new Map<string, ProcessingState>();
private processingListeners = new Set<(states: Map<string, ProcessingState>) => void>();
private metrics: ProcessingMetrics = {
totalFiles: 0,
completedFiles: 0,
failedFiles: 0,
averageProcessingTime: 0,
cacheHitRate: 0,
memoryUsage: 0
};
private defaultConfig: ProcessingConfig = {
strategy: 'immediate_full',
chunkSize: 20,
thumbnailQuality: 'medium',
priorityPageCount: 10,
useWebWorker: false,
maxRetries: 3,
timeoutMs: 300000 // 5 minutes
};
private constructor() {}
static getInstance(): EnhancedPDFProcessingService {
if (!EnhancedPDFProcessingService.instance) {
EnhancedPDFProcessingService.instance = new EnhancedPDFProcessingService();
}
return EnhancedPDFProcessingService.instance;
}
/**
* Process a file with intelligent strategy selection
*/
async processFile(file: File, customConfig?: Partial<ProcessingConfig>): Promise<ProcessedFile | null> {
const fileKey = await this.generateFileKey(file);
// Check cache first
const cached = this.cache.get(fileKey);
if (cached) {
console.log('Cache hit for:', file.name);
this.updateMetrics('cacheHit');
return cached;
}
// Check if already processing
if (this.processing.has(fileKey)) {
console.log('Already processing:', file.name);
return null;
}
// Analyze file to determine optimal strategy
const analysis = await FileAnalyzer.analyzeFile(file);
if (analysis.isCorrupted) {
throw new Error(`File ${file.name} appears to be corrupted`);
}
// Create processing config
const config: ProcessingConfig = {
...this.defaultConfig,
strategy: analysis.recommendedStrategy,
...customConfig
};
// Start processing
this.startProcessing(file, fileKey, config, analysis.estimatedProcessingTime);
return null;
}
/**
* Start processing a file with the specified configuration
*/
private async startProcessing(
file: File,
fileKey: string,
config: ProcessingConfig,
estimatedTime: number
): Promise<void> {
// Create cancellation token
const cancellationToken = ProcessingErrorHandler.createTimeoutController(config.timeoutMs);
// Set initial state
const state: ProcessingState = {
fileKey,
fileName: file.name,
status: 'processing',
progress: 0,
strategy: config.strategy,
startedAt: Date.now(),
estimatedTimeRemaining: estimatedTime,
cancellationToken
};
this.processing.set(fileKey, state);
this.notifyListeners();
this.updateMetrics('started');
try {
// Execute processing with retry logic
const processedFile = await ProcessingErrorHandler.executeWithRetry(
() => this.executeProcessingStrategy(file, config, state),
(error) => {
state.error = error;
this.notifyListeners();
},
config.maxRetries
);
// Cache the result
this.cache.set(fileKey, processedFile);
// Update state to completed
state.status = 'completed';
state.progress = 100;
state.completedAt = Date.now();
this.notifyListeners();
this.updateMetrics('completed', Date.now() - state.startedAt);
// Remove from processing map after brief delay
setTimeout(() => {
this.processing.delete(fileKey);
this.notifyListeners();
}, 2000);
} catch (error) {
console.error('Processing failed for', file.name, ':', error);
const processingError = ProcessingErrorHandler.createProcessingError(error);
state.status = 'error';
state.error = processingError;
this.notifyListeners();
this.updateMetrics('failed');
// Remove failed processing after delay
setTimeout(() => {
this.processing.delete(fileKey);
this.notifyListeners();
}, 10000);
}
}
/**
* Execute the actual processing based on strategy
*/
private async executeProcessingStrategy(
file: File,
config: ProcessingConfig,
state: ProcessingState
): Promise<ProcessedFile> {
switch (config.strategy) {
case 'immediate_full':
return this.processImmediateFull(file, config, state);
case 'priority_pages':
return this.processPriorityPages(file, config, state);
case 'progressive_chunked':
return this.processProgressiveChunked(file, config, state);
case 'metadata_only':
return this.processMetadataOnly(file, config, state);
default:
return this.processImmediateFull(file, config, state);
}
}
/**
* Process all pages immediately (for small files)
*/
private async processImmediateFull(
file: File,
config: ProcessingConfig,
state: ProcessingState
): Promise<ProcessedFile> {
const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({ data: arrayBuffer }).promise;
const totalPages = pdf.numPages;
state.progress = 10;
this.notifyListeners();
const pages: PDFPage[] = [];
for (let i = 1; i <= totalPages; i++) {
// Check for cancellation
if (state.cancellationToken?.signal.aborted) {
pdf.destroy();
throw new Error('Processing cancelled');
}
const page = await pdf.getPage(i);
const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality);
pages.push({
id: `${file.name}-page-${i}`,
pageNumber: i,
thumbnail,
rotation: 0,
selected: false
});
// Update progress
state.progress = 10 + (i / totalPages) * 85;
state.currentPage = i;
this.notifyListeners();
}
pdf.destroy();
state.progress = 100;
this.notifyListeners();
return this.createProcessedFile(file, pages, totalPages);
}
/**
* Process priority pages first, then queue the rest
*/
private async processPriorityPages(
file: File,
config: ProcessingConfig,
state: ProcessingState
): Promise<ProcessedFile> {
const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({ data: arrayBuffer }).promise;
const totalPages = pdf.numPages;
state.progress = 10;
this.notifyListeners();
const pages: PDFPage[] = [];
const priorityCount = Math.min(config.priorityPageCount, totalPages);
// Process priority pages first
for (let i = 1; i <= priorityCount; i++) {
if (state.cancellationToken?.signal.aborted) {
pdf.destroy();
throw new Error('Processing cancelled');
}
const page = await pdf.getPage(i);
const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality);
pages.push({
id: `${file.name}-page-${i}`,
pageNumber: i,
thumbnail,
rotation: 0,
selected: false
});
state.progress = 10 + (i / priorityCount) * 60;
state.currentPage = i;
this.notifyListeners();
}
// Create placeholder pages for remaining pages
for (let i = priorityCount + 1; i <= totalPages; i++) {
pages.push({
id: `${file.name}-page-${i}`,
pageNumber: i,
thumbnail: null, // Will be loaded lazily
rotation: 0,
selected: false
});
}
pdf.destroy();
state.progress = 100;
this.notifyListeners();
// Queue background processing for remaining pages (only if there are any)
if (priorityCount < totalPages) {
this.queueBackgroundProcessing(file, priorityCount + 1, totalPages);
}
return this.createProcessedFile(file, pages, totalPages);
}
/**
* Process in chunks with breaks between chunks
*/
private async processProgressiveChunked(
file: File,
config: ProcessingConfig,
state: ProcessingState
): Promise<ProcessedFile> {
const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({ data: arrayBuffer }).promise;
const totalPages = pdf.numPages;
state.progress = 10;
this.notifyListeners();
const pages: PDFPage[] = [];
const chunkSize = config.chunkSize;
let processedPages = 0;
// Process first chunk immediately
const firstChunkEnd = Math.min(chunkSize, totalPages);
for (let i = 1; i <= firstChunkEnd; i++) {
if (state.cancellationToken?.signal.aborted) {
pdf.destroy();
throw new Error('Processing cancelled');
}
const page = await pdf.getPage(i);
const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality);
pages.push({
id: `${file.name}-page-${i}`,
pageNumber: i,
thumbnail,
rotation: 0,
selected: false
});
processedPages++;
state.progress = 10 + (processedPages / totalPages) * 70;
state.currentPage = i;
this.notifyListeners();
// Small delay to prevent UI blocking
if (i % 5 === 0) {
await new Promise(resolve => setTimeout(resolve, 10));
}
}
// Create placeholders for remaining pages
for (let i = firstChunkEnd + 1; i <= totalPages; i++) {
pages.push({
id: `${file.name}-page-${i}`,
pageNumber: i,
thumbnail: null,
rotation: 0,
selected: false
});
}
pdf.destroy();
state.progress = 100;
this.notifyListeners();
// Queue remaining chunks for background processing (only if there are any)
if (firstChunkEnd < totalPages) {
this.queueChunkedBackgroundProcessing(file, firstChunkEnd + 1, totalPages, chunkSize);
}
return this.createProcessedFile(file, pages, totalPages);
}
/**
* Process metadata only (for very large files)
*/
private async processMetadataOnly(
file: File,
config: ProcessingConfig,
state: ProcessingState
): Promise<ProcessedFile> {
const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({ data: arrayBuffer }).promise;
const totalPages = pdf.numPages;
state.progress = 50;
this.notifyListeners();
// Create placeholder pages without thumbnails
const pages: PDFPage[] = [];
for (let i = 1; i <= totalPages; i++) {
pages.push({
id: `${file.name}-page-${i}`,
pageNumber: i,
thumbnail: null,
rotation: 0,
selected: false
});
}
pdf.destroy();
state.progress = 100;
this.notifyListeners();
return this.createProcessedFile(file, pages, totalPages);
}
/**
* Render a page thumbnail with specified quality
*/
private async renderPageThumbnail(page: any, quality: 'low' | 'medium' | 'high'): Promise<string> {
const scales = { low: 0.2, medium: 0.5, high: 0.8 }; // Reduced low quality for page editor
const scale = scales[quality];
const viewport = page.getViewport({ scale });
const canvas = document.createElement('canvas');
canvas.width = viewport.width;
canvas.height = viewport.height;
const context = canvas.getContext('2d');
if (!context) {
throw new Error('Could not get canvas context');
}
await page.render({ canvasContext: context, viewport }).promise;
return canvas.toDataURL('image/jpeg', 0.8); // Use JPEG for better compression
}
/**
* Create a ProcessedFile object
*/
private createProcessedFile(file: File, pages: PDFPage[], totalPages: number): ProcessedFile {
return {
id: `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
pages,
totalPages,
metadata: {
title: file.name,
createdAt: new Date().toISOString(),
modifiedAt: new Date().toISOString()
}
};
}
/**
* Queue background processing for remaining pages
*/
private queueBackgroundProcessing(file: File, startPage: number, endPage: number): void {
// TODO: Implement background processing queue
console.log(`Queued background processing for ${file.name} pages ${startPage}-${endPage}`);
}
/**
* Queue chunked background processing
*/
private queueChunkedBackgroundProcessing(file: File, startPage: number, endPage: number, chunkSize: number): void {
// TODO: Implement chunked background processing
console.log(`Queued chunked background processing for ${file.name} pages ${startPage}-${endPage} in chunks of ${chunkSize}`);
}
/**
* Generate a unique, collision-resistant cache key
*/
private async generateFileKey(file: File): Promise<string> {
return await FileHasher.generateHybridHash(file);
}
/**
* Cancel processing for a specific file
*/
cancelProcessing(fileKey: string): void {
const state = this.processing.get(fileKey);
if (state && state.cancellationToken) {
state.cancellationToken.abort();
state.status = 'cancelled';
this.notifyListeners();
}
}
/**
* Update processing metrics
*/
private updateMetrics(event: 'started' | 'completed' | 'failed' | 'cacheHit', processingTime?: number): void {
switch (event) {
case 'started':
this.metrics.totalFiles++;
break;
case 'completed':
this.metrics.completedFiles++;
if (processingTime) {
// Update rolling average
const totalProcessingTime = this.metrics.averageProcessingTime * (this.metrics.completedFiles - 1) + processingTime;
this.metrics.averageProcessingTime = totalProcessingTime / this.metrics.completedFiles;
}
break;
case 'failed':
this.metrics.failedFiles++;
break;
case 'cacheHit':
// Update cache hit rate
const totalAttempts = this.metrics.totalFiles + 1;
this.metrics.cacheHitRate = (this.metrics.cacheHitRate * this.metrics.totalFiles + 1) / totalAttempts;
break;
}
}
/**
* Get processing metrics
*/
getMetrics(): ProcessingMetrics {
return { ...this.metrics };
}
/**
* State subscription for components
*/
onProcessingChange(callback: (states: Map<string, ProcessingState>) => void): () => void {
this.processingListeners.add(callback);
return () => this.processingListeners.delete(callback);
}
getProcessingStates(): Map<string, ProcessingState> {
return new Map(this.processing);
}
private notifyListeners(): void {
this.processingListeners.forEach(callback => callback(this.processing));
}
/**
* Cleanup method for removed files
*/
cleanup(removedFiles: File[]): void {
removedFiles.forEach(async (file) => {
const key = await this.generateFileKey(file);
this.cache.delete(key);
this.cancelProcessing(key);
this.processing.delete(key);
});
this.notifyListeners();
}
/**
* Get cache statistics
*/
getCacheStats() {
return this.cache.getStats();
}
/**
* Clear all cache and processing
*/
clearAll(): void {
this.cache.clear();
this.processing.clear();
this.notifyListeners();
}
}
// Export singleton instance
export const enhancedPDFProcessingService = EnhancedPDFProcessingService.getInstance();
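FileHasher.generateHybridHash is imported above but its source is not part of this excerpt; the following is a plausible sketch, purely an assumption about what a "hybrid" hash might combine (a content-prefix digest plus the file size):

// Hypothetical sketch of utils/fileHash.ts -- NOT the committed implementation.
export class FileHasher {
  // Digest only the first 64KB so hashing stays cheap for large PDFs,
  // then append the byte size to reduce prefix collisions.
  static async generateHybridHash(file: File): Promise<string> {
    const prefix = await file.slice(0, 64 * 1024).arrayBuffer();
    const digest = await crypto.subtle.digest('SHA-256', prefix);
    const hex = Array.from(new Uint8Array(digest))
      .map(byte => byte.toString(16).padStart(2, '0'))
      .join('');
    return `${hex}-${file.size}`;
  }
}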

View File

@@ -0,0 +1,240 @@
import { getDocument } from 'pdfjs-dist';
import { FileAnalysis, ProcessingStrategy } from '../types/processing';
export class FileAnalyzer {
private static readonly SIZE_THRESHOLDS = {
SMALL: 10 * 1024 * 1024, // 10MB
MEDIUM: 50 * 1024 * 1024, // 50MB
LARGE: 200 * 1024 * 1024, // 200MB
};
private static readonly PAGE_THRESHOLDS = {
FEW: 10, // < 10 pages - immediate full processing
MANY: 50, // < 50 pages - priority pages
MASSIVE: 100, // < 100 pages - progressive chunked
// >100 pages = metadata only
};
/**
* Analyze a file to determine optimal processing strategy
*/
static async analyzeFile(file: File): Promise<FileAnalysis> {
const analysis: FileAnalysis = {
fileSize: file.size,
isEncrypted: false,
isCorrupted: false,
recommendedStrategy: 'metadata_only',
estimatedProcessingTime: 0,
};
try {
// Quick validation and page count estimation
const quickAnalysis = await this.quickPDFAnalysis(file);
analysis.estimatedPageCount = quickAnalysis.pageCount;
analysis.isEncrypted = quickAnalysis.isEncrypted;
analysis.isCorrupted = quickAnalysis.isCorrupted;
// Determine strategy based on file characteristics
analysis.recommendedStrategy = this.determineStrategy(file.size, quickAnalysis.pageCount);
// Estimate processing time
analysis.estimatedProcessingTime = this.estimateProcessingTime(
file.size,
quickAnalysis.pageCount,
analysis.recommendedStrategy
);
} catch (error) {
console.error('File analysis failed:', error);
analysis.isCorrupted = true;
analysis.recommendedStrategy = 'metadata_only';
}
return analysis;
}
/**
* Quick PDF analysis without full processing
*/
private static async quickPDFAnalysis(file: File): Promise<{
pageCount: number;
isEncrypted: boolean;
isCorrupted: boolean;
}> {
try {
// For small files, read the whole file
// For large files, try the whole file first (PDF.js needs the complete structure)
const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({
data: arrayBuffer,
stopAtErrors: false, // Don't stop at minor errors
verbosity: 0 // Suppress PDF.js warnings
}).promise;
const pageCount = pdf.numPages;
const isEncrypted = pdf.isEncrypted;
// Clean up
pdf.destroy();
return {
pageCount,
isEncrypted,
isCorrupted: false
};
} catch (error) {
// Try to determine if it's corruption vs encryption
const errorMessage = error instanceof Error ? error.message.toLowerCase() : '';
const isEncrypted = errorMessage.includes('password') || errorMessage.includes('encrypted');
return {
pageCount: 0,
isEncrypted,
isCorrupted: !isEncrypted // If not encrypted, probably corrupted
};
}
}
/**
* Determine the best processing strategy based on file characteristics
*/
private static determineStrategy(fileSize: number, pageCount?: number): ProcessingStrategy {
// Handle corrupted or encrypted files
if (!pageCount || pageCount === 0) {
return 'metadata_only';
}
// Small files with few pages - process everything immediately
if (fileSize <= this.SIZE_THRESHOLDS.SMALL && pageCount <= this.PAGE_THRESHOLDS.FEW) {
return 'immediate_full';
}
// Medium files or many pages - priority pages first, then progressive
if (fileSize <= this.SIZE_THRESHOLDS.MEDIUM && pageCount <= this.PAGE_THRESHOLDS.MANY) {
return 'priority_pages';
}
// Large files or massive page counts - chunked processing
if (fileSize <= this.SIZE_THRESHOLDS.LARGE && pageCount <= this.PAGE_THRESHOLDS.MASSIVE) {
return 'progressive_chunked';
}
// Very large files - metadata only
return 'metadata_only';
}
/**
* Estimate processing time based on file characteristics and strategy
*/
private static estimateProcessingTime(
fileSize: number,
pageCount: number = 0,
strategy: ProcessingStrategy
): number {
const baseTimes = {
immediate_full: 200, // 200ms per page
priority_pages: 150, // 150ms per page (optimized)
progressive_chunked: 100, // 100ms per page (chunked)
metadata_only: 50 // 50ms total
};
const baseTime = baseTimes[strategy];
switch (strategy) {
case 'metadata_only':
return baseTime;
case 'immediate_full':
return pageCount * baseTime;
case 'priority_pages':
// Estimate time for priority pages (first 10)
const priorityPages = Math.min(pageCount, 10);
return priorityPages * baseTime;
case 'progressive_chunked':
// Estimate time for first chunk (20 pages)
const firstChunk = Math.min(pageCount, 20);
return firstChunk * baseTime;
default:
return pageCount * baseTime;
}
}
/**
* Get processing recommendations for a set of files
*/
static async analyzeMultipleFiles(files: File[]): Promise<{
analyses: Map<File, FileAnalysis>;
recommendations: {
totalEstimatedTime: number;
suggestedBatchSize: number;
shouldUseWebWorker: boolean;
memoryWarning: boolean;
};
}> {
const analyses = new Map<File, FileAnalysis>();
let totalEstimatedTime = 0;
let totalSize = 0;
let totalPages = 0;
// Analyze each file
for (const file of files) {
const analysis = await this.analyzeFile(file);
analyses.set(file, analysis);
totalEstimatedTime += analysis.estimatedProcessingTime;
totalSize += file.size;
totalPages += analysis.estimatedPageCount || 0;
}
// Generate recommendations
const recommendations = {
totalEstimatedTime,
suggestedBatchSize: this.calculateBatchSize(files.length, totalSize),
shouldUseWebWorker: totalPages > 100 || totalSize > this.SIZE_THRESHOLDS.MEDIUM,
memoryWarning: totalSize > this.SIZE_THRESHOLDS.LARGE || totalPages > this.PAGE_THRESHOLDS.MASSIVE
};
return { analyses, recommendations };
}
/**
* Calculate optimal batch size for processing multiple files
*/
private static calculateBatchSize(fileCount: number, totalSize: number): number {
// Process small batches for large total sizes
if (totalSize > this.SIZE_THRESHOLDS.LARGE) {
return Math.max(1, Math.floor(fileCount / 4));
}
if (totalSize > this.SIZE_THRESHOLDS.MEDIUM) {
return Math.max(2, Math.floor(fileCount / 2));
}
// Process all at once for smaller total sizes
return fileCount;
}
/**
* Check if a file appears to be a valid PDF
*/
static async isValidPDF(file: File): Promise<boolean> {
if (file.type !== 'application/pdf' && !file.name.toLowerCase().endsWith('.pdf')) {
return false;
}
try {
// Read first few bytes to check PDF header
const header = file.slice(0, 8);
const headerBytes = new Uint8Array(await header.arrayBuffer());
const headerString = String.fromCharCode(...headerBytes);
return headerString.startsWith('%PDF-');
} catch (error) {
return false;
}
}
}
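As a worked example of the thresholds above (illustration only): a 30MB, 40-page file fails the SMALL check but fits SIZE_THRESHOLDS.MEDIUM and PAGE_THRESHOLDS.MANY, so determineStrategy returns 'priority_pages' and estimateProcessingTime reports min(40, 10) × 150ms = 1.5s for the priority pages; a 250MB file exceeds every size threshold and falls through to 'metadata_only' at a flat 50ms.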

View File

@@ -0,0 +1,188 @@
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
import { ProcessedFile, ProcessingState, PDFPage } from '../types/processing';
import { ProcessingCache } from './processingCache';
// Set up PDF.js worker
GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
export class PDFProcessingService {
private static instance: PDFProcessingService;
private cache = new ProcessingCache();
private processing = new Map<string, ProcessingState>();
private processingListeners = new Set<(states: Map<string, ProcessingState>) => void>();
private constructor() {}
static getInstance(): PDFProcessingService {
if (!PDFProcessingService.instance) {
PDFProcessingService.instance = new PDFProcessingService();
}
return PDFProcessingService.instance;
}
async getProcessedFile(file: File): Promise<ProcessedFile | null> {
const fileKey = this.generateFileKey(file);
// Check cache first
const cached = this.cache.get(fileKey);
if (cached) {
console.log('Cache hit for:', file.name);
return cached;
}
// Check if already processing
if (this.processing.has(fileKey)) {
console.log('Already processing:', file.name);
return null; // Will be available when processing completes
}
// Start processing
this.startProcessing(file, fileKey);
return null;
}
private async startProcessing(file: File, fileKey: string): Promise<void> {
// Set initial state
const state: ProcessingState = {
fileKey,
fileName: file.name,
status: 'processing',
progress: 0,
startedAt: Date.now()
};
this.processing.set(fileKey, state);
this.notifyListeners();
try {
// Process the file with progress updates
const processedFile = await this.processFileWithProgress(file, (progress) => {
state.progress = progress;
this.notifyListeners();
});
// Cache the result
this.cache.set(fileKey, processedFile);
// Update state to completed
state.status = 'completed';
state.progress = 100;
state.completedAt = Date.now();
this.notifyListeners();
// Remove from processing map after brief delay
setTimeout(() => {
this.processing.delete(fileKey);
this.notifyListeners();
}, 2000);
} catch (error) {
console.error('Processing failed for', file.name, ':', error);
state.status = 'error';
state.error = error instanceof Error ? error.message : 'Unknown error';
this.notifyListeners();
// Remove failed processing after delay
setTimeout(() => {
this.processing.delete(fileKey);
this.notifyListeners();
}, 5000);
}
}
private async processFileWithProgress(
file: File,
onProgress: (progress: number) => void
): Promise<ProcessedFile> {
const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({ data: arrayBuffer }).promise;
const totalPages = pdf.numPages;
onProgress(10); // PDF loaded
const pages: PDFPage[] = [];
for (let i = 1; i <= totalPages; i++) {
const page = await pdf.getPage(i);
const viewport = page.getViewport({ scale: 0.5 });
const canvas = document.createElement('canvas');
canvas.width = viewport.width;
canvas.height = viewport.height;
const context = canvas.getContext('2d');
let thumbnail: string | null = null;
if (context) {
await page.render({ canvasContext: context, viewport }).promise;
thumbnail = canvas.toDataURL();
}
// Push the page even when no 2D context is available, so the pages
// array stays contiguous and matches totalPages (thumbnail stays null)
pages.push({
id: `${file.name}-page-${i}`,
pageNumber: i,
thumbnail,
rotation: 0,
selected: false
});
// Update progress
const progress = 10 + (i / totalPages) * 85; // 10-95%
onProgress(progress);
}
pdf.destroy();
onProgress(100);
return {
id: `${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
pages,
totalPages,
metadata: {
title: file.name,
createdAt: new Date().toISOString(),
modifiedAt: new Date().toISOString()
}
};
}
// State subscription for components
onProcessingChange(callback: (states: Map<string, ProcessingState>) => void): () => void {
this.processingListeners.add(callback);
return () => this.processingListeners.delete(callback);
}
getProcessingStates(): Map<string, ProcessingState> {
return new Map(this.processing);
}
private notifyListeners(): void {
// Hand listeners a snapshot so they cannot mutate internal state
// (consistent with getProcessingStates above)
this.processingListeners.forEach(callback => callback(new Map(this.processing)));
}
generateFileKey(file: File): string {
return `${file.name}-${file.size}-${file.lastModified}`;
}
// Cleanup method for activeFiles changes
cleanup(removedFiles: File[]): void {
removedFiles.forEach(file => {
const key = this.generateFileKey(file);
this.cache.delete(key);
this.processing.delete(key);
});
this.notifyListeners();
}
// Get cache stats (for debugging)
getCacheStats() {
return this.cache.getStats();
}
// Clear all cache and processing
clearAll(): void {
this.cache.clear();
this.processing.clear();
this.notifyListeners();
}
}
// Export singleton instance
export const pdfProcessingService = PDFProcessingService.getInstance();
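
A hedged consumption sketch (editor's addition): subscribe for progress, then poll getProcessedFile until the result lands in the cache. The 500ms poll interval is illustrative; a production caller would also bail out when the state reaches 'error'.

import { pdfProcessingService } from './pdfProcessingService';

async function loadWithProgress(file: File): Promise<void> {
  const key = pdfProcessingService.generateFileKey(file);
  const unsubscribe = pdfProcessingService.onProcessingChange(states => {
    const state = states.get(key);
    if (state) console.log(`${state.fileName}: ${state.status} ${Math.round(state.progress)}%`);
  });

  // Returns null while processing; the cached result appears on completion
  let processed = await pdfProcessingService.getProcessedFile(file);
  while (!processed) {
    await new Promise(resolve => setTimeout(resolve, 500));
    processed = await pdfProcessingService.getProcessedFile(file);
  }

  unsubscribe();
  console.log(`Ready: ${processed.totalPages} pages`);
}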

View File

@@ -0,0 +1,138 @@
import { ProcessedFile, CacheConfig, CacheEntry, CacheStats } from '../types/processing';
export class ProcessingCache {
private cache = new Map<string, CacheEntry>();
private totalSize = 0;
constructor(private config: CacheConfig = {
maxFiles: 20,
maxSizeBytes: 2 * 1024 * 1024 * 1024, // 2GB
ttlMs: 30 * 60 * 1000 // 30 minutes
}) {}
set(key: string, data: ProcessedFile): void {
// Remove expired entries first
this.cleanup();
// Calculate entry size (rough estimate)
const size = this.calculateSize(data);
// Make room if needed
this.makeRoom(size);
this.cache.set(key, {
data,
size,
lastAccessed: Date.now(),
createdAt: Date.now()
});
this.totalSize += size;
}
get(key: string): ProcessedFile | null {
const entry = this.cache.get(key);
if (!entry) return null;
// Check TTL
if (Date.now() - entry.createdAt > this.config.ttlMs) {
this.delete(key);
return null;
}
// Update last accessed
entry.lastAccessed = Date.now();
return entry.data;
}
has(key: string): boolean {
const entry = this.cache.get(key);
if (!entry) return false;
// Check TTL
if (Date.now() - entry.createdAt > this.config.ttlMs) {
this.delete(key);
return false;
}
return true;
}
private makeRoom(neededSize: number): void {
// Remove oldest entries until we have space
while (
this.cache.size >= this.config.maxFiles ||
this.totalSize + neededSize > this.config.maxSizeBytes
) {
const oldestKey = this.findOldestEntry();
if (oldestKey) {
this.delete(oldestKey);
} else break;
}
}
private findOldestEntry(): string | null {
let oldest: { key: string; lastAccessed: number } | null = null;
for (const [key, entry] of this.cache) {
if (!oldest || entry.lastAccessed < oldest.lastAccessed) {
oldest = { key, lastAccessed: entry.lastAccessed };
}
}
return oldest?.key || null;
}
private cleanup(): void {
const now = Date.now();
for (const [key, entry] of this.cache) {
if (now - entry.createdAt > this.config.ttlMs) {
this.delete(key);
}
}
}
private calculateSize(data: ProcessedFile): number {
// Rough size estimation
let size = 0;
// Estimate size of thumbnails (main memory consumer)
data.pages.forEach(page => {
if (page.thumbnail) {
// Base64 thumbnails are roughly 50KB each
size += 50 * 1024;
}
});
// Add some overhead for other data
size += 10 * 1024; // 10KB overhead
return size;
}
delete(key: string): void {
const entry = this.cache.get(key);
if (entry) {
this.totalSize -= entry.size;
this.cache.delete(key);
}
}
clear(): void {
this.cache.clear();
this.totalSize = 0;
}
getStats(): CacheStats {
return {
entries: this.cache.size,
totalSizeBytes: this.totalSize,
maxSizeBytes: this.config.maxSizeBytes
};
}
// Get all cached keys (for debugging and cleanup)
getKeys(): string[] {
return Array.from(this.cache.keys());
}
}
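
The cache also works standalone with a custom config. A small sketch (editor's addition; the tightened limits are illustrative, not defaults defined in this commit):

import { ProcessingCache } from './processingCache';
import { ProcessedFile } from '../types/processing';

// A smaller cache for a memory-constrained context
const cache = new ProcessingCache({
  maxFiles: 5,
  maxSizeBytes: 256 * 1024 * 1024, // 256MB
  ttlMs: 10 * 60 * 1000 // 10 minutes
});

function remember(key: string, data: ProcessedFile): void {
  cache.set(key, data); // makeRoom evicts least-recently-used entries as needed
  const { entries, totalSizeBytes, maxSizeBytes } = cache.getStats();
  console.log(`cache: ${entries} entries, ${totalSizeBytes}/${maxSizeBytes} bytes`);
}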

View File

@@ -0,0 +1,282 @@
import { ProcessingError } from '../types/processing';
export class ProcessingErrorHandler {
private static readonly DEFAULT_MAX_RETRIES = 3;
private static readonly RETRY_DELAYS = [1000, 2000, 4000]; // Progressive backoff in ms
/**
* Create a ProcessingError from an unknown error
*/
static createProcessingError(
error: unknown,
retryCount: number = 0,
maxRetries: number = this.DEFAULT_MAX_RETRIES
): ProcessingError {
const originalError = error instanceof Error ? error : new Error(String(error));
const message = originalError.message;
// Determine error type based on error message and properties
const errorType = this.determineErrorType(originalError, message);
// Determine if error is recoverable
const recoverable = this.isRecoverable(errorType, retryCount, maxRetries);
return {
type: errorType,
message: this.formatErrorMessage(errorType, message),
recoverable,
retryCount,
maxRetries,
originalError
};
}
/**
* Determine the type of error based on error characteristics
*/
private static determineErrorType(error: Error, message: string): ProcessingError['type'] {
const lowerMessage = message.toLowerCase();
// Network-related errors
if (lowerMessage.includes('network') ||
lowerMessage.includes('fetch') ||
lowerMessage.includes('connection')) {
return 'network';
}
// Memory-related errors
if (lowerMessage.includes('memory') ||
lowerMessage.includes('quota') ||
lowerMessage.includes('allocation') ||
error.name === 'QuotaExceededError') {
return 'memory';
}
// Timeout errors ('timed out' matches this class's own withTimeout message)
if (lowerMessage.includes('timeout') ||
lowerMessage.includes('timed out')) {
return 'timeout';
}
// Cancellation (AbortError lands here rather than under timeout, where it
// previously made this branch unreachable)
if (lowerMessage.includes('cancel') ||
lowerMessage.includes('abort') ||
error.name === 'AbortError') {
return 'cancelled';
}
// PDF corruption/parsing errors
if (lowerMessage.includes('pdf') ||
lowerMessage.includes('parse') ||
lowerMessage.includes('invalid') ||
lowerMessage.includes('corrupt') ||
lowerMessage.includes('malformed')) {
return 'corruption';
}
// Default to parsing error
return 'parsing';
}
/**
* Determine if an error is recoverable based on type and retry count
*/
private static isRecoverable(
errorType: ProcessingError['type'],
retryCount: number,
maxRetries: number
): boolean {
// Never recoverable
if (errorType === 'cancelled' || errorType === 'corruption') {
return false;
}
// Recoverable if we haven't exceeded retry count
if (retryCount >= maxRetries) {
return false;
}
// Memory errors are usually not recoverable
if (errorType === 'memory') {
return retryCount < 1; // Only one retry for memory errors
}
// Network and timeout errors are usually recoverable
return errorType === 'network' || errorType === 'timeout' || errorType === 'parsing';
}
/**
* Format error message for user display
*/
private static formatErrorMessage(errorType: ProcessingError['type'], originalMessage: string): string {
switch (errorType) {
case 'network':
return 'Network connection failed. Please check your internet connection and try again.';
case 'memory':
return 'Insufficient memory to process this file. Try closing other applications or processing a smaller file.';
case 'timeout':
return 'Processing timed out. This file may be too large or complex to process.';
case 'cancelled':
return 'Processing was cancelled by user.';
case 'corruption':
return 'This PDF file appears to be corrupted or encrypted. Please try a different file.';
case 'parsing':
return `Failed to process PDF: ${originalMessage}`;
default:
return `Processing failed: ${originalMessage}`;
}
}
/**
* Execute an operation with automatic retry logic
*/
static async executeWithRetry<T>(
operation: () => Promise<T>,
onError?: (error: ProcessingError) => void,
maxRetries: number = this.DEFAULT_MAX_RETRIES
): Promise<T> {
let lastError: ProcessingError | null = null;
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
return await operation();
} catch (error) {
lastError = this.createProcessingError(error, attempt, maxRetries);
// Notify error handler
if (onError) {
onError(lastError);
}
// Don't retry if not recoverable
if (!lastError.recoverable) {
break;
}
// Don't retry on last attempt
if (attempt === maxRetries) {
break;
}
// Wait before retry with progressive backoff
const delay = this.RETRY_DELAYS[Math.min(attempt, this.RETRY_DELAYS.length - 1)];
await this.delay(delay);
console.log(`Retrying operation (attempt ${attempt + 2}/${maxRetries + 1}) after ${delay}ms delay`);
}
}
// All retries exhausted
throw lastError || new Error('Operation failed after all retries');
}
/**
* Create a timeout wrapper for operations
*/
static withTimeout<T>(
operation: () => Promise<T>,
timeoutMs: number,
timeoutMessage: string = 'Operation timed out'
): Promise<T> {
return new Promise<T>((resolve, reject) => {
const timeoutId = setTimeout(() => {
reject(new Error(timeoutMessage));
}, timeoutMs);
operation()
.then(result => {
clearTimeout(timeoutId);
resolve(result);
})
.catch(error => {
clearTimeout(timeoutId);
reject(error);
});
});
}
/**
* Create an AbortController that times out after specified duration
*/
static createTimeoutController(timeoutMs: number): AbortController {
const controller = new AbortController();
setTimeout(() => {
controller.abort();
}, timeoutMs);
return controller;
}
/**
* Check if an error indicates the operation should be retried
*/
static shouldRetry(error: ProcessingError): boolean {
return error.recoverable && error.retryCount < error.maxRetries;
}
/**
* Get user-friendly suggestions based on error type
*/
static getErrorSuggestions(error: ProcessingError): string[] {
switch (error.type) {
case 'network':
return [
'Check your internet connection',
'Try refreshing the page',
'Try again in a few moments'
];
case 'memory':
return [
'Close other browser tabs or applications',
'Try processing a smaller file',
'Restart your browser',
'Use a device with more memory'
];
case 'timeout':
return [
'Try processing a smaller file',
'Break large files into smaller sections',
'Check your internet connection speed'
];
case 'corruption':
return [
'Verify the PDF file opens in other applications',
'Try re-downloading the file',
'Try a different PDF file',
'Contact the file creator if it appears corrupted'
];
case 'parsing':
return [
'Verify this is a valid PDF file',
'Try a different PDF file',
'Contact support if the problem persists'
];
default:
return [
'Try refreshing the page',
'Try again in a few moments',
'Contact support if the problem persists'
];
}
}
/**
* Utility function for delays
*/
private static delay(ms: number): Promise<void> {
return new Promise(resolve => setTimeout(resolve, ms));
}
}
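
The retry and timeout helpers are designed to compose. A sketch (editor's addition; parseWithRecovery and the 30-second cap are illustrative):

import { ProcessingErrorHandler } from './processingErrorHandler';

// Retry a flaky operation, capping each attempt at 30 seconds
async function parseWithRecovery<T>(parse: () => Promise<T>): Promise<T> {
  return ProcessingErrorHandler.executeWithRetry(
    () => ProcessingErrorHandler.withTimeout(parse, 30_000, 'PDF parse timed out'),
    error => {
      console.warn(error.message);
      ProcessingErrorHandler.getErrorSuggestions(error).forEach(s => console.warn(' -', s));
    }
  );
}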

View File

@@ -1,7 +1,7 @@
export interface PDFPage {
id: string;
pageNumber: number;
thumbnail: string;
thumbnail: string | null;
rotation: number;
selected: boolean;
splitBefore?: boolean;
@@ -24,4 +24,4 @@ export interface PageOperation {
export interface UndoRedoState {
operations: PageOperation[];
currentIndex: number;
}
}

View File

@@ -0,0 +1,91 @@
export interface ProcessingError {
type: 'network' | 'parsing' | 'memory' | 'corruption' | 'timeout' | 'cancelled';
message: string;
recoverable: boolean;
retryCount: number;
maxRetries: number;
originalError?: Error;
}
export interface ProcessingState {
fileKey: string;
fileName: string;
status: 'pending' | 'processing' | 'completed' | 'error' | 'cancelled';
progress: number; // 0-100
strategy: ProcessingStrategy;
error?: ProcessingError;
startedAt: number;
completedAt?: number;
estimatedTimeRemaining?: number;
currentPage?: number;
cancellationToken?: AbortController;
}
export interface ProcessedFile {
id: string;
pages: PDFPage[];
totalPages: number;
metadata: {
title: string;
createdAt: string;
modifiedAt: string;
};
}
export interface PDFPage {
id: string;
pageNumber: number;
thumbnail: string | null;
rotation: number;
selected: boolean;
splitBefore?: boolean;
}
export interface CacheConfig {
maxFiles: number;
maxSizeBytes: number;
ttlMs: number;
}
export interface CacheEntry {
data: ProcessedFile;
size: number;
lastAccessed: number;
createdAt: number;
}
export interface CacheStats {
entries: number;
totalSizeBytes: number;
maxSizeBytes: number;
}
export type ProcessingStrategy = 'immediate_full' | 'progressive_chunked' | 'metadata_only' | 'priority_pages';
export interface ProcessingConfig {
strategy: ProcessingStrategy;
chunkSize: number; // Pages per chunk
thumbnailQuality: 'low' | 'medium' | 'high';
priorityPageCount: number; // Number of priority pages to process first
useWebWorker: boolean;
maxRetries: number;
timeoutMs: number;
}
export interface FileAnalysis {
fileSize: number;
estimatedPageCount?: number;
isEncrypted: boolean;
isCorrupted: boolean;
recommendedStrategy: ProcessingStrategy;
estimatedProcessingTime: number; // milliseconds
}
export interface ProcessingMetrics {
totalFiles: number;
completedFiles: number;
failedFiles: number;
averageProcessingTime: number;
cacheHitRate: number;
memoryUsage: number;
}
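
For reference, a ProcessingConfig literal that type-checks against these interfaces (editor's addition; the values are illustrative, not defaults shipped in this commit):

import { ProcessingConfig } from './processing';

const exampleConfig: ProcessingConfig = {
  strategy: 'priority_pages', // render the first pages before the rest
  chunkSize: 25,
  thumbnailQuality: 'low',
  priorityPageCount: 10,
  useWebWorker: false,
  maxRetries: 3,
  timeoutMs: 60_000
};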

View File

@@ -0,0 +1,127 @@
/**
* File hashing utilities for cache key generation
*/
export class FileHasher {
private static readonly CHUNK_SIZE = 64 * 1024; // 64KB chunks for hashing
/**
* Generate a content-based hash for a file
* Uses the first, middle, and last chunks to create a reasonably unique hash
* without reading the entire file (which would be expensive for large files)
*/
static async generateContentHash(file: File): Promise<string> {
const chunks = await this.getFileChunks(file);
const combined = await this.combineChunks(chunks);
return await this.hashArrayBuffer(combined);
}
/**
* Generate a fast hash based on file metadata
* Faster but less collision-resistant than content hash
*/
static generateMetadataHash(file: File): string {
const data = `${file.name}-${file.size}-${file.lastModified}-${file.type}`;
return this.simpleHash(data);
}
/**
* Generate a hybrid hash that balances speed and uniqueness
* Uses metadata + small content sample
*/
static async generateHybridHash(file: File): Promise<string> {
const metadataHash = this.generateMetadataHash(file);
// For small files, use full content hash
if (file.size <= 1024 * 1024) { // 1MB
const contentHash = await this.generateContentHash(file);
return `${metadataHash}-${contentHash}`;
}
// For large files, use first chunk only
const firstChunk = file.slice(0, this.CHUNK_SIZE);
const firstChunkBuffer = await firstChunk.arrayBuffer();
const firstChunkHash = await this.hashArrayBuffer(firstChunkBuffer);
return `${metadataHash}-${firstChunkHash}`;
}
private static async getFileChunks(file: File): Promise<ArrayBuffer[]> {
const chunks: ArrayBuffer[] = [];
// First chunk
if (file.size > 0) {
const firstChunk = file.slice(0, Math.min(this.CHUNK_SIZE, file.size));
chunks.push(await firstChunk.arrayBuffer());
}
// Middle chunk (if file is large enough)
if (file.size > this.CHUNK_SIZE * 2) {
const middleStart = Math.floor(file.size / 2) - Math.floor(this.CHUNK_SIZE / 2);
const middleEnd = middleStart + this.CHUNK_SIZE;
const middleChunk = file.slice(middleStart, middleEnd);
chunks.push(await middleChunk.arrayBuffer());
}
// Last chunk (if file is large enough and different from first)
if (file.size > this.CHUNK_SIZE) {
const lastStart = Math.max(file.size - this.CHUNK_SIZE, this.CHUNK_SIZE);
const lastChunk = file.slice(lastStart);
chunks.push(await lastChunk.arrayBuffer());
}
return chunks;
}
private static async combineChunks(chunks: ArrayBuffer[]): Promise<ArrayBuffer> {
const totalLength = chunks.reduce((sum, chunk) => sum + chunk.byteLength, 0);
const combined = new Uint8Array(totalLength);
let offset = 0;
for (const chunk of chunks) {
combined.set(new Uint8Array(chunk), offset);
offset += chunk.byteLength;
}
return combined.buffer;
}
private static async hashArrayBuffer(buffer: ArrayBuffer): Promise<string> {
// Use Web Crypto API for proper hashing
if (crypto.subtle) {
const hashBuffer = await crypto.subtle.digest('SHA-256', buffer);
const hashArray = Array.from(new Uint8Array(hashBuffer));
return hashArray.map(b => b.toString(16).padStart(2, '0')).join('');
}
// Fallback for environments without crypto.subtle
return this.simpleHash(Array.from(new Uint8Array(buffer)).join(''));
}
private static simpleHash(str: string): string {
let hash = 0;
if (str.length === 0) return hash.toString();
for (let i = 0; i < str.length; i++) {
const char = str.charCodeAt(i);
hash = ((hash << 5) - hash) + char;
hash = hash & hash; // Convert to 32-bit integer
}
return Math.abs(hash).toString(16);
}
/**
* Validate that a file matches its expected hash
* Useful for detecting file corruption or changes
*/
static async validateFileHash(file: File, expectedHash: string): Promise<boolean> {
try {
const actualHash = await this.generateHybridHash(file);
return actualHash === expectedHash;
} catch (error) {
console.error('Hash validation failed:', error);
return false;
}
}
}
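
A short sketch tying the hasher back to cache-key generation (editor's addition; the generateCacheKey wrapper is hypothetical):

import { FileHasher } from './fileHasher';

async function generateCacheKey(file: File): Promise<string> {
  try {
    // Hybrid hash: metadata plus a small content sample, cheap for large files
    return await FileHasher.generateHybridHash(file);
  } catch {
    // Fall back to pure metadata if reading the file fails
    return FileHasher.generateMetadataHash(file);
  }
}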