Mirror of https://github.com/Frooodle/Stirling-PDF.git (synced 2025-09-08 17:51:20 +02:00)

File management overhaul

parent 09758ea2b8
commit 42abe83385
@@ -6,7 +6,6 @@ import StorageIcon from "@mui/icons-material/Storage";
import VisibilityIcon from "@mui/icons-material/Visibility";
import EditIcon from "@mui/icons-material/Edit";

import { FileWithUrl } from "../../types/file";
import { getFileSize, getFileDate } from "../../utils/fileUtils";
import { useIndexedDBThumbnail } from "../../hooks/useIndexedDBThumbnail";
@@ -1,15 +1,13 @@
import React, { useState, useCallback, useRef, useEffect } from "react";
import {
  Button, Text, Center, Checkbox, Box, Tooltip, ActionIcon,
  Notification, TextInput, FileInput, LoadingOverlay, Modal, Alert, Container,
  Stack, Group, Paper, SimpleGrid
  Notification, TextInput, LoadingOverlay, Modal, Alert,
  Stack, Group
} from "@mantine/core";
import { useTranslation } from "react-i18next";
import UploadFileIcon from "@mui/icons-material/UploadFile";
import { usePDFProcessor } from "../../hooks/usePDFProcessor";
import { useEnhancedProcessedFiles } from "../../hooks/useEnhancedProcessedFiles";
import { PDFDocument, PDFPage } from "../../types/pageEditor";
import { fileStorage } from "../../services/fileStorage";
import { generateThumbnailForFile } from "../../utils/thumbnailUtils";
import { ProcessedFile as EnhancedProcessedFile } from "../../types/processing";
import { useUndoRedo } from "../../hooks/useUndoRedo";
import {
  RotatePagesCommand,
@@ -19,19 +17,16 @@ import {
  ToggleSplitCommand
} from "../../commands/pageCommands";
import { pdfExportService } from "../../services/pdfExportService";
import styles from './pageEditor.module.css';
import './pageEditor.module.css';
import PageThumbnail from './PageThumbnail';
import BulkSelectionPanel from './BulkSelectionPanel';
import DragDropGrid from './DragDropGrid';
import FilePickerModal from '../shared/FilePickerModal';
import FileUploadSelector from '../shared/FileUploadSelector';

export interface PageEditorProps {
  activeFiles: File[];
  setActiveFiles: (files: File[]) => void;
  downloadUrl?: string | null;
  setDownloadUrl?: (url: string | null) => void;
  sharedFiles?: any[]; // For FileUploadSelector when no files loaded

  // Optional callbacks to expose internal functions for PageEditorControls
  onFunctionsReady?: (functions: {
@@ -55,24 +50,31 @@ export interface PageEditorProps {
const PageEditor = ({
  activeFiles,
  setActiveFiles,
  downloadUrl,
  setDownloadUrl,
  sharedFiles = [],
  onFunctionsReady,
}: PageEditorProps) => {
  const { t } = useTranslation();
  const { processPDFFile, loading: pdfLoading } = usePDFProcessor();

  // Enhanced processing with intelligent strategies
  const {
    processedFiles: enhancedProcessedFiles,
    processingStates,
    isProcessing: globalProcessing,
    hasProcessingErrors,
    processingProgress,
    actions: processingActions
  } = useEnhancedProcessedFiles(activeFiles, {
    strategy: 'priority_pages', // Process first pages immediately
    thumbnailQuality: 'low', // Low quality for page editor navigation
    priorityPageCount: 10
  });

  // Single merged document state
  const [mergedPdfDocument, setMergedPdfDocument] = useState<PDFDocument | null>(null);
  const [processedFiles, setProcessedFiles] = useState<Map<string, PDFDocument>>(new Map());
  const [filename, setFilename] = useState<string>("");

  // Page editor state
  const [selectedPages, setSelectedPages] = useState<string[]>([]);
  const [status, setStatus] = useState<string | null>(null);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [csvInput, setCsvInput] = useState<string>("");
  const [selectionMode, setSelectionMode] = useState(false);

@@ -97,87 +99,19 @@ const PageEditor = ({
  // Undo/Redo system
  const { executeCommand, undo, redo, canUndo, canRedo } = useUndoRedo();

  // Process uploaded file
  const handleFileUpload = useCallback(async (uploadedFile: File | any) => {
    if (!uploadedFile) {
      setError('No file provided');
      return;
    }

    let fileToProcess: File;

    // Handle FileWithUrl objects from storage
    if (uploadedFile.storedInIndexedDB && uploadedFile.arrayBuffer) {
      try {
        console.log('Converting FileWithUrl to File:', uploadedFile.name);
        const arrayBuffer = await uploadedFile.arrayBuffer();
        const blob = new Blob([arrayBuffer], { type: uploadedFile.type || 'application/pdf' });
        fileToProcess = new File([blob], uploadedFile.name, {
          type: uploadedFile.type || 'application/pdf',
          lastModified: uploadedFile.lastModified || Date.now()
        });
      } catch (error) {
        console.error('Error converting FileWithUrl:', error);
        setError('Unable to load file from storage');
        return;
      }
    } else if (uploadedFile instanceof File) {
      fileToProcess = uploadedFile;
    } else {
      setError('Invalid file object');
      console.error('handleFileUpload received unsupported object:', uploadedFile);
      return;
    }

    if (fileToProcess.type !== 'application/pdf') {
      setError('Please upload a valid PDF file');
      return;
    }

    const fileKey = `${fileToProcess.name}-${fileToProcess.size}`;

    // Skip processing if already processed
    if (processedFiles.has(fileKey)) return;

    setLoading(true);
    setError(null);

    try {
      const document = await processPDFFile(fileToProcess);

      // Store processed document
      setProcessedFiles(prev => new Map(prev).set(fileKey, document));
      setFilename(fileToProcess.name.replace(/\.pdf$/i, ''));
      setSelectedPages([]);

      if (document.pages.length > 0) {
        // Only store if it's a new file (not from storage)
        if (!uploadedFile.storedInIndexedDB) {
          const thumbnail = await generateThumbnailForFile(fileToProcess);
          await fileStorage.storeFile(fileToProcess, thumbnail);
        }
      }

      setStatus(`PDF loaded successfully with ${document.totalPages} pages`);
    } catch (err) {
      const errorMessage = err instanceof Error ? err.message : 'Failed to process PDF';
      setError(errorMessage);
      console.error('PDF processing error:', err);
    } finally {
      setLoading(false);
    }
  }, [processPDFFile, activeFiles, setActiveFiles, processedFiles]);

  // Process multiple uploaded files - just add them to activeFiles like FileManager does
  const handleMultipleFileUpload = useCallback((uploadedFiles: File[]) => {
    if (!uploadedFiles || uploadedFiles.length === 0) {
      setError('No files provided');
      return;
    }

    // Simply set the activeFiles to the selected files (same as FileManager approach)
    setActiveFiles(uploadedFiles);
  // Convert enhanced processed files to Page Editor format
  const convertToPageEditorFormat = useCallback((enhancedFile: EnhancedProcessedFile, fileName: string): PDFDocument => {
    return {
      id: enhancedFile.id,
      name: fileName,
      file: null as any, // We don't need the file reference in the converted format
      pages: enhancedFile.pages.map(page => ({
        ...page,
        // Ensure compatibility with existing page editor types
        splitBefore: page.splitBefore || false
      })),
      totalPages: enhancedFile.totalPages
    };
  }, []);

  // Merge multiple PDF documents into one
@@ -188,10 +122,10 @@ const PageEditor = ({
    }

    if (activeFiles.length === 1) {
      // Single file - use it directly
      const fileKey = `${activeFiles[0].name}-${activeFiles[0].size}`;
      const pdfDoc = processedFiles.get(fileKey);
      if (pdfDoc) {
      // Single file - use enhanced processed file
      const enhancedFile = enhancedProcessedFiles.get(activeFiles[0]);
      if (enhancedFile) {
        const pdfDoc = convertToPageEditorFormat(enhancedFile, activeFiles[0].name);
        setMergedPdfDocument(pdfDoc);
        setFilename(activeFiles[0].name.replace(/\.pdf$/i, ''));
      }
@@ -202,71 +136,230 @@ const PageEditor = ({
      const filenames: string[] = [];

      activeFiles.forEach((file, fileIndex) => {
        const fileKey = `${file.name}-${file.size}`;
        const pdfDoc = processedFiles.get(fileKey);
        if (pdfDoc) {
        const enhancedFile = enhancedProcessedFiles.get(file);
        if (enhancedFile) {
          filenames.push(file.name.replace(/\.pdf$/i, ''));
          pdfDoc.pages.forEach((page, pageIndex) => {
          enhancedFile.pages.forEach((page, pageIndex) => {
            // Create new page with updated IDs and page numbers for merged document
            const newPage: PDFPage = {
              ...page,
              id: `${fileIndex}-${page.id}`, // Unique ID across all files
              pageNumber: totalPages + pageIndex + 1,
              sourceFile: file.name // Track which file this page came from
              splitBefore: page.splitBefore || false
            };
            allPages.push(newPage);
          });
          totalPages += pdfDoc.pages.length;
          totalPages += enhancedFile.pages.length;
        }
      });

      const mergedDocument: PDFDocument = {
        pages: allPages,
        totalPages: totalPages,
        title: filenames.join(' + '),
        metadata: {
          title: filenames.join(' + '),
          createdAt: new Date().toISOString(),
          modifiedAt: new Date().toISOString(),
        }
      };
      if (allPages.length > 0) {
        const mergedDocument: PDFDocument = {
          id: `merged-${Date.now()}`,
          name: filenames.join(' + '),
          file: null as any,
          pages: allPages,
          totalPages: totalPages
        };

        setMergedPdfDocument(mergedDocument);
        setFilename(filenames.join('_'));
      }
  }, [activeFiles, processedFiles]);

  // Auto-process files from activeFiles
  useEffect(() => {
    console.log('Auto-processing effect triggered:', {
      activeFilesCount: activeFiles.length,
      processedFilesCount: processedFiles.size,
      activeFileNames: activeFiles.map(f => f.name)
    });

    activeFiles.forEach(file => {
      const fileKey = `${file.name}-${file.size}`;
      console.log(`Checking file ${file.name}: processed =`, processedFiles.has(fileKey));
      if (!processedFiles.has(fileKey)) {
        console.log('Processing file:', file.name);
        handleFileUpload(file);
        setMergedPdfDocument(mergedDocument);
        setFilename(filenames.join('_'));
      }
    });
  }, [activeFiles, processedFiles, handleFileUpload]);
    }
  }, [activeFiles, enhancedProcessedFiles, convertToPageEditorFormat]);

  // Merge multiple PDF documents into one when all files are processed
  // Handle file upload from FileUploadSelector
  const handleMultipleFileUpload = useCallback((uploadedFiles: File[]) => {
    if (!uploadedFiles || uploadedFiles.length === 0) {
      setStatus('No files provided');
      return;
    }

    // Simply set the activeFiles to the selected files (same as existing approach)
    setActiveFiles(uploadedFiles);
    setStatus(`Added ${uploadedFiles.length} file(s) for processing`);
  }, [setActiveFiles]);
|
||||
// Auto-merge documents when enhanced processing completes
|
||||
useEffect(() => {
|
||||
if (activeFiles.length > 0) {
|
||||
const allProcessed = activeFiles.every(file => {
|
||||
const fileKey = `${file.name}-${file.size}`;
|
||||
return processedFiles.has(fileKey);
|
||||
});
|
||||
const allProcessed = activeFiles.every(file => enhancedProcessedFiles.has(file));
|
||||
|
||||
if (allProcessed && activeFiles.length > 0) {
|
||||
if (allProcessed) {
|
||||
mergeAllPDFs();
|
||||
}
|
||||
} else {
|
||||
setMergedPdfDocument(null);
|
||||
}
|
||||
}, [activeFiles, processedFiles, mergeAllPDFs]);
|
||||
}, [activeFiles, enhancedProcessedFiles, mergeAllPDFs]);
|
||||
|
||||
// Shared PDF instance for thumbnail generation
|
||||
const [sharedPdfInstance, setSharedPdfInstance] = useState<any>(null);
|
||||
const [thumbnailGenerationStarted, setThumbnailGenerationStarted] = useState(false);
|
||||
|
||||
// Session-based thumbnail cache with 1GB limit
|
||||
const [thumbnailCache, setThumbnailCache] = useState<Map<string, { thumbnail: string; lastUsed: number; sizeBytes: number }>>(new Map());
|
||||
const maxCacheSizeBytes = 1024 * 1024 * 1024; // 1GB cache limit
|
||||
const [currentCacheSize, setCurrentCacheSize] = useState(0);
|
||||
|
||||
// Cache management functions
|
||||
const addThumbnailToCache = useCallback((pageId: string, thumbnail: string) => {
|
||||
const thumbnailSizeBytes = thumbnail.length * 0.75; // Rough base64 size estimate
|
||||
|
||||
setThumbnailCache(prev => {
|
||||
const newCache = new Map(prev);
|
||||
const now = Date.now();
|
||||
|
||||
// Add new thumbnail
|
||||
newCache.set(pageId, {
|
||||
thumbnail,
|
||||
lastUsed: now,
|
||||
sizeBytes: thumbnailSizeBytes
|
||||
});
|
||||
|
||||
return newCache;
|
||||
});
|
||||
|
||||
setCurrentCacheSize(prev => {
|
||||
const newSize = prev + thumbnailSizeBytes;
|
||||
|
||||
// If we exceed 1GB, trigger cleanup
|
||||
if (newSize > maxCacheSizeBytes) {
|
||||
setTimeout(() => cleanupThumbnailCache(), 0);
|
||||
}
|
||||
|
||||
return newSize;
|
||||
});
|
||||
|
||||
console.log(`Cached thumbnail for ${pageId} (${Math.round(thumbnailSizeBytes / 1024)}KB)`);
|
||||
}, [maxCacheSizeBytes]);
|
||||
|
||||
const getThumbnailFromCache = useCallback((pageId: string): string | null => {
|
||||
const cached = thumbnailCache.get(pageId);
|
||||
if (!cached) return null;
|
||||
|
||||
// Update last used timestamp
|
||||
setThumbnailCache(prev => {
|
||||
const newCache = new Map(prev);
|
||||
const entry = newCache.get(pageId);
|
||||
if (entry) {
|
||||
entry.lastUsed = Date.now();
|
||||
}
|
||||
return newCache;
|
||||
});
|
||||
|
||||
return cached.thumbnail;
|
||||
}, [thumbnailCache]);
|
||||
|
||||
const cleanupThumbnailCache = useCallback(() => {
|
||||
setThumbnailCache(prev => {
|
||||
const entries = Array.from(prev.entries());
|
||||
|
||||
// Sort by last used (oldest first)
|
||||
entries.sort(([, a], [, b]) => a.lastUsed - b.lastUsed);
|
||||
|
||||
const newCache = new Map();
|
||||
let newSize = 0;
|
||||
const targetSize = maxCacheSizeBytes * 0.8; // Clean to 80% of limit
|
||||
|
||||
// Keep most recently used entries until we hit target size
|
||||
for (let i = entries.length - 1; i >= 0 && newSize < targetSize; i--) {
|
||||
const [key, value] = entries[i];
|
||||
newCache.set(key, value);
|
||||
newSize += value.sizeBytes;
|
||||
}
|
||||
|
||||
setCurrentCacheSize(newSize);
|
||||
console.log(`Cleaned thumbnail cache: ${prev.size} → ${newCache.size} entries (${Math.round(newSize / 1024 / 1024)}MB)`);
|
||||
|
||||
return newCache;
|
||||
});
|
||||
}, [maxCacheSizeBytes]);
|
||||
|
||||
const clearThumbnailCache = useCallback(() => {
|
||||
setThumbnailCache(new Map());
|
||||
setCurrentCacheSize(0);
|
||||
console.log('Cleared thumbnail cache');
|
||||
}, []);
|
||||
|
||||
// Start thumbnail generation process (separate from document loading)
|
||||
const startThumbnailGeneration = useCallback(async () => {
|
||||
if (!mergedPdfDocument || activeFiles.length !== 1 || thumbnailGenerationStarted) return;
|
||||
|
||||
const file = activeFiles[0];
|
||||
const totalPages = mergedPdfDocument.totalPages;
|
||||
|
||||
console.log(`Starting thumbnail generation for ${totalPages} pages`);
|
||||
setThumbnailGenerationStarted(true);
|
||||
|
||||
try {
|
||||
// Load PDF ONCE for thumbnail generation (separate from document structure loading)
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
const { getDocument } = await import('pdfjs-dist');
|
||||
const pdf = await getDocument({ data: arrayBuffer }).promise;
|
||||
setSharedPdfInstance(pdf);
|
||||
|
||||
console.log('Shared PDF loaded, starting progressive thumbnail generation');
|
||||
|
||||
// Process pages in batches
|
||||
let currentPage = 1;
|
||||
const batchSize = totalPages > 500 ? 1 : 2; // Slower for massive files
|
||||
const batchDelay = totalPages > 500 ? 300 : 200; // More delay for massive files
|
||||
|
||||
const processBatch = async () => {
|
||||
const endPage = Math.min(currentPage + batchSize - 1, totalPages);
|
||||
console.log(`Generating thumbnails for pages ${currentPage}-${endPage}`);
|
||||
|
||||
for (let i = currentPage; i <= endPage; i++) {
|
||||
// Send the shared PDF instance and cache functions to components
|
||||
window.dispatchEvent(new CustomEvent('generateThumbnail', {
|
||||
detail: {
|
||||
pageNumber: i,
|
||||
sharedPdf: pdf,
|
||||
getThumbnailFromCache,
|
||||
addThumbnailToCache
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
currentPage += batchSize;
|
||||
|
||||
if (currentPage <= totalPages) {
|
||||
setTimeout(processBatch, batchDelay);
|
||||
} else {
|
||||
console.log('Progressive thumbnail generation completed');
|
||||
}
|
||||
};
|
||||
|
||||
// Start generating thumbnails immediately
|
||||
processBatch();
|
||||
|
||||
} catch (error) {
|
||||
console.error('Failed to start thumbnail generation:', error);
|
||||
setThumbnailGenerationStarted(false);
|
||||
}
|
||||
}, [mergedPdfDocument, activeFiles, thumbnailGenerationStarted]);
|
||||
|
||||
// Start thumbnail generation after document loads and UI settles
|
||||
useEffect(() => {
|
||||
if (mergedPdfDocument && !thumbnailGenerationStarted) {
|
||||
// Small delay to let document render, then start thumbnail generation
|
||||
const timer = setTimeout(startThumbnailGeneration, 1000);
|
||||
return () => clearTimeout(timer);
|
||||
}
|
||||
}, [mergedPdfDocument, startThumbnailGeneration, thumbnailGenerationStarted]);
|
||||
|
||||
// Cleanup shared PDF instance and cache when component unmounts or files change
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
if (sharedPdfInstance) {
|
||||
sharedPdfInstance.destroy();
|
||||
setSharedPdfInstance(null);
|
||||
}
|
||||
setThumbnailGenerationStarted(false);
|
||||
clearThumbnailCache(); // Clear cache when leaving/changing documents
|
||||
};
|
||||
}, [activeFiles, clearThumbnailCache]);
|
||||
|
||||
// Clear selections when files change
|
||||
useEffect(() => {
|
||||
@ -275,7 +368,6 @@ const PageEditor = ({
|
||||
setSelectionMode(false);
|
||||
}, [activeFiles]);
|
||||
|
||||
// Global drag cleanup to handle drops outside valid areas
|
||||
useEffect(() => {
|
||||
const handleGlobalDragEnd = () => {
|
||||
// Clean up drag state when drag operation ends anywhere
|
||||
@ -286,7 +378,7 @@ const PageEditor = ({
|
||||
};
|
||||
|
||||
const handleGlobalDrop = (e: DragEvent) => {
|
||||
// Prevent default to avoid browser navigation on invalid drops
|
||||
// Prevent default to handle invalid drops
|
||||
e.preventDefault();
|
||||
};
|
||||
|
||||
@ -702,7 +794,6 @@ const PageEditor = ({
|
||||
|
||||
const closePdf = useCallback(() => {
|
||||
setActiveFiles([]);
|
||||
setProcessedFiles(new Map());
|
||||
setMergedPdfDocument(null);
|
||||
setSelectedPages([]);
|
||||
}, [setActiveFiles]);
|
||||
@ -749,31 +840,66 @@ const PageEditor = ({
|
||||
closePdf
|
||||
]);
|
||||
|
||||
// Return early if no merged document - Homepage handles file selection
|
||||
if (!mergedPdfDocument) {
|
||||
return (
|
||||
<Box pos="relative" h="100vh" style={{ overflow: 'auto' }}>
|
||||
<LoadingOverlay visible={loading || pdfLoading} />
|
||||
|
||||
<Container size="lg" p="xl" h="100%" style={{ display: 'flex', alignItems: 'center', justifyContent: 'center' }}>
|
||||
<FileUploadSelector
|
||||
title="Select PDFs to edit"
|
||||
subtitle="Choose files from storage or upload PDFs - multiple files will be merged"
|
||||
sharedFiles={sharedFiles}
|
||||
onFilesSelect={handleMultipleFileUpload}
|
||||
accept={["application/pdf"]}
|
||||
loading={loading || pdfLoading}
|
||||
/>
|
||||
</Container>
|
||||
</Box>
|
||||
<Center h="100vh">
|
||||
<LoadingOverlay visible={globalProcessing} />
|
||||
{globalProcessing ? (
|
||||
<Text c="dimmed">Processing PDF files...</Text>
|
||||
) : (
|
||||
<Text c="dimmed">Waiting for PDF files...</Text>
|
||||
)}
|
||||
</Center>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<Box pos="relative" h="100vh" style={{ overflow: 'auto' }}>
|
||||
<LoadingOverlay visible={loading || pdfLoading} />
|
||||
<LoadingOverlay visible={globalProcessing && !mergedPdfDocument} />
|
||||
|
||||
|
||||
<Box p="md" pt="xl">
|
||||
{/* Enhanced Processing Status */}
|
||||
{(globalProcessing || hasProcessingErrors) && (
|
||||
<Box mb="md" p="sm" style={{ backgroundColor: 'var(--mantine-color-blue-0)', borderRadius: 8 }}>
|
||||
{globalProcessing && (
|
||||
<Group justify="space-between" mb="xs">
|
||||
<Text size="sm" fw={500}>Processing files...</Text>
|
||||
<Text size="sm" c="dimmed">{Math.round(processingProgress.overall)}%</Text>
|
||||
</Group>
|
||||
)}
|
||||
|
||||
{Array.from(processingStates.values()).map(state => (
|
||||
<Group key={state.fileKey} justify="space-between" mb={4}>
|
||||
<Text size="xs">{state.fileName}</Text>
|
||||
<Group gap="xs">
|
||||
<Text size="xs" c="dimmed">{state.progress}%</Text>
|
||||
{state.error && (
|
||||
<Button
|
||||
size="xs"
|
||||
variant="light"
|
||||
color="red"
|
||||
onClick={() => {
|
||||
// Show error details or retry
|
||||
console.log('Processing error:', state.error);
|
||||
}}
|
||||
>
|
||||
Error
|
||||
</Button>
|
||||
)}
|
||||
</Group>
|
||||
</Group>
|
||||
))}
|
||||
|
||||
{hasProcessingErrors && (
|
||||
<Text size="xs" c="red" mt="xs">
|
||||
Some files failed to process. Check individual file status above.
|
||||
</Text>
|
||||
)}
|
||||
</Box>
|
||||
)}
|
||||
|
||||
<Group mb="md">
|
||||
<TextInput
|
||||
value={filename}
|
||||
@ -834,6 +960,7 @@ const PageEditor = ({
|
||||
page={page}
|
||||
index={index}
|
||||
totalPages={mergedPdfDocument.pages.length}
|
||||
originalFile={activeFiles.length === 1 ? activeFiles[0] : undefined}
|
||||
selectedPages={selectedPages}
|
||||
selectionMode={selectionMode}
|
||||
draggedPage={draggedPage}
|
||||
@ -930,12 +1057,6 @@ const PageEditor = ({
|
||||
)}
|
||||
</Modal>
|
||||
|
||||
<FileInput
|
||||
ref={fileInputRef}
|
||||
accept="application/pdf"
|
||||
onChange={(file) => file && handleFileUpload(file)}
|
||||
style={{ display: 'none' }}
|
||||
/>
|
||||
|
||||
{status && (
|
||||
<Notification
|
||||
@ -947,18 +1068,6 @@ const PageEditor = ({
|
||||
{status}
|
||||
</Notification>
|
||||
)}
|
||||
|
||||
{error && (
|
||||
<Notification
|
||||
color="red"
|
||||
mt="md"
|
||||
onClose={() => setError(null)}
|
||||
style={{ position: 'fixed', bottom: 70, right: 20, zIndex: 1000 }}
|
||||
>
|
||||
{error}
|
||||
</Notification>
|
||||
)}
|
||||
|
||||
</Box>
|
||||
);
|
||||
};
|
||||
|
@ -1,5 +1,5 @@
|
||||
import React, { useCallback } from 'react';
|
||||
import { Text, Checkbox, Tooltip, ActionIcon } from '@mantine/core';
|
||||
import React, { useCallback, useState, useEffect, useRef } from 'react';
|
||||
import { Text, Checkbox, Tooltip, ActionIcon, Loader } from '@mantine/core';
|
||||
import ArrowBackIcon from '@mui/icons-material/ArrowBack';
|
||||
import ArrowForwardIcon from '@mui/icons-material/ArrowForward';
|
||||
import RotateLeftIcon from '@mui/icons-material/RotateLeft';
|
||||
@ -9,11 +9,18 @@ import ContentCutIcon from '@mui/icons-material/ContentCut';
|
||||
import DragIndicatorIcon from '@mui/icons-material/DragIndicator';
|
||||
import { PDFPage } from '../../../types/pageEditor';
|
||||
import styles from './PageEditor.module.css';
|
||||
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
|
||||
|
||||
// Ensure PDF.js worker is available
|
||||
if (!GlobalWorkerOptions.workerSrc) {
|
||||
GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
|
||||
}
|
||||
|
||||
interface PageThumbnailProps {
|
||||
page: PDFPage;
|
||||
index: number;
|
||||
totalPages: number;
|
||||
originalFile?: File; // For lazy thumbnail generation
|
||||
selectedPages: string[];
|
||||
selectionMode: boolean;
|
||||
draggedPage: string | null;
|
||||
@ -43,6 +50,7 @@ const PageThumbnail = ({
|
||||
page,
|
||||
index,
|
||||
totalPages,
|
||||
originalFile,
|
||||
selectedPages,
|
||||
selectionMode,
|
||||
draggedPage,
|
||||
@ -67,6 +75,74 @@ const PageThumbnail = ({
|
||||
pdfDocument,
|
||||
setPdfDocument,
|
||||
}: PageThumbnailProps) => {
|
||||
const [thumbnailUrl, setThumbnailUrl] = useState<string | null>(page.thumbnail);
|
||||
const [isLoadingThumbnail, setIsLoadingThumbnail] = useState(false);
|
||||
|
||||
// Listen for progressive thumbnail generation events
|
||||
useEffect(() => {
|
||||
const handleThumbnailGeneration = (event: CustomEvent) => {
|
||||
const { pageNumber, sharedPdf, getThumbnailFromCache, addThumbnailToCache } = event.detail;
|
||||
if (pageNumber === page.pageNumber && !thumbnailUrl && !isLoadingThumbnail) {
|
||||
|
||||
// Check cache first
|
||||
const cachedThumbnail = getThumbnailFromCache(page.id);
|
||||
if (cachedThumbnail) {
|
||||
console.log(`Using cached thumbnail for page ${page.pageNumber}`);
|
||||
setThumbnailUrl(cachedThumbnail);
|
||||
return;
|
||||
}
|
||||
|
||||
// Generate new thumbnail and cache it
|
||||
loadThumbnailFromSharedPdf(sharedPdf, addThumbnailToCache);
|
||||
}
|
||||
};
|
||||
|
||||
window.addEventListener('generateThumbnail', handleThumbnailGeneration as EventListener);
|
||||
return () => window.removeEventListener('generateThumbnail', handleThumbnailGeneration as EventListener);
|
||||
}, [page.pageNumber, page.id, thumbnailUrl, isLoadingThumbnail]);
|
||||
|
||||
const loadThumbnailFromSharedPdf = async (sharedPdf: any, addThumbnailToCache?: (pageId: string, thumbnail: string) => void) => {
|
||||
if (isLoadingThumbnail || thumbnailUrl) return;
|
||||
|
||||
setIsLoadingThumbnail(true);
|
||||
try {
|
||||
const thumbnail = await generateThumbnailFromPdf(sharedPdf);
|
||||
|
||||
// Cache the generated thumbnail
|
||||
if (addThumbnailToCache) {
|
||||
addThumbnailToCache(page.id, thumbnail);
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.error(`Failed to load thumbnail for page ${page.pageNumber}:`, error);
|
||||
} finally {
|
||||
setIsLoadingThumbnail(false);
|
||||
}
|
||||
};
|
||||
|
||||
const generateThumbnailFromPdf = async (pdf: any): Promise<string> => {
|
||||
const pdfPage = await pdf.getPage(page.pageNumber);
|
||||
const scale = 0.2; // Low quality for page editor
|
||||
const viewport = pdfPage.getViewport({ scale });
|
||||
|
||||
const canvas = document.createElement('canvas');
|
||||
canvas.width = viewport.width;
|
||||
canvas.height = viewport.height;
|
||||
|
||||
const context = canvas.getContext('2d');
|
||||
if (!context) {
|
||||
throw new Error('Could not get canvas context');
|
||||
}
|
||||
|
||||
await pdfPage.render({ canvasContext: context, viewport }).promise;
|
||||
const thumbnail = canvas.toDataURL('image/jpeg', 0.8);
|
||||
|
||||
setThumbnailUrl(thumbnail);
|
||||
console.log(`Thumbnail generated for page ${page.pageNumber}`);
|
||||
|
||||
return thumbnail;
|
||||
};
|
||||
|
||||
// Register this component with pageRefs for animations
|
||||
const pageElementRef = useCallback((element: HTMLDivElement | null) => {
|
||||
if (element) {
|
||||
@ -162,18 +238,30 @@ const PageThumbnail = ({
|
||||
justifyContent: 'center'
|
||||
}}
|
||||
>
|
||||
<img
|
||||
src={page.thumbnail}
|
||||
alt={`Page ${page.pageNumber}`}
|
||||
style={{
|
||||
maxWidth: '100%',
|
||||
maxHeight: '100%',
|
||||
objectFit: 'contain',
|
||||
borderRadius: 2,
|
||||
transform: `rotate(${page.rotation}deg)`,
|
||||
transition: 'transform 0.3s ease-in-out'
|
||||
}}
|
||||
/>
|
||||
{thumbnailUrl ? (
|
||||
<img
|
||||
src={thumbnailUrl}
|
||||
alt={`Page ${page.pageNumber}`}
|
||||
style={{
|
||||
maxWidth: '100%',
|
||||
maxHeight: '100%',
|
||||
objectFit: 'contain',
|
||||
borderRadius: 2,
|
||||
transform: `rotate(${page.rotation}deg)`,
|
||||
transition: 'transform 0.3s ease-in-out'
|
||||
}}
|
||||
/>
|
||||
) : isLoadingThumbnail ? (
|
||||
<div style={{ textAlign: 'center' }}>
|
||||
<Loader size="sm" />
|
||||
<Text size="xs" c="dimmed" mt={4}>Loading...</Text>
|
||||
</div>
|
||||
) : (
|
||||
<div style={{ textAlign: 'center' }}>
|
||||
<Text size="lg" c="dimmed">📄</Text>
|
||||
<Text size="xs" c="dimmed" mt={4}>Page {page.pageNumber}</Text>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<Text
|
||||
|
frontend/src/hooks/useEnhancedProcessedFiles.ts (new file, 288 lines)
@@ -0,0 +1,288 @@
|
||||
import { useState, useEffect } from 'react';
|
||||
import { ProcessedFile, ProcessingState, ProcessingConfig } from '../types/processing';
|
||||
import { enhancedPDFProcessingService } from '../services/enhancedPDFProcessingService';
|
||||
import { FileHasher } from '../utils/fileHash';
|
||||
|
||||
interface UseEnhancedProcessedFilesResult {
|
||||
processedFiles: Map<File, ProcessedFile>;
|
||||
processingStates: Map<string, ProcessingState>;
|
||||
isProcessing: boolean;
|
||||
hasProcessingErrors: boolean;
|
||||
processingProgress: {
|
||||
overall: number;
|
||||
fileProgress: Map<string, number>;
|
||||
estimatedTimeRemaining: number;
|
||||
};
|
||||
cacheStats: {
|
||||
entries: number;
|
||||
totalSizeBytes: number;
|
||||
maxSizeBytes: number;
|
||||
};
|
||||
metrics: {
|
||||
totalFiles: number;
|
||||
completedFiles: number;
|
||||
failedFiles: number;
|
||||
averageProcessingTime: number;
|
||||
cacheHitRate: number;
|
||||
};
|
||||
actions: {
|
||||
cancelProcessing: (fileKey: string) => void;
|
||||
retryProcessing: (file: File) => void;
|
||||
clearCache: () => void;
|
||||
};
|
||||
}
|
||||
|
||||
export function useEnhancedProcessedFiles(
|
||||
activeFiles: File[],
|
||||
config?: Partial<ProcessingConfig>
|
||||
): UseEnhancedProcessedFilesResult {
|
||||
const [processedFiles, setProcessedFiles] = useState<Map<File, ProcessedFile>>(new Map());
|
||||
const [processingStates, setProcessingStates] = useState<Map<string, ProcessingState>>(new Map());
|
||||
|
||||
// Subscribe to processing state changes once
|
||||
useEffect(() => {
|
||||
const unsubscribe = enhancedPDFProcessingService.onProcessingChange(setProcessingStates);
|
||||
return unsubscribe;
|
||||
}, []);
|
||||
|
||||
// Process files when activeFiles changes
|
||||
useEffect(() => {
|
||||
if (activeFiles.length === 0) {
|
||||
setProcessedFiles(new Map());
|
||||
return;
|
||||
}
|
||||
|
||||
const processFiles = async () => {
|
||||
const newProcessedFiles = new Map<File, ProcessedFile>();
|
||||
|
||||
for (const file of activeFiles) {
|
||||
// Check if we already have this file processed
|
||||
const existing = processedFiles.get(file);
|
||||
if (existing) {
|
||||
newProcessedFiles.set(file, existing);
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
// Generate proper file key matching the service
|
||||
const fileKey = await FileHasher.generateHybridHash(file);
|
||||
console.log('Processing file:', file.name);
|
||||
|
||||
const processed = await enhancedPDFProcessingService.processFile(file, config);
|
||||
if (processed) {
|
||||
console.log('Got processed file for:', file.name);
|
||||
newProcessedFiles.set(file, processed);
|
||||
} else {
|
||||
console.log('Processing started for:', file.name, '- waiting for completion');
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`Failed to start processing for ${file.name}:`, error);
|
||||
}
|
||||
}
|
||||
|
||||
// Update processed files if we have any
|
||||
if (newProcessedFiles.size > 0) {
|
||||
setProcessedFiles(newProcessedFiles);
|
||||
}
|
||||
};
|
||||
|
||||
processFiles();
|
||||
}, [activeFiles]);
|
||||
|
||||
// Listen for processing completion
|
||||
useEffect(() => {
|
||||
const checkForCompletedFiles = async () => {
|
||||
let hasNewFiles = false;
|
||||
const updatedFiles = new Map(processedFiles);
|
||||
|
||||
// Generate file keys for all files first
|
||||
const fileKeyPromises = activeFiles.map(async (file) => ({
|
||||
file,
|
||||
key: await FileHasher.generateHybridHash(file)
|
||||
}));
|
||||
|
||||
const fileKeyPairs = await Promise.all(fileKeyPromises);
|
||||
|
||||
for (const { file, key } of fileKeyPairs) {
|
||||
// Only check files that don't have processed results yet
|
||||
if (!updatedFiles.has(file)) {
|
||||
const processingState = processingStates.get(key);
|
||||
|
||||
// Check for both processing and recently completed files
|
||||
// This ensures we catch completed files before they're cleaned up
|
||||
if (processingState?.status === 'processing' || processingState?.status === 'completed') {
|
||||
try {
|
||||
const processed = await enhancedPDFProcessingService.processFile(file, config);
|
||||
if (processed) {
|
||||
console.log('Processing completed for:', file.name);
|
||||
updatedFiles.set(file, processed);
|
||||
hasNewFiles = true;
|
||||
}
|
||||
} catch (error) {
|
||||
// Ignore errors in completion check
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (hasNewFiles) {
|
||||
setProcessedFiles(updatedFiles);
|
||||
}
|
||||
};
|
||||
|
||||
// Check every 500ms for completed processing
|
||||
const interval = setInterval(checkForCompletedFiles, 500);
|
||||
return () => clearInterval(interval);
|
||||
}, [activeFiles, processingStates]);
|
||||
|
||||
|
||||
// Cleanup when activeFiles changes
|
||||
useEffect(() => {
|
||||
const currentFiles = new Set(activeFiles);
|
||||
const previousFiles = Array.from(processedFiles.keys());
|
||||
const removedFiles = previousFiles.filter(file => !currentFiles.has(file));
|
||||
|
||||
if (removedFiles.length > 0) {
|
||||
// Clean up processing service cache
|
||||
enhancedPDFProcessingService.cleanup(removedFiles);
|
||||
|
||||
// Update local state
|
||||
setProcessedFiles(prev => {
|
||||
const updated = new Map();
|
||||
for (const [file, processed] of prev) {
|
||||
if (currentFiles.has(file)) {
|
||||
updated.set(file, processed);
|
||||
}
|
||||
}
|
||||
return updated;
|
||||
});
|
||||
}
|
||||
}, [activeFiles]);
|
||||
|
||||
// Calculate derived state
|
||||
const isProcessing = processingStates.size > 0;
|
||||
const hasProcessingErrors = Array.from(processingStates.values()).some(state => state.status === 'error');
|
||||
|
||||
// Calculate overall progress
|
||||
const processingProgress = calculateProcessingProgress(processingStates);
|
||||
|
||||
// Get cache stats and metrics
|
||||
const cacheStats = enhancedPDFProcessingService.getCacheStats();
|
||||
const metrics = enhancedPDFProcessingService.getMetrics();
|
||||
|
||||
// Action handlers
|
||||
const actions = {
|
||||
cancelProcessing: (fileKey: string) => {
|
||||
enhancedPDFProcessingService.cancelProcessing(fileKey);
|
||||
},
|
||||
|
||||
retryProcessing: async (file: File) => {
|
||||
try {
|
||||
await enhancedPDFProcessingService.processFile(file, config);
|
||||
} catch (error) {
|
||||
console.error(`Failed to retry processing for ${file.name}:`, error);
|
||||
}
|
||||
},
|
||||
|
||||
clearCache: () => {
|
||||
enhancedPDFProcessingService.clearAll();
|
||||
}
|
||||
};
|
||||
|
||||
return {
|
||||
processedFiles,
|
||||
processingStates,
|
||||
isProcessing,
|
||||
hasProcessingErrors,
|
||||
processingProgress,
|
||||
cacheStats,
|
||||
metrics,
|
||||
actions
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate overall processing progress from individual file states
|
||||
*/
|
||||
function calculateProcessingProgress(states: Map<string, ProcessingState>): {
|
||||
overall: number;
|
||||
fileProgress: Map<string, number>;
|
||||
estimatedTimeRemaining: number;
|
||||
} {
|
||||
if (states.size === 0) {
|
||||
return {
|
||||
overall: 100,
|
||||
fileProgress: new Map(),
|
||||
estimatedTimeRemaining: 0
|
||||
};
|
||||
}
|
||||
|
||||
const fileProgress = new Map<string, number>();
|
||||
let totalProgress = 0;
|
||||
let totalEstimatedTime = 0;
|
||||
|
||||
for (const [fileKey, state] of states) {
|
||||
fileProgress.set(fileKey, state.progress);
|
||||
totalProgress += state.progress;
|
||||
totalEstimatedTime += state.estimatedTimeRemaining || 0;
|
||||
}
|
||||
|
||||
const overall = totalProgress / states.size;
|
||||
const estimatedTimeRemaining = totalEstimatedTime;
|
||||
|
||||
return {
|
||||
overall,
|
||||
fileProgress,
|
||||
estimatedTimeRemaining
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Hook for getting a single processed file with enhanced features
|
||||
*/
|
||||
export function useEnhancedProcessedFile(
|
||||
file: File | null,
|
||||
config?: Partial<ProcessingConfig>
|
||||
): {
|
||||
processedFile: ProcessedFile | null;
|
||||
isProcessing: boolean;
|
||||
processingState: ProcessingState | null;
|
||||
error: string | null;
|
||||
canRetry: boolean;
|
||||
actions: {
|
||||
cancel: () => void;
|
||||
retry: () => void;
|
||||
};
|
||||
} {
|
||||
const result = useEnhancedProcessedFiles(file ? [file] : [], config);
|
||||
|
||||
const processedFile = file ? result.processedFiles.get(file) || null : null;
|
||||
// Note: This is async but we can't await in hook return - consider refactoring if needed
|
||||
const fileKey = file ? '' : ''; // TODO: Handle async file key generation
|
||||
const processingState = fileKey ? result.processingStates.get(fileKey) || null : null;
|
||||
const isProcessing = !!processingState;
|
||||
const error = processingState?.error?.message || null;
|
||||
const canRetry = processingState?.error?.recoverable || false;
|
||||
|
||||
const actions = {
|
||||
cancel: () => {
|
||||
if (fileKey) {
|
||||
result.actions.cancelProcessing(fileKey);
|
||||
}
|
||||
},
|
||||
retry: () => {
|
||||
if (file) {
|
||||
result.actions.retryProcessing(file);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
return {
|
||||
processedFile,
|
||||
isProcessing,
|
||||
processingState,
|
||||
error,
|
||||
canRetry,
|
||||
actions
|
||||
};
|
||||
}
|
@@ -50,18 +50,28 @@ export function usePDFProcessor() {
|
||||
|
||||
const pages: PDFPage[] = [];
|
||||
|
||||
// Generate thumbnails for all pages
|
||||
// Create pages without thumbnails initially - load them lazily
|
||||
for (let i = 1; i <= totalPages; i++) {
|
||||
const thumbnail = await generatePageThumbnail(file, i);
|
||||
pages.push({
|
||||
id: `${file.name}-page-${i}`,
|
||||
pageNumber: i,
|
||||
thumbnail,
|
||||
thumbnail: null, // Will be loaded lazily
|
||||
rotation: 0,
|
||||
selected: false
|
||||
});
|
||||
}
|
||||
|
||||
// Generate thumbnails for first 10 pages immediately for better UX
|
||||
const priorityPages = Math.min(10, totalPages);
|
||||
for (let i = 1; i <= priorityPages; i++) {
|
||||
try {
|
||||
const thumbnail = await generatePageThumbnail(file, i);
|
||||
pages[i - 1].thumbnail = thumbnail;
|
||||
} catch (error) {
|
||||
console.warn(`Failed to generate thumbnail for page ${i}:`, error);
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up
|
||||
pdf.destroy();
|
||||
|
||||
|
frontend/src/hooks/useProcessedFiles.ts (new file, 125 lines)
@@ -0,0 +1,125 @@
|
||||
import { useState, useEffect } from 'react';
|
||||
import { ProcessedFile, ProcessingState } from '../types/processing';
|
||||
import { pdfProcessingService } from '../services/pdfProcessingService';
|
||||
|
||||
interface UseProcessedFilesResult {
|
||||
processedFiles: Map<File, ProcessedFile>;
|
||||
processingStates: Map<string, ProcessingState>;
|
||||
isProcessing: boolean;
|
||||
hasProcessingErrors: boolean;
|
||||
cacheStats: {
|
||||
entries: number;
|
||||
totalSizeBytes: number;
|
||||
maxSizeBytes: number;
|
||||
};
|
||||
}
|
||||
|
||||
export function useProcessedFiles(activeFiles: File[]): UseProcessedFilesResult {
|
||||
const [processedFiles, setProcessedFiles] = useState<Map<File, ProcessedFile>>(new Map());
|
||||
const [processingStates, setProcessingStates] = useState<Map<string, ProcessingState>>(new Map());
|
||||
|
||||
useEffect(() => {
|
||||
// Subscribe to processing state changes
|
||||
const unsubscribe = pdfProcessingService.onProcessingChange(setProcessingStates);
|
||||
|
||||
// Check/start processing for each active file
|
||||
const checkProcessing = async () => {
|
||||
const newProcessedFiles = new Map<File, ProcessedFile>();
|
||||
|
||||
for (const file of activeFiles) {
|
||||
const processed = await pdfProcessingService.getProcessedFile(file);
|
||||
if (processed) {
|
||||
newProcessedFiles.set(file, processed);
|
||||
}
|
||||
}
|
||||
|
||||
setProcessedFiles(newProcessedFiles);
|
||||
};
|
||||
|
||||
checkProcessing();
|
||||
|
||||
return unsubscribe;
|
||||
}, [activeFiles]);
|
||||
|
||||
// Listen for processing completion and update processed files
|
||||
useEffect(() => {
|
||||
const updateProcessedFiles = async () => {
|
||||
const updated = new Map<File, ProcessedFile>();
|
||||
|
||||
for (const file of activeFiles) {
|
||||
const existing = processedFiles.get(file);
|
||||
if (existing) {
|
||||
updated.set(file, existing);
|
||||
} else {
|
||||
// Check if processing just completed
|
||||
const processed = await pdfProcessingService.getProcessedFile(file);
|
||||
if (processed) {
|
||||
updated.set(file, processed);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
setProcessedFiles(updated);
|
||||
};
|
||||
|
||||
// Small delay to allow processing state to settle
|
||||
const timeoutId = setTimeout(updateProcessedFiles, 100);
|
||||
return () => clearTimeout(timeoutId);
|
||||
}, [processingStates, activeFiles]);
|
||||
|
||||
// Cleanup when activeFiles changes
|
||||
useEffect(() => {
|
||||
const currentFiles = new Set(activeFiles);
|
||||
const previousFiles = Array.from(processedFiles.keys());
|
||||
const removedFiles = previousFiles.filter(file => !currentFiles.has(file));
|
||||
|
||||
if (removedFiles.length > 0) {
|
||||
// Clean up processing service cache
|
||||
pdfProcessingService.cleanup(removedFiles);
|
||||
|
||||
// Update local state
|
||||
setProcessedFiles(prev => {
|
||||
const updated = new Map();
|
||||
for (const [file, processed] of prev) {
|
||||
if (currentFiles.has(file)) {
|
||||
updated.set(file, processed);
|
||||
}
|
||||
}
|
||||
return updated;
|
||||
});
|
||||
}
|
||||
}, [activeFiles]);
|
||||
|
||||
// Derived state
|
||||
const isProcessing = processingStates.size > 0;
|
||||
const hasProcessingErrors = Array.from(processingStates.values()).some(state => state.status === 'error');
|
||||
const cacheStats = pdfProcessingService.getCacheStats();
|
||||
|
||||
return {
|
||||
processedFiles,
|
||||
processingStates,
|
||||
isProcessing,
|
||||
hasProcessingErrors,
|
||||
cacheStats
|
||||
};
|
||||
}
|
||||
|
||||
// Hook for getting a single processed file
|
||||
export function useProcessedFile(file: File | null): {
|
||||
processedFile: ProcessedFile | null;
|
||||
isProcessing: boolean;
|
||||
processingState: ProcessingState | null;
|
||||
} {
|
||||
const result = useProcessedFiles(file ? [file] : []);
|
||||
|
||||
const processedFile = file ? result.processedFiles.get(file) || null : null;
|
||||
const fileKey = file ? pdfProcessingService.generateFileKey(file) : '';
|
||||
const processingState = fileKey ? result.processingStates.get(fileKey) || null : null;
|
||||
const isProcessing = !!processingState;
|
||||
|
||||
return {
|
||||
processedFile,
|
||||
isProcessing,
|
||||
processingState
|
||||
};
|
||||
}
|
frontend/src/services/enhancedPDFProcessingService.ts (new file, 552 lines)
@@ -0,0 +1,552 @@
|
||||
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
|
||||
import { ProcessedFile, ProcessingState, PDFPage, ProcessingStrategy, ProcessingConfig, ProcessingMetrics } from '../types/processing';
|
||||
import { ProcessingCache } from './processingCache';
|
||||
import { FileHasher } from '../utils/fileHash';
|
||||
import { FileAnalyzer } from './fileAnalyzer';
|
||||
import { ProcessingErrorHandler } from './processingErrorHandler';
|
||||
|
||||
// Set up PDF.js worker
|
||||
GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
|
||||
|
||||
export class EnhancedPDFProcessingService {
|
||||
private static instance: EnhancedPDFProcessingService;
|
||||
private cache = new ProcessingCache();
|
||||
private processing = new Map<string, ProcessingState>();
|
||||
private processingListeners = new Set<(states: Map<string, ProcessingState>) => void>();
|
||||
private metrics: ProcessingMetrics = {
|
||||
totalFiles: 0,
|
||||
completedFiles: 0,
|
||||
failedFiles: 0,
|
||||
averageProcessingTime: 0,
|
||||
cacheHitRate: 0,
|
||||
memoryUsage: 0
|
||||
};
|
||||
|
||||
private defaultConfig: ProcessingConfig = {
|
||||
strategy: 'immediate_full',
|
||||
chunkSize: 20,
|
||||
thumbnailQuality: 'medium',
|
||||
priorityPageCount: 10,
|
||||
useWebWorker: false,
|
||||
maxRetries: 3,
|
||||
timeoutMs: 300000 // 5 minutes
|
||||
};
|
||||
|
||||
private constructor() {}
|
||||
|
||||
static getInstance(): EnhancedPDFProcessingService {
|
||||
if (!EnhancedPDFProcessingService.instance) {
|
||||
EnhancedPDFProcessingService.instance = new EnhancedPDFProcessingService();
|
||||
}
|
||||
return EnhancedPDFProcessingService.instance;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a file with intelligent strategy selection
|
||||
*/
|
||||
async processFile(file: File, customConfig?: Partial<ProcessingConfig>): Promise<ProcessedFile | null> {
|
||||
const fileKey = await this.generateFileKey(file);
|
||||
|
||||
// Check cache first
|
||||
const cached = this.cache.get(fileKey);
|
||||
if (cached) {
|
||||
console.log('Cache hit for:', file.name);
|
||||
this.updateMetrics('cacheHit');
|
||||
return cached;
|
||||
}
|
||||
|
||||
// Check if already processing
|
||||
if (this.processing.has(fileKey)) {
|
||||
console.log('Already processing:', file.name);
|
||||
return null;
|
||||
}
|
||||
|
||||
// Analyze file to determine optimal strategy
|
||||
const analysis = await FileAnalyzer.analyzeFile(file);
|
||||
if (analysis.isCorrupted) {
|
||||
throw new Error(`File ${file.name} appears to be corrupted`);
|
||||
}
|
||||
|
||||
// Create processing config
|
||||
const config: ProcessingConfig = {
|
||||
...this.defaultConfig,
|
||||
strategy: analysis.recommendedStrategy,
|
||||
...customConfig
|
||||
};
|
||||
|
||||
// Start processing
|
||||
this.startProcessing(file, fileKey, config, analysis.estimatedProcessingTime);
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start processing a file with the specified configuration
|
||||
*/
|
||||
private async startProcessing(
|
||||
file: File,
|
||||
fileKey: string,
|
||||
config: ProcessingConfig,
|
||||
estimatedTime: number
|
||||
): Promise<void> {
|
||||
// Create cancellation token
|
||||
const cancellationToken = ProcessingErrorHandler.createTimeoutController(config.timeoutMs);
|
||||
|
||||
// Set initial state
|
||||
const state: ProcessingState = {
|
||||
fileKey,
|
||||
fileName: file.name,
|
||||
status: 'processing',
|
||||
progress: 0,
|
||||
strategy: config.strategy,
|
||||
startedAt: Date.now(),
|
||||
estimatedTimeRemaining: estimatedTime,
|
||||
cancellationToken
|
||||
};
|
||||
|
||||
this.processing.set(fileKey, state);
|
||||
this.notifyListeners();
|
||||
this.updateMetrics('started');
|
||||
|
||||
try {
|
||||
// Execute processing with retry logic
|
||||
const processedFile = await ProcessingErrorHandler.executeWithRetry(
|
||||
() => this.executeProcessingStrategy(file, config, state),
|
||||
(error) => {
|
||||
state.error = error;
|
||||
this.notifyListeners();
|
||||
},
|
||||
config.maxRetries
|
||||
);
|
||||
|
||||
// Cache the result
|
||||
this.cache.set(fileKey, processedFile);
|
||||
|
||||
// Update state to completed
|
||||
state.status = 'completed';
|
||||
state.progress = 100;
|
||||
state.completedAt = Date.now();
|
||||
this.notifyListeners();
|
||||
this.updateMetrics('completed', Date.now() - state.startedAt);
|
||||
|
||||
// Remove from processing map after brief delay
|
||||
setTimeout(() => {
|
||||
this.processing.delete(fileKey);
|
||||
this.notifyListeners();
|
||||
}, 2000);
|
||||
|
||||
} catch (error) {
|
||||
console.error('Processing failed for', file.name, ':', error);
|
||||
|
||||
const processingError = ProcessingErrorHandler.createProcessingError(error);
|
||||
state.status = 'error';
|
||||
state.error = processingError;
|
||||
this.notifyListeners();
|
||||
this.updateMetrics('failed');
|
||||
|
||||
// Remove failed processing after delay
|
||||
setTimeout(() => {
|
||||
this.processing.delete(fileKey);
|
||||
this.notifyListeners();
|
||||
}, 10000);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute the actual processing based on strategy
|
||||
*/
|
||||
private async executeProcessingStrategy(
|
||||
file: File,
|
||||
config: ProcessingConfig,
|
||||
state: ProcessingState
|
||||
): Promise<ProcessedFile> {
|
||||
switch (config.strategy) {
|
||||
case 'immediate_full':
|
||||
return this.processImmediateFull(file, config, state);
|
||||
|
||||
case 'priority_pages':
|
||||
return this.processPriorityPages(file, config, state);
|
||||
|
||||
case 'progressive_chunked':
|
||||
return this.processProgressiveChunked(file, config, state);
|
||||
|
||||
case 'metadata_only':
|
||||
return this.processMetadataOnly(file, config, state);
|
||||
|
||||
default:
|
||||
return this.processImmediateFull(file, config, state);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Process all pages immediately (for small files)
|
||||
*/
|
||||
private async processImmediateFull(
|
||||
file: File,
|
||||
config: ProcessingConfig,
|
||||
state: ProcessingState
|
||||
): Promise<ProcessedFile> {
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
const pdf = await getDocument({ data: arrayBuffer }).promise;
|
||||
const totalPages = pdf.numPages;
|
||||
|
||||
state.progress = 10;
|
||||
this.notifyListeners();
|
||||
|
||||
const pages: PDFPage[] = [];
|
||||
|
||||
for (let i = 1; i <= totalPages; i++) {
|
||||
// Check for cancellation
|
||||
if (state.cancellationToken?.signal.aborted) {
|
||||
pdf.destroy();
|
||||
throw new Error('Processing cancelled');
|
||||
}
|
||||
|
||||
const page = await pdf.getPage(i);
|
||||
const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality);
|
||||
|
||||
pages.push({
|
||||
id: `${file.name}-page-${i}`,
|
||||
pageNumber: i,
|
||||
thumbnail,
|
||||
rotation: 0,
|
||||
selected: false
|
||||
});
|
||||
|
||||
// Update progress
|
||||
state.progress = 10 + (i / totalPages) * 85;
|
||||
state.currentPage = i;
|
||||
this.notifyListeners();
|
||||
}
|
||||
|
||||
pdf.destroy();
|
||||
state.progress = 100;
|
||||
this.notifyListeners();
|
||||
|
||||
return this.createProcessedFile(file, pages, totalPages);
|
||||
}
|
||||
|
||||
/**
|
||||
* Process priority pages first, then queue the rest
|
||||
*/
|
||||
private async processPriorityPages(
|
||||
file: File,
|
||||
config: ProcessingConfig,
|
||||
state: ProcessingState
|
||||
): Promise<ProcessedFile> {
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
const pdf = await getDocument({ data: arrayBuffer }).promise;
|
||||
const totalPages = pdf.numPages;
|
||||
|
||||
state.progress = 10;
|
||||
this.notifyListeners();
|
||||
|
||||
const pages: PDFPage[] = [];
|
||||
const priorityCount = Math.min(config.priorityPageCount, totalPages);
|
||||
|
||||
// Process priority pages first
|
||||
for (let i = 1; i <= priorityCount; i++) {
|
||||
if (state.cancellationToken?.signal.aborted) {
|
||||
pdf.destroy();
|
||||
throw new Error('Processing cancelled');
|
||||
}
|
||||
|
||||
const page = await pdf.getPage(i);
|
||||
const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality);
|
||||
|
||||
pages.push({
|
||||
id: `${file.name}-page-${i}`,
|
||||
pageNumber: i,
|
||||
thumbnail,
|
||||
rotation: 0,
|
||||
selected: false
|
||||
});
|
||||
|
||||
state.progress = 10 + (i / priorityCount) * 60;
|
||||
state.currentPage = i;
|
||||
this.notifyListeners();
|
||||
}
|
||||
|
||||
// Create placeholder pages for remaining pages
|
||||
for (let i = priorityCount + 1; i <= totalPages; i++) {
|
||||
pages.push({
|
||||
id: `${file.name}-page-${i}`,
|
||||
pageNumber: i,
|
||||
thumbnail: null, // Will be loaded lazily
|
||||
rotation: 0,
|
||||
selected: false
|
||||
});
|
||||
}
|
||||
|
||||
pdf.destroy();
|
||||
state.progress = 100;
|
||||
this.notifyListeners();
|
||||
|
||||
// Queue background processing for remaining pages (only if there are any)
|
||||
if (priorityCount < totalPages) {
|
||||
      this.queueBackgroundProcessing(file, priorityCount + 1, totalPages);
    }

    return this.createProcessedFile(file, pages, totalPages);
  }

  /**
   * Process in chunks with breaks between chunks
   */
  private async processProgressiveChunked(
    file: File,
    config: ProcessingConfig,
    state: ProcessingState
  ): Promise<ProcessedFile> {
    const arrayBuffer = await file.arrayBuffer();
    const pdf = await getDocument({ data: arrayBuffer }).promise;
    const totalPages = pdf.numPages;

    state.progress = 10;
    this.notifyListeners();

    const pages: PDFPage[] = [];
    const chunkSize = config.chunkSize;
    let processedPages = 0;

    // Process first chunk immediately
    const firstChunkEnd = Math.min(chunkSize, totalPages);

    for (let i = 1; i <= firstChunkEnd; i++) {
      if (state.cancellationToken?.signal.aborted) {
        pdf.destroy();
        throw new Error('Processing cancelled');
      }

      const page = await pdf.getPage(i);
      const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality);

      pages.push({
        id: `${file.name}-page-${i}`,
        pageNumber: i,
        thumbnail,
        rotation: 0,
        selected: false
      });

      processedPages++;
      state.progress = 10 + (processedPages / totalPages) * 70;
      state.currentPage = i;
      this.notifyListeners();

      // Small delay to prevent UI blocking
      if (i % 5 === 0) {
        await new Promise(resolve => setTimeout(resolve, 10));
      }
    }

    // Create placeholders for remaining pages
    for (let i = firstChunkEnd + 1; i <= totalPages; i++) {
      pages.push({
        id: `${file.name}-page-${i}`,
        pageNumber: i,
        thumbnail: null,
        rotation: 0,
        selected: false
      });
    }

    pdf.destroy();
    state.progress = 100;
    this.notifyListeners();

    // Queue remaining chunks for background processing (only if there are any)
    if (firstChunkEnd < totalPages) {
      this.queueChunkedBackgroundProcessing(file, firstChunkEnd + 1, totalPages, chunkSize);
    }

    return this.createProcessedFile(file, pages, totalPages);
  }

  /**
   * Process metadata only (for very large files)
   */
  private async processMetadataOnly(
    file: File,
    config: ProcessingConfig,
    state: ProcessingState
  ): Promise<ProcessedFile> {
    const arrayBuffer = await file.arrayBuffer();
    const pdf = await getDocument({ data: arrayBuffer }).promise;
    const totalPages = pdf.numPages;

    state.progress = 50;
    this.notifyListeners();

    // Create placeholder pages without thumbnails
    const pages: PDFPage[] = [];
    for (let i = 1; i <= totalPages; i++) {
      pages.push({
        id: `${file.name}-page-${i}`,
        pageNumber: i,
        thumbnail: null,
        rotation: 0,
        selected: false
      });
    }

    pdf.destroy();
    state.progress = 100;
    this.notifyListeners();

    return this.createProcessedFile(file, pages, totalPages);
  }

  /**
   * Render a page thumbnail with specified quality
   */
  private async renderPageThumbnail(page: any, quality: 'low' | 'medium' | 'high'): Promise<string> {
    const scales = { low: 0.2, medium: 0.5, high: 0.8 }; // Reduced low quality for page editor
    const scale = scales[quality];

    const viewport = page.getViewport({ scale });
    const canvas = document.createElement('canvas');
    canvas.width = viewport.width;
    canvas.height = viewport.height;

    const context = canvas.getContext('2d');
    if (!context) {
      throw new Error('Could not get canvas context');
    }

    await page.render({ canvasContext: context, viewport }).promise;
    return canvas.toDataURL('image/jpeg', 0.8); // Use JPEG for better compression
  }

  /**
   * Create a ProcessedFile object
   */
  private createProcessedFile(file: File, pages: PDFPage[], totalPages: number): ProcessedFile {
    return {
      id: `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
      pages,
      totalPages,
      metadata: {
        title: file.name,
        createdAt: new Date().toISOString(),
        modifiedAt: new Date().toISOString()
      }
    };
  }

  /**
   * Queue background processing for remaining pages
   */
  private queueBackgroundProcessing(file: File, startPage: number, endPage: number): void {
    // TODO: Implement background processing queue
    console.log(`Queued background processing for ${file.name} pages ${startPage}-${endPage}`);
  }

  /**
   * Queue chunked background processing
   */
  private queueChunkedBackgroundProcessing(file: File, startPage: number, endPage: number, chunkSize: number): void {
    // TODO: Implement chunked background processing
    console.log(`Queued chunked background processing for ${file.name} pages ${startPage}-${endPage} in chunks of ${chunkSize}`);
  }

  /**
   * Generate a unique, collision-resistant cache key
   */
  private async generateFileKey(file: File): Promise<string> {
    return await FileHasher.generateHybridHash(file);
  }

  /**
   * Cancel processing for a specific file
   */
  cancelProcessing(fileKey: string): void {
    const state = this.processing.get(fileKey);
    if (state && state.cancellationToken) {
      state.cancellationToken.abort();
      state.status = 'cancelled';
      this.notifyListeners();
    }
  }

  /**
   * Update processing metrics
   */
  private updateMetrics(event: 'started' | 'completed' | 'failed' | 'cacheHit', processingTime?: number): void {
    switch (event) {
      case 'started':
        this.metrics.totalFiles++;
        break;
      case 'completed':
        this.metrics.completedFiles++;
        if (processingTime) {
          // Update rolling average
          const totalProcessingTime = this.metrics.averageProcessingTime * (this.metrics.completedFiles - 1) + processingTime;
          this.metrics.averageProcessingTime = totalProcessingTime / this.metrics.completedFiles;
        }
        break;
      case 'failed':
        this.metrics.failedFiles++;
        break;
      case 'cacheHit':
        // Update cache hit rate
        const totalAttempts = this.metrics.totalFiles + 1;
        this.metrics.cacheHitRate = (this.metrics.cacheHitRate * this.metrics.totalFiles + 1) / totalAttempts;
        break;
    }
  }

  /**
   * Get processing metrics
   */
  getMetrics(): ProcessingMetrics {
    return { ...this.metrics };
  }

  /**
   * State subscription for components
   */
  onProcessingChange(callback: (states: Map<string, ProcessingState>) => void): () => void {
    this.processingListeners.add(callback);
    return () => this.processingListeners.delete(callback);
  }

  getProcessingStates(): Map<string, ProcessingState> {
    return new Map(this.processing);
  }

  private notifyListeners(): void {
    this.processingListeners.forEach(callback => callback(this.processing));
  }

  /**
   * Cleanup method for removed files
   */
  cleanup(removedFiles: File[]): void {
    removedFiles.forEach(async (file) => {
      const key = await this.generateFileKey(file);
      this.cache.delete(key);
      this.cancelProcessing(key);
      this.processing.delete(key);
    });
    this.notifyListeners();
  }

  /**
   * Get cache statistics
   */
  getCacheStats() {
    return this.cache.getStats();
  }

  /**
   * Clear all cache and processing
   */
  clearAll(): void {
    this.cache.clear();
    this.processing.clear();
    this.notifyListeners();
  }
}

// Export singleton instance
export const enhancedPDFProcessingService = EnhancedPDFProcessingService.getInstance();
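
Usage sketch (illustrative, not part of this diff): a consumer can subscribe to processing state changes and read metrics through the exported singleton. Only methods shown above are used; the import path and logging are assumptions.

// Hypothetical consumer of the enhanced processing service.
import { enhancedPDFProcessingService } from './enhancedPDFProcessingService';

const unsubscribe = enhancedPDFProcessingService.onProcessingChange((states) => {
  for (const [fileKey, state] of states) {
    console.log(`${state.fileName} [${fileKey}]: ${state.status} (${Math.round(state.progress)}%)`);
  }
});

console.log(enhancedPDFProcessingService.getMetrics());    // totals, average time, cache hit rate
console.log(enhancedPDFProcessingService.getCacheStats()); // entries and size from ProcessingCache

// Later, e.g. on component unmount:
unsubscribe();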
240
frontend/src/services/fileAnalyzer.ts
Normal file
@ -0,0 +1,240 @@
import { getDocument } from 'pdfjs-dist';
import { FileAnalysis, ProcessingStrategy } from '../types/processing';

export class FileAnalyzer {
  private static readonly SIZE_THRESHOLDS = {
    SMALL: 10 * 1024 * 1024,   // 10MB
    MEDIUM: 50 * 1024 * 1024,  // 50MB
    LARGE: 200 * 1024 * 1024,  // 200MB
  };

  private static readonly PAGE_THRESHOLDS = {
    FEW: 10,      // < 10 pages - immediate full processing
    MANY: 50,     // < 50 pages - priority pages
    MASSIVE: 100, // < 100 pages - progressive chunked
    // >100 pages = metadata only
  };

  /**
   * Analyze a file to determine optimal processing strategy
   */
  static async analyzeFile(file: File): Promise<FileAnalysis> {
    const analysis: FileAnalysis = {
      fileSize: file.size,
      isEncrypted: false,
      isCorrupted: false,
      recommendedStrategy: 'metadata_only',
      estimatedProcessingTime: 0,
    };

    try {
      // Quick validation and page count estimation
      const quickAnalysis = await this.quickPDFAnalysis(file);
      analysis.estimatedPageCount = quickAnalysis.pageCount;
      analysis.isEncrypted = quickAnalysis.isEncrypted;
      analysis.isCorrupted = quickAnalysis.isCorrupted;

      // Determine strategy based on file characteristics
      analysis.recommendedStrategy = this.determineStrategy(file.size, quickAnalysis.pageCount);

      // Estimate processing time
      analysis.estimatedProcessingTime = this.estimateProcessingTime(
        file.size,
        quickAnalysis.pageCount,
        analysis.recommendedStrategy
      );

    } catch (error) {
      console.error('File analysis failed:', error);
      analysis.isCorrupted = true;
      analysis.recommendedStrategy = 'metadata_only';
    }

    return analysis;
  }

  /**
   * Quick PDF analysis without full processing
   */
  private static async quickPDFAnalysis(file: File): Promise<{
    pageCount: number;
    isEncrypted: boolean;
    isCorrupted: boolean;
  }> {
    try {
      // For small files, read the whole file
      // For large files, try the whole file first (PDF.js needs the complete structure)
      const arrayBuffer = await file.arrayBuffer();

      const pdf = await getDocument({
        data: arrayBuffer,
        stopAtErrors: false, // Don't stop at minor errors
        verbosity: 0         // Suppress PDF.js warnings
      }).promise;

      const pageCount = pdf.numPages;
      const isEncrypted = pdf.isEncrypted;

      // Clean up
      pdf.destroy();

      return {
        pageCount,
        isEncrypted,
        isCorrupted: false
      };

    } catch (error) {
      // Try to determine if it's corruption vs encryption
      const errorMessage = error instanceof Error ? error.message.toLowerCase() : '';
      const isEncrypted = errorMessage.includes('password') || errorMessage.includes('encrypted');

      return {
        pageCount: 0,
        isEncrypted,
        isCorrupted: !isEncrypted // If not encrypted, probably corrupted
      };
    }
  }

  /**
   * Determine the best processing strategy based on file characteristics
   */
  private static determineStrategy(fileSize: number, pageCount?: number): ProcessingStrategy {
    // Handle corrupted or encrypted files
    if (!pageCount || pageCount === 0) {
      return 'metadata_only';
    }

    // Small files with few pages - process everything immediately
    if (fileSize <= this.SIZE_THRESHOLDS.SMALL && pageCount <= this.PAGE_THRESHOLDS.FEW) {
      return 'immediate_full';
    }

    // Medium files or many pages - priority pages first, then progressive
    if (fileSize <= this.SIZE_THRESHOLDS.MEDIUM && pageCount <= this.PAGE_THRESHOLDS.MANY) {
      return 'priority_pages';
    }

    // Large files or massive page counts - chunked processing
    if (fileSize <= this.SIZE_THRESHOLDS.LARGE && pageCount <= this.PAGE_THRESHOLDS.MASSIVE) {
      return 'progressive_chunked';
    }

    // Very large files - metadata only
    return 'metadata_only';
  }

  /**
   * Estimate processing time based on file characteristics and strategy
   */
  private static estimateProcessingTime(
    fileSize: number,
    pageCount: number = 0,
    strategy: ProcessingStrategy
  ): number {
    const baseTimes = {
      immediate_full: 200,      // 200ms per page
      priority_pages: 150,      // 150ms per page (optimized)
      progressive_chunked: 100, // 100ms per page (chunked)
      metadata_only: 50         // 50ms total
    };

    const baseTime = baseTimes[strategy];

    switch (strategy) {
      case 'metadata_only':
        return baseTime;

      case 'immediate_full':
        return pageCount * baseTime;

      case 'priority_pages':
        // Estimate time for priority pages (first 10)
        const priorityPages = Math.min(pageCount, 10);
        return priorityPages * baseTime;

      case 'progressive_chunked':
        // Estimate time for first chunk (20 pages)
        const firstChunk = Math.min(pageCount, 20);
        return firstChunk * baseTime;

      default:
        return pageCount * baseTime;
    }
  }

  /**
   * Get processing recommendations for a set of files
   */
  static async analyzeMultipleFiles(files: File[]): Promise<{
    analyses: Map<File, FileAnalysis>;
    recommendations: {
      totalEstimatedTime: number;
      suggestedBatchSize: number;
      shouldUseWebWorker: boolean;
      memoryWarning: boolean;
    };
  }> {
    const analyses = new Map<File, FileAnalysis>();
    let totalEstimatedTime = 0;
    let totalSize = 0;
    let totalPages = 0;

    // Analyze each file
    for (const file of files) {
      const analysis = await this.analyzeFile(file);
      analyses.set(file, analysis);
      totalEstimatedTime += analysis.estimatedProcessingTime;
      totalSize += file.size;
      totalPages += analysis.estimatedPageCount || 0;
    }

    // Generate recommendations
    const recommendations = {
      totalEstimatedTime,
      suggestedBatchSize: this.calculateBatchSize(files.length, totalSize),
      shouldUseWebWorker: totalPages > 100 || totalSize > this.SIZE_THRESHOLDS.MEDIUM,
      memoryWarning: totalSize > this.SIZE_THRESHOLDS.LARGE || totalPages > this.PAGE_THRESHOLDS.MASSIVE
    };

    return { analyses, recommendations };
  }

  /**
   * Calculate optimal batch size for processing multiple files
   */
  private static calculateBatchSize(fileCount: number, totalSize: number): number {
    // Process small batches for large total sizes
    if (totalSize > this.SIZE_THRESHOLDS.LARGE) {
      return Math.max(1, Math.floor(fileCount / 4));
    }

    if (totalSize > this.SIZE_THRESHOLDS.MEDIUM) {
      return Math.max(2, Math.floor(fileCount / 2));
    }

    // Process all at once for smaller total sizes
    return fileCount;
  }

  /**
   * Check if a file appears to be a valid PDF
   */
  static async isValidPDF(file: File): Promise<boolean> {
    if (file.type !== 'application/pdf' && !file.name.toLowerCase().endsWith('.pdf')) {
      return false;
    }

    try {
      // Read first few bytes to check PDF header
      const header = file.slice(0, 8);
      const headerBytes = new Uint8Array(await header.arrayBuffer());
      const headerString = String.fromCharCode(...headerBytes);

      return headerString.startsWith('%PDF-');
    } catch (error) {
      return false;
    }
  }
}
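
Usage sketch (illustrative, not part of this diff): a caller can validate a file and pick a strategy before starting processing. The wrapper function name is an assumption.

import { FileAnalyzer } from './fileAnalyzer';

// Hypothetical helper: choose a processing strategy for one file.
async function chooseStrategy(file: File) {
  if (!(await FileAnalyzer.isValidPDF(file))) {
    throw new Error(`${file.name} does not look like a valid PDF`);
  }
  const analysis = await FileAnalyzer.analyzeFile(file);
  console.log(
    `${file.name}: ~${analysis.estimatedPageCount ?? '?'} pages, ` +
    `strategy=${analysis.recommendedStrategy}, est. ${analysis.estimatedProcessingTime}ms`
  );
  return analysis.recommendedStrategy;
}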
188
frontend/src/services/pdfProcessingService.ts
Normal file
@ -0,0 +1,188 @@
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
import { ProcessedFile, ProcessingState, PDFPage } from '../types/processing';
import { ProcessingCache } from './processingCache';

// Set up PDF.js worker
GlobalWorkerOptions.workerSrc = '/pdf.worker.js';

export class PDFProcessingService {
  private static instance: PDFProcessingService;
  private cache = new ProcessingCache();
  private processing = new Map<string, ProcessingState>();
  private processingListeners = new Set<(states: Map<string, ProcessingState>) => void>();

  private constructor() {}

  static getInstance(): PDFProcessingService {
    if (!PDFProcessingService.instance) {
      PDFProcessingService.instance = new PDFProcessingService();
    }
    return PDFProcessingService.instance;
  }

  async getProcessedFile(file: File): Promise<ProcessedFile | null> {
    const fileKey = this.generateFileKey(file);

    // Check cache first
    const cached = this.cache.get(fileKey);
    if (cached) {
      console.log('Cache hit for:', file.name);
      return cached;
    }

    // Check if already processing
    if (this.processing.has(fileKey)) {
      console.log('Already processing:', file.name);
      return null; // Will be available when processing completes
    }

    // Start processing
    this.startProcessing(file, fileKey);
    return null;
  }

  private async startProcessing(file: File, fileKey: string): Promise<void> {
    // Set initial state
    const state: ProcessingState = {
      fileKey,
      fileName: file.name,
      status: 'processing',
      progress: 0,
      startedAt: Date.now()
    };

    this.processing.set(fileKey, state);
    this.notifyListeners();

    try {
      // Process the file with progress updates
      const processedFile = await this.processFileWithProgress(file, (progress) => {
        state.progress = progress;
        this.notifyListeners();
      });

      // Cache the result
      this.cache.set(fileKey, processedFile);

      // Update state to completed
      state.status = 'completed';
      state.progress = 100;
      state.completedAt = Date.now();
      this.notifyListeners();

      // Remove from processing map after brief delay
      setTimeout(() => {
        this.processing.delete(fileKey);
        this.notifyListeners();
      }, 2000);

    } catch (error) {
      console.error('Processing failed for', file.name, ':', error);
      state.status = 'error';
      state.error = error instanceof Error ? error.message : 'Unknown error';
      this.notifyListeners();

      // Remove failed processing after delay
      setTimeout(() => {
        this.processing.delete(fileKey);
        this.notifyListeners();
      }, 5000);
    }
  }

  private async processFileWithProgress(
    file: File,
    onProgress: (progress: number) => void
  ): Promise<ProcessedFile> {
    const arrayBuffer = await file.arrayBuffer();
    const pdf = await getDocument({ data: arrayBuffer }).promise;
    const totalPages = pdf.numPages;

    onProgress(10); // PDF loaded

    const pages: PDFPage[] = [];

    for (let i = 1; i <= totalPages; i++) {
      const page = await pdf.getPage(i);
      const viewport = page.getViewport({ scale: 0.5 });
      const canvas = document.createElement('canvas');
      canvas.width = viewport.width;
      canvas.height = viewport.height;

      const context = canvas.getContext('2d');
      if (context) {
        await page.render({ canvasContext: context, viewport }).promise;
        const thumbnail = canvas.toDataURL();

        pages.push({
          id: `${file.name}-page-${i}`,
          pageNumber: i,
          thumbnail,
          rotation: 0,
          selected: false
        });
      }

      // Update progress
      const progress = 10 + (i / totalPages) * 85; // 10-95%
      onProgress(progress);
    }

    pdf.destroy();
    onProgress(100);

    return {
      id: `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
      pages,
      totalPages,
      metadata: {
        title: file.name,
        createdAt: new Date().toISOString(),
        modifiedAt: new Date().toISOString()
      }
    };
  }

  // State subscription for components
  onProcessingChange(callback: (states: Map<string, ProcessingState>) => void): () => void {
    this.processingListeners.add(callback);
    return () => this.processingListeners.delete(callback);
  }

  getProcessingStates(): Map<string, ProcessingState> {
    return new Map(this.processing);
  }

  private notifyListeners(): void {
    this.processingListeners.forEach(callback => callback(this.processing));
  }

  generateFileKey(file: File): string {
    return `${file.name}-${file.size}-${file.lastModified}`;
  }

  // Cleanup method for activeFiles changes
  cleanup(removedFiles: File[]): void {
    removedFiles.forEach(file => {
      const key = this.generateFileKey(file);
      this.cache.delete(key);
      this.processing.delete(key);
    });
    this.notifyListeners();
  }

  // Get cache stats (for debugging)
  getCacheStats() {
    return this.cache.getStats();
  }

  // Clear all cache and processing
  clearAll(): void {
    this.cache.clear();
    this.processing.clear();
    this.notifyListeners();
  }
}

// Export singleton instance
export const pdfProcessingService = PDFProcessingService.getInstance();
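
Usage sketch (illustrative, not part of this diff): getProcessedFile returns null while work is in flight, so a caller can wait for the completion notification and then re-read the cache. The helper name is an assumption.

import { pdfProcessingService } from './pdfProcessingService';

// Hypothetical helper: resolve once thumbnails for a file are available.
async function loadProcessedFile(file: File) {
  const cached = await pdfProcessingService.getProcessedFile(file);
  if (cached) return cached; // cache hit: already processed

  // Otherwise processing has been started; wait for completion, then re-read the cache.
  return new Promise((resolve) => {
    const stop = pdfProcessingService.onProcessingChange(async (states) => {
      const key = pdfProcessingService.generateFileKey(file);
      if (states.get(key)?.status === 'completed') {
        stop();
        resolve(await pdfProcessingService.getProcessedFile(file));
      }
    });
  });
}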
138
frontend/src/services/processingCache.ts
Normal file
@ -0,0 +1,138 @@
import { ProcessedFile, CacheConfig, CacheEntry, CacheStats } from '../types/processing';

export class ProcessingCache {
  private cache = new Map<string, CacheEntry>();
  private totalSize = 0;

  constructor(private config: CacheConfig = {
    maxFiles: 20,
    maxSizeBytes: 2 * 1024 * 1024 * 1024, // 2GB
    ttlMs: 30 * 60 * 1000                 // 30 minutes
  }) {}

  set(key: string, data: ProcessedFile): void {
    // Remove expired entries first
    this.cleanup();

    // Calculate entry size (rough estimate)
    const size = this.calculateSize(data);

    // Make room if needed
    this.makeRoom(size);

    this.cache.set(key, {
      data,
      size,
      lastAccessed: Date.now(),
      createdAt: Date.now()
    });

    this.totalSize += size;
  }

  get(key: string): ProcessedFile | null {
    const entry = this.cache.get(key);
    if (!entry) return null;

    // Check TTL
    if (Date.now() - entry.createdAt > this.config.ttlMs) {
      this.delete(key);
      return null;
    }

    // Update last accessed
    entry.lastAccessed = Date.now();
    return entry.data;
  }

  has(key: string): boolean {
    const entry = this.cache.get(key);
    if (!entry) return false;

    // Check TTL
    if (Date.now() - entry.createdAt > this.config.ttlMs) {
      this.delete(key);
      return false;
    }

    return true;
  }

  private makeRoom(neededSize: number): void {
    // Remove oldest entries until we have space
    while (
      this.cache.size >= this.config.maxFiles ||
      this.totalSize + neededSize > this.config.maxSizeBytes
    ) {
      const oldestKey = this.findOldestEntry();
      if (oldestKey) {
        this.delete(oldestKey);
      } else break;
    }
  }

  private findOldestEntry(): string | null {
    let oldest: { key: string; lastAccessed: number } | null = null;

    for (const [key, entry] of this.cache) {
      if (!oldest || entry.lastAccessed < oldest.lastAccessed) {
        oldest = { key, lastAccessed: entry.lastAccessed };
      }
    }

    return oldest?.key || null;
  }

  private cleanup(): void {
    const now = Date.now();
    for (const [key, entry] of this.cache) {
      if (now - entry.createdAt > this.config.ttlMs) {
        this.delete(key);
      }
    }
  }

  private calculateSize(data: ProcessedFile): number {
    // Rough size estimation
    let size = 0;

    // Estimate size of thumbnails (main memory consumer)
    data.pages.forEach(page => {
      if (page.thumbnail) {
        // Base64 thumbnail is roughly 50KB each
        size += 50 * 1024;
      }
    });

    // Add some overhead for other data
    size += 10 * 1024; // 10KB overhead

    return size;
  }

  delete(key: string): void {
    const entry = this.cache.get(key);
    if (entry) {
      this.totalSize -= entry.size;
      this.cache.delete(key);
    }
  }

  clear(): void {
    this.cache.clear();
    this.totalSize = 0;
  }

  getStats(): CacheStats {
    return {
      entries: this.cache.size,
      totalSizeBytes: this.totalSize,
      maxSizeBytes: this.config.maxSizeBytes
    };
  }

  // Get all cached keys (for debugging and cleanup)
  getKeys(): string[] {
    return Array.from(this.cache.keys());
  }
}
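
Usage sketch (illustrative, not part of this diff): the cache can also be instantiated with a custom budget instead of the 2GB/30-minute default; the values below are assumptions.

import { ProcessingCache } from './processingCache';

// Hypothetical standalone cache with a smaller budget.
const smallCache = new ProcessingCache({
  maxFiles: 5,
  maxSizeBytes: 256 * 1024 * 1024, // 256MB
  ttlMs: 10 * 60 * 1000,           // 10 minutes
});

// smallCache.set(fileKey, processedFile);   // evicts least-recently-used entries as needed
// const hit = smallCache.get(fileKey);      // null on miss or TTL expiry
// console.log(smallCache.getStats());       // { entries, totalSizeBytes, maxSizeBytes }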
282
frontend/src/services/processingErrorHandler.ts
Normal file
@ -0,0 +1,282 @@
import { ProcessingError } from '../types/processing';

export class ProcessingErrorHandler {
  private static readonly DEFAULT_MAX_RETRIES = 3;
  private static readonly RETRY_DELAYS = [1000, 2000, 4000]; // Progressive backoff in ms

  /**
   * Create a ProcessingError from an unknown error
   */
  static createProcessingError(
    error: unknown,
    retryCount: number = 0,
    maxRetries: number = this.DEFAULT_MAX_RETRIES
  ): ProcessingError {
    const originalError = error instanceof Error ? error : new Error(String(error));
    const message = originalError.message;

    // Determine error type based on error message and properties
    const errorType = this.determineErrorType(originalError, message);

    // Determine if error is recoverable
    const recoverable = this.isRecoverable(errorType, retryCount, maxRetries);

    return {
      type: errorType,
      message: this.formatErrorMessage(errorType, message),
      recoverable,
      retryCount,
      maxRetries,
      originalError
    };
  }

  /**
   * Determine the type of error based on error characteristics
   */
  private static determineErrorType(error: Error, message: string): ProcessingError['type'] {
    const lowerMessage = message.toLowerCase();

    // Network-related errors
    if (lowerMessage.includes('network') ||
        lowerMessage.includes('fetch') ||
        lowerMessage.includes('connection')) {
      return 'network';
    }

    // Memory-related errors
    if (lowerMessage.includes('memory') ||
        lowerMessage.includes('quota') ||
        lowerMessage.includes('allocation') ||
        error.name === 'QuotaExceededError') {
      return 'memory';
    }

    // Timeout errors
    if (lowerMessage.includes('timeout') ||
        lowerMessage.includes('aborted') ||
        error.name === 'AbortError') {
      return 'timeout';
    }

    // Cancellation
    if (lowerMessage.includes('cancel') ||
        lowerMessage.includes('abort') ||
        error.name === 'AbortError') {
      return 'cancelled';
    }

    // PDF corruption/parsing errors
    if (lowerMessage.includes('pdf') ||
        lowerMessage.includes('parse') ||
        lowerMessage.includes('invalid') ||
        lowerMessage.includes('corrupt') ||
        lowerMessage.includes('malformed')) {
      return 'corruption';
    }

    // Default to parsing error
    return 'parsing';
  }

  /**
   * Determine if an error is recoverable based on type and retry count
   */
  private static isRecoverable(
    errorType: ProcessingError['type'],
    retryCount: number,
    maxRetries: number
  ): boolean {
    // Never recoverable
    if (errorType === 'cancelled' || errorType === 'corruption') {
      return false;
    }

    // Recoverable if we haven't exceeded retry count
    if (retryCount >= maxRetries) {
      return false;
    }

    // Memory errors are usually not recoverable
    if (errorType === 'memory') {
      return retryCount < 1; // Only one retry for memory errors
    }

    // Network and timeout errors are usually recoverable
    return errorType === 'network' || errorType === 'timeout' || errorType === 'parsing';
  }

  /**
   * Format error message for user display
   */
  private static formatErrorMessage(errorType: ProcessingError['type'], originalMessage: string): string {
    switch (errorType) {
      case 'network':
        return 'Network connection failed. Please check your internet connection and try again.';

      case 'memory':
        return 'Insufficient memory to process this file. Try closing other applications or processing a smaller file.';

      case 'timeout':
        return 'Processing timed out. This file may be too large or complex to process.';

      case 'cancelled':
        return 'Processing was cancelled by user.';

      case 'corruption':
        return 'This PDF file appears to be corrupted or encrypted. Please try a different file.';

      case 'parsing':
        return `Failed to process PDF: ${originalMessage}`;

      default:
        return `Processing failed: ${originalMessage}`;
    }
  }

  /**
   * Execute an operation with automatic retry logic
   */
  static async executeWithRetry<T>(
    operation: () => Promise<T>,
    onError?: (error: ProcessingError) => void,
    maxRetries: number = this.DEFAULT_MAX_RETRIES
  ): Promise<T> {
    let lastError: ProcessingError | null = null;

    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        return await operation();
      } catch (error) {
        lastError = this.createProcessingError(error, attempt, maxRetries);

        // Notify error handler
        if (onError) {
          onError(lastError);
        }

        // Don't retry if not recoverable
        if (!lastError.recoverable) {
          break;
        }

        // Don't retry on last attempt
        if (attempt === maxRetries) {
          break;
        }

        // Wait before retry with progressive backoff
        const delay = this.RETRY_DELAYS[Math.min(attempt, this.RETRY_DELAYS.length - 1)];
        await this.delay(delay);

        console.log(`Retrying operation (attempt ${attempt + 2}/${maxRetries + 1}) after ${delay}ms delay`);
      }
    }

    // All retries exhausted
    throw lastError || new Error('Operation failed after all retries');
  }

  /**
   * Create a timeout wrapper for operations
   */
  static withTimeout<T>(
    operation: () => Promise<T>,
    timeoutMs: number,
    timeoutMessage: string = 'Operation timed out'
  ): Promise<T> {
    return new Promise<T>((resolve, reject) => {
      const timeoutId = setTimeout(() => {
        reject(new Error(timeoutMessage));
      }, timeoutMs);

      operation()
        .then(result => {
          clearTimeout(timeoutId);
          resolve(result);
        })
        .catch(error => {
          clearTimeout(timeoutId);
          reject(error);
        });
    });
  }

  /**
   * Create an AbortController that times out after specified duration
   */
  static createTimeoutController(timeoutMs: number): AbortController {
    const controller = new AbortController();

    setTimeout(() => {
      controller.abort();
    }, timeoutMs);

    return controller;
  }

  /**
   * Check if an error indicates the operation should be retried
   */
  static shouldRetry(error: ProcessingError): boolean {
    return error.recoverable && error.retryCount < error.maxRetries;
  }

  /**
   * Get user-friendly suggestions based on error type
   */
  static getErrorSuggestions(error: ProcessingError): string[] {
    switch (error.type) {
      case 'network':
        return [
          'Check your internet connection',
          'Try refreshing the page',
          'Try again in a few moments'
        ];

      case 'memory':
        return [
          'Close other browser tabs or applications',
          'Try processing a smaller file',
          'Restart your browser',
          'Use a device with more memory'
        ];

      case 'timeout':
        return [
          'Try processing a smaller file',
          'Break large files into smaller sections',
          'Check your internet connection speed'
        ];

      case 'corruption':
        return [
          'Verify the PDF file opens in other applications',
          'Try re-downloading the file',
          'Try a different PDF file',
          'Contact the file creator if it appears corrupted'
        ];

      case 'parsing':
        return [
          'Verify this is a valid PDF file',
          'Try a different PDF file',
          'Contact support if the problem persists'
        ];

      default:
        return [
          'Try refreshing the page',
          'Try again in a few moments',
          'Contact support if the problem persists'
        ];
    }
  }

  /**
   * Utility function for delays
   */
  private static delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
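
Usage sketch (illustrative, not part of this diff): retry and timeout can be combined so each attempt is bounded; the wrapper name and the 30s/3-retry values are assumptions.

import { ProcessingErrorHandler } from './processingErrorHandler';

// Hypothetical wrapper: run a processing step with a per-attempt timeout and backoff retries.
async function runWithRecovery<T>(step: () => Promise<T>): Promise<T> {
  return ProcessingErrorHandler.executeWithRetry(
    () => ProcessingErrorHandler.withTimeout(step, 30_000, 'PDF processing timed out'),
    (err) => console.warn(err.message, ProcessingErrorHandler.getErrorSuggestions(err)),
    3 // maxRetries
  );
}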
@ -1,7 +1,7 @@
export interface PDFPage {
  id: string;
  pageNumber: number;
  thumbnail: string;
  thumbnail: string | null;
  rotation: number;
  selected: boolean;
  splitBefore?: boolean;
@ -24,4 +24,4 @@ export interface PageOperation {
export interface UndoRedoState {
  operations: PageOperation[];
  currentIndex: number;
}
}
91
frontend/src/types/processing.ts
Normal file
@ -0,0 +1,91 @@
export interface ProcessingError {
  type: 'network' | 'parsing' | 'memory' | 'corruption' | 'timeout' | 'cancelled';
  message: string;
  recoverable: boolean;
  retryCount: number;
  maxRetries: number;
  originalError?: Error;
}

export interface ProcessingState {
  fileKey: string;
  fileName: string;
  status: 'pending' | 'processing' | 'completed' | 'error' | 'cancelled';
  progress: number; // 0-100
  strategy: ProcessingStrategy;
  error?: ProcessingError;
  startedAt: number;
  completedAt?: number;
  estimatedTimeRemaining?: number;
  currentPage?: number;
  cancellationToken?: AbortController;
}

export interface ProcessedFile {
  id: string;
  pages: PDFPage[];
  totalPages: number;
  metadata: {
    title: string;
    createdAt: string;
    modifiedAt: string;
  };
}

export interface PDFPage {
  id: string;
  pageNumber: number;
  thumbnail: string | null;
  rotation: number;
  selected: boolean;
  splitBefore?: boolean;
}

export interface CacheConfig {
  maxFiles: number;
  maxSizeBytes: number;
  ttlMs: number;
}

export interface CacheEntry {
  data: ProcessedFile;
  size: number;
  lastAccessed: number;
  createdAt: number;
}

export interface CacheStats {
  entries: number;
  totalSizeBytes: number;
  maxSizeBytes: number;
}

export type ProcessingStrategy = 'immediate_full' | 'progressive_chunked' | 'metadata_only' | 'priority_pages';

export interface ProcessingConfig {
  strategy: ProcessingStrategy;
  chunkSize: number; // Pages per chunk
  thumbnailQuality: 'low' | 'medium' | 'high';
  priorityPageCount: number; // Number of priority pages to process first
  useWebWorker: boolean;
  maxRetries: number;
  timeoutMs: number;
}

export interface FileAnalysis {
  fileSize: number;
  estimatedPageCount?: number;
  isEncrypted: boolean;
  isCorrupted: boolean;
  recommendedStrategy: ProcessingStrategy;
  estimatedProcessingTime: number; // milliseconds
}

export interface ProcessingMetrics {
  totalFiles: number;
  completedFiles: number;
  failedFiles: number;
  averageProcessingTime: number;
  cacheHitRate: number;
  memoryUsage: number;
}
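
Example (illustrative, not part of this diff): a ProcessingConfig literal for chunked processing; the import path and all values are assumptions.

import { ProcessingConfig } from '../types/processing';

const chunkedConfig: ProcessingConfig = {
  strategy: 'progressive_chunked',
  chunkSize: 20,           // pages per chunk
  thumbnailQuality: 'low',
  priorityPageCount: 10,
  useWebWorker: false,
  maxRetries: 3,
  timeoutMs: 60_000,
};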
127
frontend/src/utils/fileHash.ts
Normal file
@ -0,0 +1,127 @@
/**
 * File hashing utilities for cache key generation
 */

export class FileHasher {
  private static readonly CHUNK_SIZE = 64 * 1024; // 64KB chunks for hashing

  /**
   * Generate a content-based hash for a file
   * Uses first + last + middle chunks to create a reasonably unique hash
   * without reading the entire file (which would be expensive for large files)
   */
  static async generateContentHash(file: File): Promise<string> {
    const chunks = await this.getFileChunks(file);
    const combined = await this.combineChunks(chunks);
    return await this.hashArrayBuffer(combined);
  }

  /**
   * Generate a fast hash based on file metadata
   * Faster but less collision-resistant than content hash
   */
  static generateMetadataHash(file: File): string {
    const data = `${file.name}-${file.size}-${file.lastModified}-${file.type}`;
    return this.simpleHash(data);
  }

  /**
   * Generate a hybrid hash that balances speed and uniqueness
   * Uses metadata + small content sample
   */
  static async generateHybridHash(file: File): Promise<string> {
    const metadataHash = this.generateMetadataHash(file);

    // For small files, use full content hash
    if (file.size <= 1024 * 1024) { // 1MB
      const contentHash = await this.generateContentHash(file);
      return `${metadataHash}-${contentHash}`;
    }

    // For large files, use first chunk only
    const firstChunk = file.slice(0, this.CHUNK_SIZE);
    const firstChunkBuffer = await firstChunk.arrayBuffer();
    const firstChunkHash = await this.hashArrayBuffer(firstChunkBuffer);

    return `${metadataHash}-${firstChunkHash}`;
  }

  private static async getFileChunks(file: File): Promise<ArrayBuffer[]> {
    const chunks: ArrayBuffer[] = [];

    // First chunk
    if (file.size > 0) {
      const firstChunk = file.slice(0, Math.min(this.CHUNK_SIZE, file.size));
      chunks.push(await firstChunk.arrayBuffer());
    }

    // Middle chunk (if file is large enough)
    if (file.size > this.CHUNK_SIZE * 2) {
      const middleStart = Math.floor(file.size / 2) - Math.floor(this.CHUNK_SIZE / 2);
      const middleEnd = middleStart + this.CHUNK_SIZE;
      const middleChunk = file.slice(middleStart, middleEnd);
      chunks.push(await middleChunk.arrayBuffer());
    }

    // Last chunk (if file is large enough and different from first)
    if (file.size > this.CHUNK_SIZE) {
      const lastStart = Math.max(file.size - this.CHUNK_SIZE, this.CHUNK_SIZE);
      const lastChunk = file.slice(lastStart);
      chunks.push(await lastChunk.arrayBuffer());
    }

    return chunks;
  }

  private static async combineChunks(chunks: ArrayBuffer[]): Promise<ArrayBuffer> {
    const totalLength = chunks.reduce((sum, chunk) => sum + chunk.byteLength, 0);
    const combined = new Uint8Array(totalLength);

    let offset = 0;
    for (const chunk of chunks) {
      combined.set(new Uint8Array(chunk), offset);
      offset += chunk.byteLength;
    }

    return combined.buffer;
  }

  private static async hashArrayBuffer(buffer: ArrayBuffer): Promise<string> {
    // Use Web Crypto API for proper hashing
    if (crypto.subtle) {
      const hashBuffer = await crypto.subtle.digest('SHA-256', buffer);
      const hashArray = Array.from(new Uint8Array(hashBuffer));
      return hashArray.map(b => b.toString(16).padStart(2, '0')).join('');
    }

    // Fallback for environments without crypto.subtle
    return this.simpleHash(Array.from(new Uint8Array(buffer)).join(''));
  }

  private static simpleHash(str: string): string {
    let hash = 0;
    if (str.length === 0) return hash.toString();

    for (let i = 0; i < str.length; i++) {
      const char = str.charCodeAt(i);
      hash = ((hash << 5) - hash) + char;
      hash = hash & hash; // Convert to 32-bit integer
    }

    return Math.abs(hash).toString(16);
  }

  /**
   * Validate that a file matches its expected hash
   * Useful for detecting file corruption or changes
   */
  static async validateFileHash(file: File, expectedHash: string): Promise<boolean> {
    try {
      const actualHash = await this.generateHybridHash(file);
      return actualHash === expectedHash;
    } catch (error) {
      console.error('Hash validation failed:', error);
      return false;
    }
  }
}
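
Usage sketch (illustrative, not part of this diff): the hybrid hash doubles as a cache key and a cheap change detector; the helper names are assumptions.

import { FileHasher } from './fileHash';

// Hypothetical helpers built on the hashing utilities.
async function cacheKeyFor(file: File): Promise<string> {
  // Metadata plus a small content sample; full content hash only for files <= 1MB.
  return FileHasher.generateHybridHash(file);
}

async function fileChangedSince(file: File, previousKey: string): Promise<boolean> {
  return !(await FileHasher.validateFileHash(file, previousKey));
}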