mirror of https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-08 17:51:20 +02:00

File management overhaul

This commit is contained in:
parent 09758ea2b8
commit 42abe83385

@@ -6,7 +6,6 @@ import StorageIcon from "@mui/icons-material/Storage";
 import VisibilityIcon from "@mui/icons-material/Visibility";
 import EditIcon from "@mui/icons-material/Edit";
 
-import { FileWithUrl } from "../../types/file";
 import { getFileSize, getFileDate } from "../../utils/fileUtils";
 import { useIndexedDBThumbnail } from "../../hooks/useIndexedDBThumbnail";
 
@@ -1,15 +1,13 @@
 import React, { useState, useCallback, useRef, useEffect } from "react";
 import {
   Button, Text, Center, Checkbox, Box, Tooltip, ActionIcon,
-  Notification, TextInput, FileInput, LoadingOverlay, Modal, Alert, Container,
-  Stack, Group, Paper, SimpleGrid
+  Notification, TextInput, LoadingOverlay, Modal, Alert,
+  Stack, Group
 } from "@mantine/core";
 import { useTranslation } from "react-i18next";
-import UploadFileIcon from "@mui/icons-material/UploadFile";
-import { usePDFProcessor } from "../../hooks/usePDFProcessor";
+import { useEnhancedProcessedFiles } from "../../hooks/useEnhancedProcessedFiles";
 import { PDFDocument, PDFPage } from "../../types/pageEditor";
-import { fileStorage } from "../../services/fileStorage";
-import { generateThumbnailForFile } from "../../utils/thumbnailUtils";
+import { ProcessedFile as EnhancedProcessedFile } from "../../types/processing";
 import { useUndoRedo } from "../../hooks/useUndoRedo";
 import {
   RotatePagesCommand,
@@ -19,19 +17,16 @@ import {
   ToggleSplitCommand
 } from "../../commands/pageCommands";
 import { pdfExportService } from "../../services/pdfExportService";
-import styles from './pageEditor.module.css';
+import './pageEditor.module.css';
 import PageThumbnail from './PageThumbnail';
 import BulkSelectionPanel from './BulkSelectionPanel';
 import DragDropGrid from './DragDropGrid';
-import FilePickerModal from '../shared/FilePickerModal';
-import FileUploadSelector from '../shared/FileUploadSelector';
 
 export interface PageEditorProps {
   activeFiles: File[];
   setActiveFiles: (files: File[]) => void;
   downloadUrl?: string | null;
   setDownloadUrl?: (url: string | null) => void;
-  sharedFiles?: any[]; // For FileUploadSelector when no files loaded
 
   // Optional callbacks to expose internal functions for PageEditorControls
   onFunctionsReady?: (functions: {
@@ -55,24 +50,31 @@ export interface PageEditorProps {
 const PageEditor = ({
   activeFiles,
   setActiveFiles,
-  downloadUrl,
-  setDownloadUrl,
-  sharedFiles = [],
   onFunctionsReady,
 }: PageEditorProps) => {
   const { t } = useTranslation();
-  const { processPDFFile, loading: pdfLoading } = usePDFProcessor();
+
+  // Enhanced processing with intelligent strategies
+  const {
+    processedFiles: enhancedProcessedFiles,
+    processingStates,
+    isProcessing: globalProcessing,
+    hasProcessingErrors,
+    processingProgress,
+    actions: processingActions
+  } = useEnhancedProcessedFiles(activeFiles, {
+    strategy: 'priority_pages', // Process first pages immediately
+    thumbnailQuality: 'low', // Low quality for page editor navigation
+    priorityPageCount: 10
+  });
 
   // Single merged document state
   const [mergedPdfDocument, setMergedPdfDocument] = useState<PDFDocument | null>(null);
-  const [processedFiles, setProcessedFiles] = useState<Map<string, PDFDocument>>(new Map());
   const [filename, setFilename] = useState<string>("");
 
   // Page editor state
   const [selectedPages, setSelectedPages] = useState<string[]>([]);
   const [status, setStatus] = useState<string | null>(null);
-  const [loading, setLoading] = useState(false);
-  const [error, setError] = useState<string | null>(null);
   const [csvInput, setCsvInput] = useState<string>("");
   const [selectionMode, setSelectionMode] = useState(false);
 
@@ -97,87 +99,19 @@ const PageEditor = ({
   // Undo/Redo system
   const { executeCommand, undo, redo, canUndo, canRedo } = useUndoRedo();
 
-  // Process uploaded file
-  const handleFileUpload = useCallback(async (uploadedFile: File | any) => {
-    if (!uploadedFile) {
-      setError('No file provided');
-      return;
-    }
-
-    let fileToProcess: File;
-
-    // Handle FileWithUrl objects from storage
-    if (uploadedFile.storedInIndexedDB && uploadedFile.arrayBuffer) {
-      try {
-        console.log('Converting FileWithUrl to File:', uploadedFile.name);
-        const arrayBuffer = await uploadedFile.arrayBuffer();
-        const blob = new Blob([arrayBuffer], { type: uploadedFile.type || 'application/pdf' });
-        fileToProcess = new File([blob], uploadedFile.name, {
-          type: uploadedFile.type || 'application/pdf',
-          lastModified: uploadedFile.lastModified || Date.now()
-        });
-      } catch (error) {
-        console.error('Error converting FileWithUrl:', error);
-        setError('Unable to load file from storage');
-        return;
-      }
-    } else if (uploadedFile instanceof File) {
-      fileToProcess = uploadedFile;
-    } else {
-      setError('Invalid file object');
-      console.error('handleFileUpload received unsupported object:', uploadedFile);
-      return;
-    }
-
-    if (fileToProcess.type !== 'application/pdf') {
-      setError('Please upload a valid PDF file');
-      return;
-    }
-
-    const fileKey = `${fileToProcess.name}-${fileToProcess.size}`;
-
-    // Skip processing if already processed
-    if (processedFiles.has(fileKey)) return;
-
-    setLoading(true);
-    setError(null);
-
-    try {
-      const document = await processPDFFile(fileToProcess);
-
-      // Store processed document
-      setProcessedFiles(prev => new Map(prev).set(fileKey, document));
-      setFilename(fileToProcess.name.replace(/\.pdf$/i, ''));
-      setSelectedPages([]);
-
-
-      if (document.pages.length > 0) {
-        // Only store if it's a new file (not from storage)
-        if (!uploadedFile.storedInIndexedDB) {
-          const thumbnail = await generateThumbnailForFile(fileToProcess);
-          await fileStorage.storeFile(fileToProcess, thumbnail);
-        }
-      }
-
-      setStatus(`PDF loaded successfully with ${document.totalPages} pages`);
-    } catch (err) {
-      const errorMessage = err instanceof Error ? err.message : 'Failed to process PDF';
-      setError(errorMessage);
-      console.error('PDF processing error:', err);
-    } finally {
-      setLoading(false);
-    }
-  }, [processPDFFile, activeFiles, setActiveFiles, processedFiles]);
-
-  // Process multiple uploaded files - just add them to activeFiles like FileManager does
-  const handleMultipleFileUpload = useCallback((uploadedFiles: File[]) => {
-    if (!uploadedFiles || uploadedFiles.length === 0) {
-      setError('No files provided');
-      return;
-    }
-
-    // Simply set the activeFiles to the selected files (same as FileManager approach)
-    setActiveFiles(uploadedFiles);
+  // Convert enhanced processed files to Page Editor format
+  const convertToPageEditorFormat = useCallback((enhancedFile: EnhancedProcessedFile, fileName: string): PDFDocument => {
+    return {
+      id: enhancedFile.id,
+      name: fileName,
+      file: null as any, // We don't need the file reference in the converted format
+      pages: enhancedFile.pages.map(page => ({
+        ...page,
+        // Ensure compatibility with existing page editor types
+        splitBefore: page.splitBefore || false
+      })),
+      totalPages: enhancedFile.totalPages
+    };
   }, []);
 
   // Merge multiple PDF documents into one
@@ -188,10 +122,10 @@ const PageEditor = ({
     }
 
     if (activeFiles.length === 1) {
-      // Single file - use it directly
-      const fileKey = `${activeFiles[0].name}-${activeFiles[0].size}`;
-      const pdfDoc = processedFiles.get(fileKey);
-      if (pdfDoc) {
+      // Single file - use enhanced processed file
+      const enhancedFile = enhancedProcessedFiles.get(activeFiles[0]);
+      if (enhancedFile) {
+        const pdfDoc = convertToPageEditorFormat(enhancedFile, activeFiles[0].name);
         setMergedPdfDocument(pdfDoc);
         setFilename(activeFiles[0].name.replace(/\.pdf$/i, ''));
       }
@@ -202,71 +136,230 @@ const PageEditor = ({
     const filenames: string[] = [];
 
     activeFiles.forEach((file, fileIndex) => {
-      const fileKey = `${file.name}-${file.size}`;
-      const pdfDoc = processedFiles.get(fileKey);
-      if (pdfDoc) {
+      const enhancedFile = enhancedProcessedFiles.get(file);
+      if (enhancedFile) {
         filenames.push(file.name.replace(/\.pdf$/i, ''));
-        pdfDoc.pages.forEach((page, pageIndex) => {
+        enhancedFile.pages.forEach((page, pageIndex) => {
           // Create new page with updated IDs and page numbers for merged document
           const newPage: PDFPage = {
             ...page,
             id: `${fileIndex}-${page.id}`, // Unique ID across all files
             pageNumber: totalPages + pageIndex + 1,
-            sourceFile: file.name // Track which file this page came from
+            splitBefore: page.splitBefore || false
           };
           allPages.push(newPage);
         });
-        totalPages += pdfDoc.pages.length;
+        totalPages += enhancedFile.pages.length;
       }
     });
 
-    const mergedDocument: PDFDocument = {
-      pages: allPages,
-      totalPages: totalPages,
-      title: filenames.join(' + '),
-      metadata: {
-        title: filenames.join(' + '),
-        createdAt: new Date().toISOString(),
-        modifiedAt: new Date().toISOString(),
+    if (allPages.length > 0) {
+      const mergedDocument: PDFDocument = {
+        id: `merged-${Date.now()}`,
+        name: filenames.join(' + '),
+        file: null as any,
+        pages: allPages,
+        totalPages: totalPages
+      };
+
+      setMergedPdfDocument(mergedDocument);
+      setFilename(filenames.join('_'));
+    }
+  }, [activeFiles, enhancedProcessedFiles, convertToPageEditorFormat]);
+
+  // Handle file upload from FileUploadSelector
+  const handleMultipleFileUpload = useCallback((uploadedFiles: File[]) => {
+    if (!uploadedFiles || uploadedFiles.length === 0) {
+      setStatus('No files provided');
+      return;
+    }
+
+    // Simply set the activeFiles to the selected files (same as existing approach)
+    setActiveFiles(uploadedFiles);
+    setStatus(`Added ${uploadedFiles.length} file(s) for processing`);
+  }, [setActiveFiles]);
+
+  // Auto-merge documents when enhanced processing completes
+  useEffect(() => {
+    if (activeFiles.length > 0) {
+      const allProcessed = activeFiles.every(file => enhancedProcessedFiles.has(file));
+
+      if (allProcessed) {
+        mergeAllPDFs();
+      }
+    } else {
+      setMergedPdfDocument(null);
+    }
+  }, [activeFiles, enhancedProcessedFiles, mergeAllPDFs]);
+
+  // Shared PDF instance for thumbnail generation
+  const [sharedPdfInstance, setSharedPdfInstance] = useState<any>(null);
+  const [thumbnailGenerationStarted, setThumbnailGenerationStarted] = useState(false);
+
+  // Session-based thumbnail cache with 1GB limit
+  const [thumbnailCache, setThumbnailCache] = useState<Map<string, { thumbnail: string; lastUsed: number; sizeBytes: number }>>(new Map());
+  const maxCacheSizeBytes = 1024 * 1024 * 1024; // 1GB cache limit
+  const [currentCacheSize, setCurrentCacheSize] = useState(0);
+
+  // Cache management functions
+  const addThumbnailToCache = useCallback((pageId: string, thumbnail: string) => {
+    const thumbnailSizeBytes = thumbnail.length * 0.75; // Rough base64 size estimate
+
+    setThumbnailCache(prev => {
+      const newCache = new Map(prev);
+      const now = Date.now();
+
+      // Add new thumbnail
+      newCache.set(pageId, {
+        thumbnail,
+        lastUsed: now,
+        sizeBytes: thumbnailSizeBytes
+      });
+
+      return newCache;
+    });
+
+    setCurrentCacheSize(prev => {
+      const newSize = prev + thumbnailSizeBytes;
+
+      // If we exceed 1GB, trigger cleanup
+      if (newSize > maxCacheSizeBytes) {
+        setTimeout(() => cleanupThumbnailCache(), 0);
+      }
+
+      return newSize;
+    });
+
+    console.log(`Cached thumbnail for ${pageId} (${Math.round(thumbnailSizeBytes / 1024)}KB)`);
+  }, [maxCacheSizeBytes]);
+
+  const getThumbnailFromCache = useCallback((pageId: string): string | null => {
+    const cached = thumbnailCache.get(pageId);
+    if (!cached) return null;
+
+    // Update last used timestamp
+    setThumbnailCache(prev => {
+      const newCache = new Map(prev);
+      const entry = newCache.get(pageId);
+      if (entry) {
+        entry.lastUsed = Date.now();
+      }
+      return newCache;
+    });
+
+    return cached.thumbnail;
+  }, [thumbnailCache]);
+
+  const cleanupThumbnailCache = useCallback(() => {
+    setThumbnailCache(prev => {
+      const entries = Array.from(prev.entries());
+
+      // Sort by last used (oldest first)
+      entries.sort(([, a], [, b]) => a.lastUsed - b.lastUsed);
+
+      const newCache = new Map();
+      let newSize = 0;
+      const targetSize = maxCacheSizeBytes * 0.8; // Clean to 80% of limit
+
+      // Keep most recently used entries until we hit target size
+      for (let i = entries.length - 1; i >= 0 && newSize < targetSize; i--) {
+        const [key, value] = entries[i];
+        newCache.set(key, value);
+        newSize += value.sizeBytes;
+      }
+
+      setCurrentCacheSize(newSize);
+      console.log(`Cleaned thumbnail cache: ${prev.size} → ${newCache.size} entries (${Math.round(newSize / 1024 / 1024)}MB)`);
+
+      return newCache;
+    });
+  }, [maxCacheSizeBytes]);
+
+  const clearThumbnailCache = useCallback(() => {
+    setThumbnailCache(new Map());
+    setCurrentCacheSize(0);
+    console.log('Cleared thumbnail cache');
+  }, []);
+
+  // Start thumbnail generation process (separate from document loading)
+  const startThumbnailGeneration = useCallback(async () => {
+    if (!mergedPdfDocument || activeFiles.length !== 1 || thumbnailGenerationStarted) return;
+
+    const file = activeFiles[0];
+    const totalPages = mergedPdfDocument.totalPages;
+
+    console.log(`Starting thumbnail generation for ${totalPages} pages`);
+    setThumbnailGenerationStarted(true);
+
+    try {
+      // Load PDF ONCE for thumbnail generation (separate from document structure loading)
+      const arrayBuffer = await file.arrayBuffer();
+      const { getDocument } = await import('pdfjs-dist');
+      const pdf = await getDocument({ data: arrayBuffer }).promise;
+      setSharedPdfInstance(pdf);
+
+      console.log('Shared PDF loaded, starting progressive thumbnail generation');
+
+      // Process pages in batches
+      let currentPage = 1;
+      const batchSize = totalPages > 500 ? 1 : 2; // Slower for massive files
+      const batchDelay = totalPages > 500 ? 300 : 200; // More delay for massive files
+
+      const processBatch = async () => {
+        const endPage = Math.min(currentPage + batchSize - 1, totalPages);
+        console.log(`Generating thumbnails for pages ${currentPage}-${endPage}`);
+
+        for (let i = currentPage; i <= endPage; i++) {
+          // Send the shared PDF instance and cache functions to components
+          window.dispatchEvent(new CustomEvent('generateThumbnail', {
+            detail: {
+              pageNumber: i,
+              sharedPdf: pdf,
+              getThumbnailFromCache,
+              addThumbnailToCache
+            }
+          }));
+        }
+
+        currentPage += batchSize;
+
+        if (currentPage <= totalPages) {
+          setTimeout(processBatch, batchDelay);
+        } else {
+          console.log('Progressive thumbnail generation completed');
         }
       };
 
-    setMergedPdfDocument(mergedDocument);
-    setFilename(filenames.join('_'));
+      // Start generating thumbnails immediately
+      processBatch();
 
+    } catch (error) {
+      console.error('Failed to start thumbnail generation:', error);
+      setThumbnailGenerationStarted(false);
     }
-  }, [activeFiles, processedFiles]);
+  }, [mergedPdfDocument, activeFiles, thumbnailGenerationStarted]);
 
-  // Auto-process files from activeFiles
+  // Start thumbnail generation after document loads and UI settles
   useEffect(() => {
-    console.log('Auto-processing effect triggered:', {
-      activeFilesCount: activeFiles.length,
-      processedFilesCount: processedFiles.size,
-      activeFileNames: activeFiles.map(f => f.name)
-    });
-
-    activeFiles.forEach(file => {
-      const fileKey = `${file.name}-${file.size}`;
-      console.log(`Checking file ${file.name}: processed =`, processedFiles.has(fileKey));
-      if (!processedFiles.has(fileKey)) {
-        console.log('Processing file:', file.name);
-        handleFileUpload(file);
-      }
-    });
-  }, [activeFiles, processedFiles, handleFileUpload]);
-
-  // Merge multiple PDF documents into one when all files are processed
-  useEffect(() => {
-    if (activeFiles.length > 0) {
-      const allProcessed = activeFiles.every(file => {
-        const fileKey = `${file.name}-${file.size}`;
-        return processedFiles.has(fileKey);
-      });
-
-      if (allProcessed && activeFiles.length > 0) {
-        mergeAllPDFs();
-      }
+    if (mergedPdfDocument && !thumbnailGenerationStarted) {
+      // Small delay to let document render, then start thumbnail generation
+      const timer = setTimeout(startThumbnailGeneration, 1000);
+      return () => clearTimeout(timer);
     }
-  }, [activeFiles, processedFiles, mergeAllPDFs]);
+  }, [mergedPdfDocument, startThumbnailGeneration, thumbnailGenerationStarted]);
+
+  // Cleanup shared PDF instance and cache when component unmounts or files change
+  useEffect(() => {
+    return () => {
+      if (sharedPdfInstance) {
+        sharedPdfInstance.destroy();
+        setSharedPdfInstance(null);
+      }
+      setThumbnailGenerationStarted(false);
+      clearThumbnailCache(); // Clear cache when leaving/changing documents
+    };
+  }, [activeFiles, clearThumbnailCache]);
 
   // Clear selections when files change
   useEffect(() => {
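
Aside: the cleanupThumbnailCache callback above implements byte-budget LRU eviction — entries are sorted by lastUsed and the newest are kept until 80% of the 1GB budget is filled. A minimal standalone sketch of that policy (the CacheEntry shape mirrors the diff; the function name is illustrative):

    interface CacheEntry {
      thumbnail: string; // base64 data URL
      lastUsed: number;  // epoch millis
      sizeBytes: number; // estimated entry size
    }

    const MAX_CACHE_BYTES = 1024 * 1024 * 1024; // 1GB, as in the diff
    const TARGET_RATIO = 0.8;                   // clean down to 80% of the limit

    // Keep the most recently used entries whose combined size fits the target.
    function evictLeastRecentlyUsed(cache: Map<string, CacheEntry>): Map<string, CacheEntry> {
      const entries = [...cache.entries()].sort(([, a], [, b]) => a.lastUsed - b.lastUsed);
      const kept = new Map<string, CacheEntry>();
      let size = 0;
      const target = MAX_CACHE_BYTES * TARGET_RATIO;
      // Walk from newest (end of the sorted array) to oldest until the budget is spent.
      for (let i = entries.length - 1; i >= 0 && size < target; i--) {
        const [key, value] = entries[i];
        kept.set(key, value);
        size += value.sizeBytes;
      }
      return kept;
    }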
@@ -275,7 +368,6 @@ const PageEditor = ({
     setSelectionMode(false);
   }, [activeFiles]);
 
-  // Global drag cleanup to handle drops outside valid areas
   useEffect(() => {
     const handleGlobalDragEnd = () => {
       // Clean up drag state when drag operation ends anywhere
@@ -286,7 +378,7 @@ const PageEditor = ({
     };
 
     const handleGlobalDrop = (e: DragEvent) => {
-      // Prevent default to avoid browser navigation on invalid drops
+      // Prevent default to handle invalid drops
       e.preventDefault();
     };
 
@@ -702,7 +794,6 @@ const PageEditor = ({
 
   const closePdf = useCallback(() => {
     setActiveFiles([]);
-    setProcessedFiles(new Map());
     setMergedPdfDocument(null);
     setSelectedPages([]);
   }, [setActiveFiles]);
@@ -749,31 +840,66 @@ const PageEditor = ({
     closePdf
   ]);
 
+  // Return early if no merged document - Homepage handles file selection
   if (!mergedPdfDocument) {
     return (
-      <Box pos="relative" h="100vh" style={{ overflow: 'auto' }}>
-        <LoadingOverlay visible={loading || pdfLoading} />
-        <Container size="lg" p="xl" h="100%" style={{ display: 'flex', alignItems: 'center', justifyContent: 'center' }}>
-          <FileUploadSelector
-            title="Select PDFs to edit"
-            subtitle="Choose files from storage or upload PDFs - multiple files will be merged"
-            sharedFiles={sharedFiles}
-            onFilesSelect={handleMultipleFileUpload}
-            accept={["application/pdf"]}
-            loading={loading || pdfLoading}
-          />
-        </Container>
-      </Box>
+      <Center h="100vh">
+        <LoadingOverlay visible={globalProcessing} />
+        {globalProcessing ? (
+          <Text c="dimmed">Processing PDF files...</Text>
+        ) : (
+          <Text c="dimmed">Waiting for PDF files...</Text>
+        )}
+      </Center>
     );
   }
 
   return (
     <Box pos="relative" h="100vh" style={{ overflow: 'auto' }}>
-      <LoadingOverlay visible={loading || pdfLoading} />
+      <LoadingOverlay visible={globalProcessing && !mergedPdfDocument} />
 
 
      <Box p="md" pt="xl">
+        {/* Enhanced Processing Status */}
+        {(globalProcessing || hasProcessingErrors) && (
+          <Box mb="md" p="sm" style={{ backgroundColor: 'var(--mantine-color-blue-0)', borderRadius: 8 }}>
+            {globalProcessing && (
+              <Group justify="space-between" mb="xs">
+                <Text size="sm" fw={500}>Processing files...</Text>
+                <Text size="sm" c="dimmed">{Math.round(processingProgress.overall)}%</Text>
+              </Group>
+            )}
+
+            {Array.from(processingStates.values()).map(state => (
+              <Group key={state.fileKey} justify="space-between" mb={4}>
+                <Text size="xs">{state.fileName}</Text>
+                <Group gap="xs">
+                  <Text size="xs" c="dimmed">{state.progress}%</Text>
+                  {state.error && (
+                    <Button
+                      size="xs"
+                      variant="light"
+                      color="red"
+                      onClick={() => {
+                        // Show error details or retry
+                        console.log('Processing error:', state.error);
+                      }}
+                    >
+                      Error
+                    </Button>
+                  )}
+                </Group>
+              </Group>
+            ))}
+
+            {hasProcessingErrors && (
+              <Text size="xs" c="red" mt="xs">
+                Some files failed to process. Check individual file status above.
+              </Text>
+            )}
+          </Box>
+        )}
+
        <Group mb="md">
          <TextInput
            value={filename}
@@ -834,6 +960,7 @@ const PageEditor = ({
                page={page}
                index={index}
                totalPages={mergedPdfDocument.pages.length}
+                originalFile={activeFiles.length === 1 ? activeFiles[0] : undefined}
                selectedPages={selectedPages}
                selectionMode={selectionMode}
                draggedPage={draggedPage}
@@ -930,12 +1057,6 @@ const PageEditor = ({
        )}
      </Modal>
 
-      <FileInput
-        ref={fileInputRef}
-        accept="application/pdf"
-        onChange={(file) => file && handleFileUpload(file)}
-        style={{ display: 'none' }}
-      />
 
      {status && (
        <Notification
@@ -947,18 +1068,6 @@ const PageEditor = ({
          {status}
        </Notification>
      )}
-
-      {error && (
-        <Notification
-          color="red"
-          mt="md"
-          onClose={() => setError(null)}
-          style={{ position: 'fixed', bottom: 70, right: 20, zIndex: 1000 }}
-        >
-          {error}
-        </Notification>
-      )}
-
    </Box>
  );
};

@@ -1,5 +1,5 @@
-import React, { useCallback } from 'react';
-import { Text, Checkbox, Tooltip, ActionIcon } from '@mantine/core';
+import React, { useCallback, useState, useEffect, useRef } from 'react';
+import { Text, Checkbox, Tooltip, ActionIcon, Loader } from '@mantine/core';
 import ArrowBackIcon from '@mui/icons-material/ArrowBack';
 import ArrowForwardIcon from '@mui/icons-material/ArrowForward';
 import RotateLeftIcon from '@mui/icons-material/RotateLeft';
@@ -9,11 +9,18 @@ import ContentCutIcon from '@mui/icons-material/ContentCut';
 import DragIndicatorIcon from '@mui/icons-material/DragIndicator';
 import { PDFPage } from '../../../types/pageEditor';
 import styles from './PageEditor.module.css';
+import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
+
+// Ensure PDF.js worker is available
+if (!GlobalWorkerOptions.workerSrc) {
+  GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
+}
 
 interface PageThumbnailProps {
   page: PDFPage;
   index: number;
   totalPages: number;
+  originalFile?: File; // For lazy thumbnail generation
   selectedPages: string[];
   selectionMode: boolean;
   draggedPage: string | null;
@@ -43,6 +50,7 @@ const PageThumbnail = ({
   page,
   index,
   totalPages,
+  originalFile,
   selectedPages,
   selectionMode,
   draggedPage,
@@ -67,6 +75,74 @@ const PageThumbnail = ({
   pdfDocument,
   setPdfDocument,
 }: PageThumbnailProps) => {
+  const [thumbnailUrl, setThumbnailUrl] = useState<string | null>(page.thumbnail);
+  const [isLoadingThumbnail, setIsLoadingThumbnail] = useState(false);
+
+  // Listen for progressive thumbnail generation events
+  useEffect(() => {
+    const handleThumbnailGeneration = (event: CustomEvent) => {
+      const { pageNumber, sharedPdf, getThumbnailFromCache, addThumbnailToCache } = event.detail;
+      if (pageNumber === page.pageNumber && !thumbnailUrl && !isLoadingThumbnail) {
+
+        // Check cache first
+        const cachedThumbnail = getThumbnailFromCache(page.id);
+        if (cachedThumbnail) {
+          console.log(`Using cached thumbnail for page ${page.pageNumber}`);
+          setThumbnailUrl(cachedThumbnail);
+          return;
+        }
+
+        // Generate new thumbnail and cache it
+        loadThumbnailFromSharedPdf(sharedPdf, addThumbnailToCache);
+      }
+    };
+
+    window.addEventListener('generateThumbnail', handleThumbnailGeneration as EventListener);
+    return () => window.removeEventListener('generateThumbnail', handleThumbnailGeneration as EventListener);
+  }, [page.pageNumber, page.id, thumbnailUrl, isLoadingThumbnail]);
+
+  const loadThumbnailFromSharedPdf = async (sharedPdf: any, addThumbnailToCache?: (pageId: string, thumbnail: string) => void) => {
+    if (isLoadingThumbnail || thumbnailUrl) return;
+
+    setIsLoadingThumbnail(true);
+    try {
+      const thumbnail = await generateThumbnailFromPdf(sharedPdf);
+
+      // Cache the generated thumbnail
+      if (addThumbnailToCache) {
+        addThumbnailToCache(page.id, thumbnail);
+      }
+
+    } catch (error) {
+      console.error(`Failed to load thumbnail for page ${page.pageNumber}:`, error);
+    } finally {
+      setIsLoadingThumbnail(false);
+    }
+  };
+
+  const generateThumbnailFromPdf = async (pdf: any): Promise<string> => {
+    const pdfPage = await pdf.getPage(page.pageNumber);
+    const scale = 0.2; // Low quality for page editor
+    const viewport = pdfPage.getViewport({ scale });
+
+    const canvas = document.createElement('canvas');
+    canvas.width = viewport.width;
+    canvas.height = viewport.height;
+
+    const context = canvas.getContext('2d');
+    if (!context) {
+      throw new Error('Could not get canvas context');
+    }
+
+    await pdfPage.render({ canvasContext: context, viewport }).promise;
+    const thumbnail = canvas.toDataURL('image/jpeg', 0.8);
+
+    setThumbnailUrl(thumbnail);
+    console.log(`Thumbnail generated for page ${page.pageNumber}`);
+
+    return thumbnail;
+  };
+
   // Register this component with pageRefs for animations
   const pageElementRef = useCallback((element: HTMLDivElement | null) => {
     if (element) {
@@ -162,18 +238,30 @@ const PageThumbnail = ({
          justifyContent: 'center'
        }}
      >
-        <img
-          src={page.thumbnail}
-          alt={`Page ${page.pageNumber}`}
-          style={{
-            maxWidth: '100%',
-            maxHeight: '100%',
-            objectFit: 'contain',
-            borderRadius: 2,
-            transform: `rotate(${page.rotation}deg)`,
-            transition: 'transform 0.3s ease-in-out'
-          }}
-        />
+        {thumbnailUrl ? (
+          <img
+            src={thumbnailUrl}
+            alt={`Page ${page.pageNumber}`}
+            style={{
+              maxWidth: '100%',
+              maxHeight: '100%',
+              objectFit: 'contain',
+              borderRadius: 2,
+              transform: `rotate(${page.rotation}deg)`,
+              transition: 'transform 0.3s ease-in-out'
+            }}
+          />
+        ) : isLoadingThumbnail ? (
+          <div style={{ textAlign: 'center' }}>
+            <Loader size="sm" />
+            <Text size="xs" c="dimmed" mt={4}>Loading...</Text>
+          </div>
+        ) : (
+          <div style={{ textAlign: 'center' }}>
+            <Text size="lg" c="dimmed">📄</Text>
+            <Text size="xs" c="dimmed" mt={4}>Page {page.pageNumber}</Text>
+          </div>
+        )}
      </div>
 
      <Text
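
Aside: the editor and each thumbnail communicate through a window-level CustomEvent rather than props. PageEditor dispatches one 'generateThumbnail' event per page, and every mounted PageThumbnail filters on its own page number. A reduced sketch of just that bridge (the detail shape follows the diff; the handler bodies are stubs):

    interface GenerateThumbnailDetail {
      pageNumber: number;
      sharedPdf: unknown; // the shared pdfjs document instance
    }

    // Producer (PageEditor): one event per page in the current batch.
    function requestThumbnail(pageNumber: number, sharedPdf: unknown): void {
      window.dispatchEvent(new CustomEvent<GenerateThumbnailDetail>('generateThumbnail', {
        detail: { pageNumber, sharedPdf }
      }));
    }

    // Consumer (PageThumbnail): react only to events addressed to this page.
    function listenForThumbnail(myPageNumber: number, render: (pdf: unknown) => void): () => void {
      const handler = (event: Event) => {
        const { pageNumber, sharedPdf } = (event as CustomEvent<GenerateThumbnailDetail>).detail;
        if (pageNumber === myPageNumber) {
          render(sharedPdf);
        }
      };
      window.addEventListener('generateThumbnail', handler);
      return () => window.removeEventListener('generateThumbnail', handler); // dispose on unmount
    }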

frontend/src/hooks/useEnhancedProcessedFiles.ts (new file, 288 lines)
@@ -0,0 +1,288 @@
+import { useState, useEffect } from 'react';
+import { ProcessedFile, ProcessingState, ProcessingConfig } from '../types/processing';
+import { enhancedPDFProcessingService } from '../services/enhancedPDFProcessingService';
+import { FileHasher } from '../utils/fileHash';
+
+interface UseEnhancedProcessedFilesResult {
+  processedFiles: Map<File, ProcessedFile>;
+  processingStates: Map<string, ProcessingState>;
+  isProcessing: boolean;
+  hasProcessingErrors: boolean;
+  processingProgress: {
+    overall: number;
+    fileProgress: Map<string, number>;
+    estimatedTimeRemaining: number;
+  };
+  cacheStats: {
+    entries: number;
+    totalSizeBytes: number;
+    maxSizeBytes: number;
+  };
+  metrics: {
+    totalFiles: number;
+    completedFiles: number;
+    failedFiles: number;
+    averageProcessingTime: number;
+    cacheHitRate: number;
+  };
+  actions: {
+    cancelProcessing: (fileKey: string) => void;
+    retryProcessing: (file: File) => void;
+    clearCache: () => void;
+  };
+}
+
+export function useEnhancedProcessedFiles(
+  activeFiles: File[],
+  config?: Partial<ProcessingConfig>
+): UseEnhancedProcessedFilesResult {
+  const [processedFiles, setProcessedFiles] = useState<Map<File, ProcessedFile>>(new Map());
+  const [processingStates, setProcessingStates] = useState<Map<string, ProcessingState>>(new Map());
+
+  // Subscribe to processing state changes once
+  useEffect(() => {
+    const unsubscribe = enhancedPDFProcessingService.onProcessingChange(setProcessingStates);
+    return unsubscribe;
+  }, []);
+
+  // Process files when activeFiles changes
+  useEffect(() => {
+    if (activeFiles.length === 0) {
+      setProcessedFiles(new Map());
+      return;
+    }
+
+    const processFiles = async () => {
+      const newProcessedFiles = new Map<File, ProcessedFile>();
+
+      for (const file of activeFiles) {
+        // Check if we already have this file processed
+        const existing = processedFiles.get(file);
+        if (existing) {
+          newProcessedFiles.set(file, existing);
+          continue;
+        }
+
+        try {
+          // Generate proper file key matching the service
+          const fileKey = await FileHasher.generateHybridHash(file);
+          console.log('Processing file:', file.name);
+
+          const processed = await enhancedPDFProcessingService.processFile(file, config);
+          if (processed) {
+            console.log('Got processed file for:', file.name);
+            newProcessedFiles.set(file, processed);
+          } else {
+            console.log('Processing started for:', file.name, '- waiting for completion');
+          }
+        } catch (error) {
+          console.error(`Failed to start processing for ${file.name}:`, error);
+        }
+      }
+
+      // Update processed files if we have any
+      if (newProcessedFiles.size > 0) {
+        setProcessedFiles(newProcessedFiles);
+      }
+    };
+
+    processFiles();
+  }, [activeFiles]);
+
+  // Listen for processing completion
+  useEffect(() => {
+    const checkForCompletedFiles = async () => {
+      let hasNewFiles = false;
+      const updatedFiles = new Map(processedFiles);
+
+      // Generate file keys for all files first
+      const fileKeyPromises = activeFiles.map(async (file) => ({
+        file,
+        key: await FileHasher.generateHybridHash(file)
+      }));
+
+      const fileKeyPairs = await Promise.all(fileKeyPromises);
+
+      for (const { file, key } of fileKeyPairs) {
+        // Only check files that don't have processed results yet
+        if (!updatedFiles.has(file)) {
+          const processingState = processingStates.get(key);
+
+          // Check for both processing and recently completed files
+          // This ensures we catch completed files before they're cleaned up
+          if (processingState?.status === 'processing' || processingState?.status === 'completed') {
+            try {
+              const processed = await enhancedPDFProcessingService.processFile(file, config);
+              if (processed) {
+                console.log('Processing completed for:', file.name);
+                updatedFiles.set(file, processed);
+                hasNewFiles = true;
+              }
+            } catch (error) {
+              // Ignore errors in completion check
+            }
+          }
+        }
+      }
+
+      if (hasNewFiles) {
+        setProcessedFiles(updatedFiles);
+      }
+    };
+
+    // Check every 500ms for completed processing
+    const interval = setInterval(checkForCompletedFiles, 500);
+    return () => clearInterval(interval);
+  }, [activeFiles, processingStates]);
+
+  // Cleanup when activeFiles changes
+  useEffect(() => {
+    const currentFiles = new Set(activeFiles);
+    const previousFiles = Array.from(processedFiles.keys());
+    const removedFiles = previousFiles.filter(file => !currentFiles.has(file));
+
+    if (removedFiles.length > 0) {
+      // Clean up processing service cache
+      enhancedPDFProcessingService.cleanup(removedFiles);
+
+      // Update local state
+      setProcessedFiles(prev => {
+        const updated = new Map();
+        for (const [file, processed] of prev) {
+          if (currentFiles.has(file)) {
+            updated.set(file, processed);
+          }
+        }
+        return updated;
+      });
+    }
+  }, [activeFiles]);
+
+  // Calculate derived state
+  const isProcessing = processingStates.size > 0;
+  const hasProcessingErrors = Array.from(processingStates.values()).some(state => state.status === 'error');
+
+  // Calculate overall progress
+  const processingProgress = calculateProcessingProgress(processingStates);
+
+  // Get cache stats and metrics
+  const cacheStats = enhancedPDFProcessingService.getCacheStats();
+  const metrics = enhancedPDFProcessingService.getMetrics();
+
+  // Action handlers
+  const actions = {
+    cancelProcessing: (fileKey: string) => {
+      enhancedPDFProcessingService.cancelProcessing(fileKey);
+    },
+
+    retryProcessing: async (file: File) => {
+      try {
+        await enhancedPDFProcessingService.processFile(file, config);
+      } catch (error) {
+        console.error(`Failed to retry processing for ${file.name}:`, error);
+      }
+    },
+
+    clearCache: () => {
+      enhancedPDFProcessingService.clearAll();
+    }
+  };
+
+  return {
+    processedFiles,
+    processingStates,
+    isProcessing,
+    hasProcessingErrors,
+    processingProgress,
+    cacheStats,
+    metrics,
+    actions
+  };
+}
+
+/**
+ * Calculate overall processing progress from individual file states
+ */
+function calculateProcessingProgress(states: Map<string, ProcessingState>): {
+  overall: number;
+  fileProgress: Map<string, number>;
+  estimatedTimeRemaining: number;
+} {
+  if (states.size === 0) {
+    return {
+      overall: 100,
+      fileProgress: new Map(),
+      estimatedTimeRemaining: 0
+    };
+  }
+
+  const fileProgress = new Map<string, number>();
+  let totalProgress = 0;
+  let totalEstimatedTime = 0;
+
+  for (const [fileKey, state] of states) {
+    fileProgress.set(fileKey, state.progress);
+    totalProgress += state.progress;
+    totalEstimatedTime += state.estimatedTimeRemaining || 0;
+  }
+
+  const overall = totalProgress / states.size;
+  const estimatedTimeRemaining = totalEstimatedTime;
+
+  return {
+    overall,
+    fileProgress,
+    estimatedTimeRemaining
+  };
+}
+
+/**
+ * Hook for getting a single processed file with enhanced features
+ */
+export function useEnhancedProcessedFile(
+  file: File | null,
+  config?: Partial<ProcessingConfig>
+): {
+  processedFile: ProcessedFile | null;
+  isProcessing: boolean;
+  processingState: ProcessingState | null;
+  error: string | null;
+  canRetry: boolean;
+  actions: {
+    cancel: () => void;
+    retry: () => void;
+  };
+} {
+  const result = useEnhancedProcessedFiles(file ? [file] : [], config);
+
+  const processedFile = file ? result.processedFiles.get(file) || null : null;
+  // Note: This is async but we can't await in hook return - consider refactoring if needed
+  const fileKey = file ? '' : ''; // TODO: Handle async file key generation
+  const processingState = fileKey ? result.processingStates.get(fileKey) || null : null;
+  const isProcessing = !!processingState;
+  const error = processingState?.error?.message || null;
+  const canRetry = processingState?.error?.recoverable || false;
+
+  const actions = {
+    cancel: () => {
+      if (fileKey) {
+        result.actions.cancelProcessing(fileKey);
+      }
+    },
+    retry: () => {
+      if (file) {
+        result.actions.retryProcessing(file);
+      }
+    }
+  };
+
+  return {
+    processedFile,
+    isProcessing,
+    processingState,
+    error,
+    canRetry,
+    actions
+  };
+}
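
Aside: a hypothetical consumer of useEnhancedProcessedFiles, showing the result shape declared above (the component and its props are illustrative, not part of this commit):

    import React from 'react';
    import { useEnhancedProcessedFiles } from './useEnhancedProcessedFiles';

    function ProcessingSummary({ files }: { files: File[] }) {
      const { processedFiles, isProcessing, hasProcessingErrors, processingProgress, actions } =
        useEnhancedProcessedFiles(files, {
          strategy: 'priority_pages', // same config PageEditor passes above
          thumbnailQuality: 'low',
          priorityPageCount: 10
        });

      if (isProcessing) {
        return <p>Processing... {Math.round(processingProgress.overall)}%</p>;
      }
      if (hasProcessingErrors) {
        return <button onClick={() => files.forEach(actions.retryProcessing)}>Retry all</button>;
      }
      return <p>{processedFiles.size} of {files.length} files ready</p>;
    }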

@@ -50,18 +50,28 @@ export function usePDFProcessor() {
 
     const pages: PDFPage[] = [];
 
-    // Generate thumbnails for all pages
+    // Create pages without thumbnails initially - load them lazily
     for (let i = 1; i <= totalPages; i++) {
-      const thumbnail = await generatePageThumbnail(file, i);
       pages.push({
         id: `${file.name}-page-${i}`,
         pageNumber: i,
-        thumbnail,
+        thumbnail: null, // Will be loaded lazily
         rotation: 0,
         selected: false
       });
     }
 
+    // Generate thumbnails for first 10 pages immediately for better UX
+    const priorityPages = Math.min(10, totalPages);
+    for (let i = 1; i <= priorityPages; i++) {
+      try {
+        const thumbnail = await generatePageThumbnail(file, i);
+        pages[i - 1].thumbnail = thumbnail;
+      } catch (error) {
+        console.warn(`Failed to generate thumbnail for page ${i}:`, error);
+      }
+    }
+
     // Clean up
     pdf.destroy();
 
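
Aside: the change above is a priority-first loading pattern — placeholders for every page, eager thumbnails only for the first ten. Generalized as a small helper (names are illustrative; loadOne stands in for generatePageThumbnail):

    // Create placeholders for all items, then eagerly resolve only the first
    // `priorityCount`, leaving the rest null for lazy loading.
    async function loadWithPriority<T>(
      count: number,
      priorityCount: number,
      loadOne: (index: number) => Promise<T>
    ): Promise<(T | null)[]> {
      const items: (T | null)[] = new Array(count).fill(null);
      const eager = Math.min(priorityCount, count);
      for (let i = 1; i <= eager; i++) {
        try {
          items[i - 1] = await loadOne(i); // 1-based, like PDF page numbers
        } catch (error) {
          console.warn(`Failed to load item ${i}:`, error);
        }
      }
      return items;
    }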

frontend/src/hooks/useProcessedFiles.ts (new file, 125 lines)
@@ -0,0 +1,125 @@
+import { useState, useEffect } from 'react';
+import { ProcessedFile, ProcessingState } from '../types/processing';
+import { pdfProcessingService } from '../services/pdfProcessingService';
+
+interface UseProcessedFilesResult {
+  processedFiles: Map<File, ProcessedFile>;
+  processingStates: Map<string, ProcessingState>;
+  isProcessing: boolean;
+  hasProcessingErrors: boolean;
+  cacheStats: {
+    entries: number;
+    totalSizeBytes: number;
+    maxSizeBytes: number;
+  };
+}
+
+export function useProcessedFiles(activeFiles: File[]): UseProcessedFilesResult {
+  const [processedFiles, setProcessedFiles] = useState<Map<File, ProcessedFile>>(new Map());
+  const [processingStates, setProcessingStates] = useState<Map<string, ProcessingState>>(new Map());
+
+  useEffect(() => {
+    // Subscribe to processing state changes
+    const unsubscribe = pdfProcessingService.onProcessingChange(setProcessingStates);
+
+    // Check/start processing for each active file
+    const checkProcessing = async () => {
+      const newProcessedFiles = new Map<File, ProcessedFile>();
+
+      for (const file of activeFiles) {
+        const processed = await pdfProcessingService.getProcessedFile(file);
+        if (processed) {
+          newProcessedFiles.set(file, processed);
+        }
+      }
+
+      setProcessedFiles(newProcessedFiles);
+    };
+
+    checkProcessing();
+
+    return unsubscribe;
+  }, [activeFiles]);
+
+  // Listen for processing completion and update processed files
+  useEffect(() => {
+    const updateProcessedFiles = async () => {
+      const updated = new Map<File, ProcessedFile>();
+
+      for (const file of activeFiles) {
+        const existing = processedFiles.get(file);
+        if (existing) {
+          updated.set(file, existing);
+        } else {
+          // Check if processing just completed
+          const processed = await pdfProcessingService.getProcessedFile(file);
+          if (processed) {
+            updated.set(file, processed);
+          }
+        }
+      }
+
+      setProcessedFiles(updated);
+    };
+
+    // Small delay to allow processing state to settle
+    const timeoutId = setTimeout(updateProcessedFiles, 100);
+    return () => clearTimeout(timeoutId);
+  }, [processingStates, activeFiles]);
+
+  // Cleanup when activeFiles changes
+  useEffect(() => {
+    const currentFiles = new Set(activeFiles);
+    const previousFiles = Array.from(processedFiles.keys());
+    const removedFiles = previousFiles.filter(file => !currentFiles.has(file));
+
+    if (removedFiles.length > 0) {
+      // Clean up processing service cache
+      pdfProcessingService.cleanup(removedFiles);
+
+      // Update local state
+      setProcessedFiles(prev => {
+        const updated = new Map();
+        for (const [file, processed] of prev) {
+          if (currentFiles.has(file)) {
+            updated.set(file, processed);
+          }
+        }
+        return updated;
+      });
+    }
+  }, [activeFiles]);
+
+  // Derived state
+  const isProcessing = processingStates.size > 0;
+  const hasProcessingErrors = Array.from(processingStates.values()).some(state => state.status === 'error');
+  const cacheStats = pdfProcessingService.getCacheStats();
+
+  return {
+    processedFiles,
+    processingStates,
+    isProcessing,
+    hasProcessingErrors,
+    cacheStats
+  };
+}
+
+// Hook for getting a single processed file
+export function useProcessedFile(file: File | null): {
+  processedFile: ProcessedFile | null;
+  isProcessing: boolean;
+  processingState: ProcessingState | null;
+} {
+  const result = useProcessedFiles(file ? [file] : []);
+
+  const processedFile = file ? result.processedFiles.get(file) || null : null;
+  const fileKey = file ? pdfProcessingService.generateFileKey(file) : '';
+  const processingState = fileKey ? result.processingStates.get(fileKey) || null : null;
+  const isProcessing = !!processingState;
+
+  return {
+    processedFile,
+    isProcessing,
+    processingState
+  };
+}

frontend/src/services/enhancedPDFProcessingService.ts (new file, 552 lines)
@@ -0,0 +1,552 @@
+import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
+import { ProcessedFile, ProcessingState, PDFPage, ProcessingStrategy, ProcessingConfig, ProcessingMetrics } from '../types/processing';
+import { ProcessingCache } from './processingCache';
+import { FileHasher } from '../utils/fileHash';
+import { FileAnalyzer } from './fileAnalyzer';
+import { ProcessingErrorHandler } from './processingErrorHandler';
+
+// Set up PDF.js worker
+GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
+
+export class EnhancedPDFProcessingService {
+  private static instance: EnhancedPDFProcessingService;
+  private cache = new ProcessingCache();
+  private processing = new Map<string, ProcessingState>();
+  private processingListeners = new Set<(states: Map<string, ProcessingState>) => void>();
+  private metrics: ProcessingMetrics = {
+    totalFiles: 0,
+    completedFiles: 0,
+    failedFiles: 0,
+    averageProcessingTime: 0,
+    cacheHitRate: 0,
+    memoryUsage: 0
+  };
+
+  private defaultConfig: ProcessingConfig = {
+    strategy: 'immediate_full',
+    chunkSize: 20,
+    thumbnailQuality: 'medium',
+    priorityPageCount: 10,
+    useWebWorker: false,
+    maxRetries: 3,
+    timeoutMs: 300000 // 5 minutes
+  };
+
+  private constructor() {}
+
+  static getInstance(): EnhancedPDFProcessingService {
+    if (!EnhancedPDFProcessingService.instance) {
+      EnhancedPDFProcessingService.instance = new EnhancedPDFProcessingService();
+    }
+    return EnhancedPDFProcessingService.instance;
+  }
+
+  /**
+   * Process a file with intelligent strategy selection
+   */
+  async processFile(file: File, customConfig?: Partial<ProcessingConfig>): Promise<ProcessedFile | null> {
+    const fileKey = await this.generateFileKey(file);
+
+    // Check cache first
+    const cached = this.cache.get(fileKey);
+    if (cached) {
+      console.log('Cache hit for:', file.name);
+      this.updateMetrics('cacheHit');
+      return cached;
+    }
+
+    // Check if already processing
+    if (this.processing.has(fileKey)) {
+      console.log('Already processing:', file.name);
+      return null;
+    }
+
+    // Analyze file to determine optimal strategy
+    const analysis = await FileAnalyzer.analyzeFile(file);
+    if (analysis.isCorrupted) {
+      throw new Error(`File ${file.name} appears to be corrupted`);
+    }
+
+    // Create processing config
+    const config: ProcessingConfig = {
+      ...this.defaultConfig,
+      strategy: analysis.recommendedStrategy,
+      ...customConfig
+    };
+
+    // Start processing
+    this.startProcessing(file, fileKey, config, analysis.estimatedProcessingTime);
+    return null;
+  }
+
+  /**
+   * Start processing a file with the specified configuration
+   */
+  private async startProcessing(
+    file: File,
+    fileKey: string,
+    config: ProcessingConfig,
+    estimatedTime: number
+  ): Promise<void> {
+    // Create cancellation token
+    const cancellationToken = ProcessingErrorHandler.createTimeoutController(config.timeoutMs);
+
+    // Set initial state
+    const state: ProcessingState = {
+      fileKey,
+      fileName: file.name,
+      status: 'processing',
+      progress: 0,
+      strategy: config.strategy,
+      startedAt: Date.now(),
+      estimatedTimeRemaining: estimatedTime,
+      cancellationToken
+    };
+
+    this.processing.set(fileKey, state);
+    this.notifyListeners();
+    this.updateMetrics('started');
+
+    try {
+      // Execute processing with retry logic
+      const processedFile = await ProcessingErrorHandler.executeWithRetry(
+        () => this.executeProcessingStrategy(file, config, state),
+        (error) => {
+          state.error = error;
+          this.notifyListeners();
+        },
+        config.maxRetries
+      );
+
+      // Cache the result
+      this.cache.set(fileKey, processedFile);
+
+      // Update state to completed
+      state.status = 'completed';
+      state.progress = 100;
+      state.completedAt = Date.now();
+      this.notifyListeners();
+      this.updateMetrics('completed', Date.now() - state.startedAt);
+
+      // Remove from processing map after brief delay
+      setTimeout(() => {
+        this.processing.delete(fileKey);
+        this.notifyListeners();
+      }, 2000);
+
+    } catch (error) {
+      console.error('Processing failed for', file.name, ':', error);
+
+      const processingError = ProcessingErrorHandler.createProcessingError(error);
+      state.status = 'error';
+      state.error = processingError;
+      this.notifyListeners();
+      this.updateMetrics('failed');
+
+      // Remove failed processing after delay
+      setTimeout(() => {
+        this.processing.delete(fileKey);
+        this.notifyListeners();
+      }, 10000);
+    }
+  }
+
+  /**
+   * Execute the actual processing based on strategy
+   */
+  private async executeProcessingStrategy(
+    file: File,
+    config: ProcessingConfig,
+    state: ProcessingState
+  ): Promise<ProcessedFile> {
+    switch (config.strategy) {
+      case 'immediate_full':
+        return this.processImmediateFull(file, config, state);
+
+      case 'priority_pages':
+        return this.processPriorityPages(file, config, state);
+
+      case 'progressive_chunked':
+        return this.processProgressiveChunked(file, config, state);
+
+      case 'metadata_only':
+        return this.processMetadataOnly(file, config, state);
+
+      default:
+        return this.processImmediateFull(file, config, state);
+    }
+  }
+
+  /**
+   * Process all pages immediately (for small files)
+   */
+  private async processImmediateFull(
+    file: File,
+    config: ProcessingConfig,
+    state: ProcessingState
+  ): Promise<ProcessedFile> {
+    const arrayBuffer = await file.arrayBuffer();
+    const pdf = await getDocument({ data: arrayBuffer }).promise;
+    const totalPages = pdf.numPages;
+
+    state.progress = 10;
+    this.notifyListeners();
+
+    const pages: PDFPage[] = [];
+
+    for (let i = 1; i <= totalPages; i++) {
+      // Check for cancellation
+      if (state.cancellationToken?.signal.aborted) {
+        pdf.destroy();
+        throw new Error('Processing cancelled');
+      }
+
+      const page = await pdf.getPage(i);
+      const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality);
+
+      pages.push({
+        id: `${file.name}-page-${i}`,
+        pageNumber: i,
+        thumbnail,
+        rotation: 0,
+        selected: false
+      });
+
+      // Update progress
|
state.progress = 10 + (i / totalPages) * 85;
|
||||||
|
state.currentPage = i;
|
||||||
|
this.notifyListeners();
|
||||||
|
}
|
||||||
|
|
||||||
|
pdf.destroy();
|
||||||
|
state.progress = 100;
|
||||||
|
this.notifyListeners();
|
||||||
|
|
||||||
|
return this.createProcessedFile(file, pages, totalPages);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process priority pages first, then queue the rest
|
||||||
|
*/
|
||||||
|
private async processPriorityPages(
|
||||||
|
file: File,
|
||||||
|
config: ProcessingConfig,
|
||||||
|
state: ProcessingState
|
||||||
|
): Promise<ProcessedFile> {
|
||||||
|
const arrayBuffer = await file.arrayBuffer();
|
||||||
|
const pdf = await getDocument({ data: arrayBuffer }).promise;
|
||||||
|
const totalPages = pdf.numPages;
|
||||||
|
|
||||||
|
state.progress = 10;
|
||||||
|
this.notifyListeners();
|
||||||
|
|
||||||
|
const pages: PDFPage[] = [];
|
||||||
|
const priorityCount = Math.min(config.priorityPageCount, totalPages);
|
||||||
|
|
||||||
|
// Process priority pages first
|
||||||
|
for (let i = 1; i <= priorityCount; i++) {
|
||||||
|
if (state.cancellationToken?.signal.aborted) {
|
||||||
|
pdf.destroy();
|
||||||
|
throw new Error('Processing cancelled');
|
||||||
|
}
|
||||||
|
|
||||||
|
const page = await pdf.getPage(i);
|
||||||
|
const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality);
|
||||||
|
|
||||||
|
pages.push({
|
||||||
|
id: `${file.name}-page-${i}`,
|
||||||
|
pageNumber: i,
|
||||||
|
thumbnail,
|
||||||
|
rotation: 0,
|
||||||
|
selected: false
|
||||||
|
});
|
||||||
|
|
||||||
|
state.progress = 10 + (i / priorityCount) * 60;
|
||||||
|
state.currentPage = i;
|
||||||
|
this.notifyListeners();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create placeholder pages for remaining pages
|
||||||
|
for (let i = priorityCount + 1; i <= totalPages; i++) {
|
||||||
|
pages.push({
|
||||||
|
id: `${file.name}-page-${i}`,
|
||||||
|
pageNumber: i,
|
||||||
|
thumbnail: null, // Will be loaded lazily
|
||||||
|
rotation: 0,
|
||||||
|
selected: false
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
pdf.destroy();
|
||||||
|
state.progress = 100;
|
||||||
|
this.notifyListeners();
|
||||||
|
|
||||||
|
// Queue background processing for remaining pages (only if there are any)
|
||||||
|
if (priorityCount < totalPages) {
|
||||||
|
this.queueBackgroundProcessing(file, priorityCount + 1, totalPages);
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.createProcessedFile(file, pages, totalPages);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process in chunks with breaks between chunks
|
||||||
|
*/
|
||||||
|
private async processProgressiveChunked(
|
||||||
|
file: File,
|
||||||
|
config: ProcessingConfig,
|
||||||
|
state: ProcessingState
|
||||||
|
): Promise<ProcessedFile> {
|
||||||
|
const arrayBuffer = await file.arrayBuffer();
|
||||||
|
const pdf = await getDocument({ data: arrayBuffer }).promise;
|
||||||
|
const totalPages = pdf.numPages;
|
||||||
|
|
||||||
|
state.progress = 10;
|
||||||
|
this.notifyListeners();
|
||||||
|
|
||||||
|
const pages: PDFPage[] = [];
|
||||||
|
const chunkSize = config.chunkSize;
|
||||||
|
let processedPages = 0;
|
||||||
|
|
||||||
|
// Process first chunk immediately
|
||||||
|
const firstChunkEnd = Math.min(chunkSize, totalPages);
|
||||||
|
|
||||||
|
for (let i = 1; i <= firstChunkEnd; i++) {
|
||||||
|
if (state.cancellationToken?.signal.aborted) {
|
||||||
|
pdf.destroy();
|
||||||
|
throw new Error('Processing cancelled');
|
||||||
|
}
|
||||||
|
|
||||||
|
const page = await pdf.getPage(i);
|
||||||
|
const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality);
|
||||||
|
|
||||||
|
pages.push({
|
||||||
|
id: `${file.name}-page-${i}`,
|
||||||
|
pageNumber: i,
|
||||||
|
thumbnail,
|
||||||
|
rotation: 0,
|
||||||
|
selected: false
|
||||||
|
});
|
||||||
|
|
||||||
|
processedPages++;
|
||||||
|
state.progress = 10 + (processedPages / totalPages) * 70;
|
||||||
|
state.currentPage = i;
|
||||||
|
this.notifyListeners();
|
||||||
|
|
||||||
|
// Small delay to prevent UI blocking
|
||||||
|
if (i % 5 === 0) {
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 10));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create placeholders for remaining pages
|
||||||
|
for (let i = firstChunkEnd + 1; i <= totalPages; i++) {
|
||||||
|
pages.push({
|
||||||
|
id: `${file.name}-page-${i}`,
|
||||||
|
pageNumber: i,
|
||||||
|
thumbnail: null,
|
||||||
|
rotation: 0,
|
||||||
|
selected: false
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
pdf.destroy();
|
||||||
|
state.progress = 100;
|
||||||
|
this.notifyListeners();
|
||||||
|
|
||||||
|
// Queue remaining chunks for background processing (only if there are any)
|
||||||
|
if (firstChunkEnd < totalPages) {
|
||||||
|
this.queueChunkedBackgroundProcessing(file, firstChunkEnd + 1, totalPages, chunkSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.createProcessedFile(file, pages, totalPages);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process metadata only (for very large files)
|
||||||
|
*/
|
||||||
|
private async processMetadataOnly(
|
||||||
|
file: File,
|
||||||
|
config: ProcessingConfig,
|
||||||
|
state: ProcessingState
|
||||||
|
): Promise<ProcessedFile> {
|
||||||
|
const arrayBuffer = await file.arrayBuffer();
|
||||||
|
const pdf = await getDocument({ data: arrayBuffer }).promise;
|
||||||
|
const totalPages = pdf.numPages;
|
||||||
|
|
||||||
|
state.progress = 50;
|
||||||
|
this.notifyListeners();
|
||||||
|
|
||||||
|
// Create placeholder pages without thumbnails
|
||||||
|
const pages: PDFPage[] = [];
|
||||||
|
for (let i = 1; i <= totalPages; i++) {
|
||||||
|
pages.push({
|
||||||
|
id: `${file.name}-page-${i}`,
|
||||||
|
pageNumber: i,
|
||||||
|
thumbnail: null,
|
||||||
|
rotation: 0,
|
||||||
|
selected: false
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
pdf.destroy();
|
||||||
|
state.progress = 100;
|
||||||
|
this.notifyListeners();
|
||||||
|
|
||||||
|
return this.createProcessedFile(file, pages, totalPages);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Render a page thumbnail with specified quality
|
||||||
|
*/
|
||||||
|
private async renderPageThumbnail(page: any, quality: 'low' | 'medium' | 'high'): Promise<string> {
|
||||||
|
const scales = { low: 0.2, medium: 0.5, high: 0.8 }; // Reduced low quality for page editor
|
||||||
|
const scale = scales[quality];
|
||||||
|
|
||||||
|
const viewport = page.getViewport({ scale });
|
||||||
|
const canvas = document.createElement('canvas');
|
||||||
|
canvas.width = viewport.width;
|
||||||
|
canvas.height = viewport.height;
|
||||||
|
|
||||||
|
const context = canvas.getContext('2d');
|
||||||
|
if (!context) {
|
||||||
|
throw new Error('Could not get canvas context');
|
||||||
|
}
|
||||||
|
|
||||||
|
await page.render({ canvasContext: context, viewport }).promise;
|
||||||
|
return canvas.toDataURL('image/jpeg', 0.8); // Use JPEG for better compression
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a ProcessedFile object
|
||||||
|
*/
|
||||||
|
private createProcessedFile(file: File, pages: PDFPage[], totalPages: number): ProcessedFile {
|
||||||
|
return {
|
||||||
|
id: `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
|
||||||
|
pages,
|
||||||
|
totalPages,
|
||||||
|
metadata: {
|
||||||
|
title: file.name,
|
||||||
|
createdAt: new Date().toISOString(),
|
||||||
|
modifiedAt: new Date().toISOString()
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Queue background processing for remaining pages
|
||||||
|
*/
|
||||||
|
private queueBackgroundProcessing(file: File, startPage: number, endPage: number): void {
|
||||||
|
// TODO: Implement background processing queue
|
||||||
|
console.log(`Queued background processing for ${file.name} pages ${startPage}-${endPage}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Queue chunked background processing
|
||||||
|
*/
|
||||||
|
private queueChunkedBackgroundProcessing(file: File, startPage: number, endPage: number, chunkSize: number): void {
|
||||||
|
// TODO: Implement chunked background processing
|
||||||
|
console.log(`Queued chunked background processing for ${file.name} pages ${startPage}-${endPage} in chunks of ${chunkSize}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a unique, collision-resistant cache key
|
||||||
|
*/
|
||||||
|
private async generateFileKey(file: File): Promise<string> {
|
||||||
|
return await FileHasher.generateHybridHash(file);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cancel processing for a specific file
|
||||||
|
*/
|
||||||
|
cancelProcessing(fileKey: string): void {
|
||||||
|
const state = this.processing.get(fileKey);
|
||||||
|
if (state && state.cancellationToken) {
|
||||||
|
state.cancellationToken.abort();
|
||||||
|
state.status = 'cancelled';
|
||||||
|
this.notifyListeners();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update processing metrics
|
||||||
|
*/
|
||||||
|
private updateMetrics(event: 'started' | 'completed' | 'failed' | 'cacheHit', processingTime?: number): void {
|
||||||
|
switch (event) {
|
||||||
|
case 'started':
|
||||||
|
this.metrics.totalFiles++;
|
||||||
|
break;
|
||||||
|
case 'completed':
|
||||||
|
this.metrics.completedFiles++;
|
||||||
|
if (processingTime) {
|
||||||
|
// Update rolling average
|
||||||
|
const totalProcessingTime = this.metrics.averageProcessingTime * (this.metrics.completedFiles - 1) + processingTime;
|
||||||
|
this.metrics.averageProcessingTime = totalProcessingTime / this.metrics.completedFiles;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'failed':
|
||||||
|
this.metrics.failedFiles++;
|
||||||
|
break;
|
||||||
|
case 'cacheHit':
|
||||||
|
// Update cache hit rate
|
||||||
|
const totalAttempts = this.metrics.totalFiles + 1;
|
||||||
|
this.metrics.cacheHitRate = (this.metrics.cacheHitRate * this.metrics.totalFiles + 1) / totalAttempts;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get processing metrics
|
||||||
|
*/
|
||||||
|
getMetrics(): ProcessingMetrics {
|
||||||
|
return { ...this.metrics };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* State subscription for components
|
||||||
|
*/
|
||||||
|
onProcessingChange(callback: (states: Map<string, ProcessingState>) => void): () => void {
|
||||||
|
this.processingListeners.add(callback);
|
||||||
|
return () => this.processingListeners.delete(callback);
|
||||||
|
}
|
||||||
|
|
||||||
|
getProcessingStates(): Map<string, ProcessingState> {
|
||||||
|
return new Map(this.processing);
|
||||||
|
}
|
||||||
|
|
||||||
|
private notifyListeners(): void {
|
||||||
|
this.processingListeners.forEach(callback => callback(this.processing));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cleanup method for removed files
|
||||||
|
*/
|
||||||
|
cleanup(removedFiles: File[]): void {
|
||||||
|
removedFiles.forEach(async (file) => {
|
||||||
|
const key = await this.generateFileKey(file);
|
||||||
|
this.cache.delete(key);
|
||||||
|
this.cancelProcessing(key);
|
||||||
|
this.processing.delete(key);
|
||||||
|
});
|
||||||
|
this.notifyListeners();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get cache statistics
|
||||||
|
*/
|
||||||
|
getCacheStats() {
|
||||||
|
return this.cache.getStats();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clear all cache and processing
|
||||||
|
*/
|
||||||
|
clearAll(): void {
|
||||||
|
this.cache.clear();
|
||||||
|
this.processing.clear();
|
||||||
|
this.notifyListeners();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Export singleton instance
|
||||||
|
export const enhancedPDFProcessingService = EnhancedPDFProcessingService.getInstance();
|
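For illustration, a minimal consumer sketch, not part of this commit (the import path and function name are assumptions): a caller subscribes to state changes, kicks off processing, and polls until the result lands in the cache.

import { enhancedPDFProcessingService } from './enhancedPDFProcessingService';

// Hypothetical caller: first processFile() call returns null and starts work;
// later calls return the cached ProcessedFile once processing completes.
async function loadPages(file: File) {
  const unsubscribe = enhancedPDFProcessingService.onProcessingChange((states) => {
    states.forEach(s => console.log(`${s.fileName}: ${s.status} ${Math.round(s.progress)}%`));
  });

  let processed = await enhancedPDFProcessingService.processFile(file);
  while (!processed) {
    await new Promise(r => setTimeout(r, 500)); // poll; a real caller would also watch for 'error' states
    processed = await enhancedPDFProcessingService.processFile(file);
  }

  unsubscribe();
  return processed.pages;
}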
frontend/src/services/fileAnalyzer.ts (new file, 240 lines)
@@ -0,0 +1,240 @@
import { getDocument } from 'pdfjs-dist';
import { FileAnalysis, ProcessingStrategy } from '../types/processing';

export class FileAnalyzer {
  private static readonly SIZE_THRESHOLDS = {
    SMALL: 10 * 1024 * 1024,   // 10MB
    MEDIUM: 50 * 1024 * 1024,  // 50MB
    LARGE: 200 * 1024 * 1024,  // 200MB
  };

  private static readonly PAGE_THRESHOLDS = {
    FEW: 10,      // < 10 pages - immediate full processing
    MANY: 50,     // < 50 pages - priority pages
    MASSIVE: 100, // < 100 pages - progressive chunked
    // > 100 pages = metadata only
  };

  /**
   * Analyze a file to determine the optimal processing strategy
   */
  static async analyzeFile(file: File): Promise<FileAnalysis> {
    const analysis: FileAnalysis = {
      fileSize: file.size,
      isEncrypted: false,
      isCorrupted: false,
      recommendedStrategy: 'metadata_only',
      estimatedProcessingTime: 0,
    };

    try {
      // Quick validation and page count estimation
      const quickAnalysis = await this.quickPDFAnalysis(file);
      analysis.estimatedPageCount = quickAnalysis.pageCount;
      analysis.isEncrypted = quickAnalysis.isEncrypted;
      analysis.isCorrupted = quickAnalysis.isCorrupted;

      // Determine strategy based on file characteristics
      analysis.recommendedStrategy = this.determineStrategy(file.size, quickAnalysis.pageCount);

      // Estimate processing time
      analysis.estimatedProcessingTime = this.estimateProcessingTime(
        file.size,
        quickAnalysis.pageCount,
        analysis.recommendedStrategy
      );

    } catch (error) {
      console.error('File analysis failed:', error);
      analysis.isCorrupted = true;
      analysis.recommendedStrategy = 'metadata_only';
    }

    return analysis;
  }

  /**
   * Quick PDF analysis without full processing
   */
  private static async quickPDFAnalysis(file: File): Promise<{
    pageCount: number;
    isEncrypted: boolean;
    isCorrupted: boolean;
  }> {
    try {
      // For small files, read the whole file.
      // For large files, still try the whole file first (PDF.js needs the complete structure).
      const arrayBuffer = await file.arrayBuffer();

      const pdf = await getDocument({
        data: arrayBuffer,
        stopAtErrors: false, // Don't stop at minor errors
        verbosity: 0         // Suppress PDF.js warnings
      }).promise;

      const pageCount = pdf.numPages;
      // PDFDocumentProxy does not expose an `isEncrypted` flag in its typings;
      // cast to any to preserve the original runtime check.
      const isEncrypted = Boolean((pdf as any).isEncrypted);

      // Clean up
      pdf.destroy();

      return {
        pageCount,
        isEncrypted,
        isCorrupted: false
      };

    } catch (error) {
      // Try to determine whether this is corruption or encryption
      const errorMessage = error instanceof Error ? error.message.toLowerCase() : '';
      const isEncrypted = errorMessage.includes('password') || errorMessage.includes('encrypted');

      return {
        pageCount: 0,
        isEncrypted,
        isCorrupted: !isEncrypted // If not encrypted, probably corrupted
      };
    }
  }

  /**
   * Determine the best processing strategy based on file characteristics
   */
  private static determineStrategy(fileSize: number, pageCount?: number): ProcessingStrategy {
    // Handle corrupted or encrypted files
    if (!pageCount || pageCount === 0) {
      return 'metadata_only';
    }

    // Small files with few pages - process everything immediately
    if (fileSize <= this.SIZE_THRESHOLDS.SMALL && pageCount <= this.PAGE_THRESHOLDS.FEW) {
      return 'immediate_full';
    }

    // Medium files or many pages - priority pages first, then progressive
    if (fileSize <= this.SIZE_THRESHOLDS.MEDIUM && pageCount <= this.PAGE_THRESHOLDS.MANY) {
      return 'priority_pages';
    }

    // Large files or massive page counts - chunked processing
    if (fileSize <= this.SIZE_THRESHOLDS.LARGE && pageCount <= this.PAGE_THRESHOLDS.MASSIVE) {
      return 'progressive_chunked';
    }

    // Very large files - metadata only
    return 'metadata_only';
  }

  /**
   * Estimate processing time based on file characteristics and strategy
   */
  private static estimateProcessingTime(
    fileSize: number,
    pageCount: number = 0,
    strategy: ProcessingStrategy
  ): number {
    const baseTimes = {
      immediate_full: 200,      // 200ms per page
      priority_pages: 150,      // 150ms per page (optimized)
      progressive_chunked: 100, // 100ms per page (chunked)
      metadata_only: 50         // 50ms total
    };

    const baseTime = baseTimes[strategy];

    switch (strategy) {
      case 'metadata_only':
        return baseTime;

      case 'immediate_full':
        return pageCount * baseTime;

      case 'priority_pages': {
        // Estimate time for the priority pages (first 10)
        const priorityPages = Math.min(pageCount, 10);
        return priorityPages * baseTime;
      }

      case 'progressive_chunked': {
        // Estimate time for the first chunk (20 pages)
        const firstChunk = Math.min(pageCount, 20);
        return firstChunk * baseTime;
      }

      default:
        return pageCount * baseTime;
    }
  }

  /**
   * Get processing recommendations for a set of files
   */
  static async analyzeMultipleFiles(files: File[]): Promise<{
    analyses: Map<File, FileAnalysis>;
    recommendations: {
      totalEstimatedTime: number;
      suggestedBatchSize: number;
      shouldUseWebWorker: boolean;
      memoryWarning: boolean;
    };
  }> {
    const analyses = new Map<File, FileAnalysis>();
    let totalEstimatedTime = 0;
    let totalSize = 0;
    let totalPages = 0;

    // Analyze each file
    for (const file of files) {
      const analysis = await this.analyzeFile(file);
      analyses.set(file, analysis);
      totalEstimatedTime += analysis.estimatedProcessingTime;
      totalSize += file.size;
      totalPages += analysis.estimatedPageCount || 0;
    }

    // Generate recommendations
    const recommendations = {
      totalEstimatedTime,
      suggestedBatchSize: this.calculateBatchSize(files.length, totalSize),
      shouldUseWebWorker: totalPages > 100 || totalSize > this.SIZE_THRESHOLDS.MEDIUM,
      memoryWarning: totalSize > this.SIZE_THRESHOLDS.LARGE || totalPages > this.PAGE_THRESHOLDS.MASSIVE
    };

    return { analyses, recommendations };
  }

  /**
   * Calculate the optimal batch size for processing multiple files
   */
  private static calculateBatchSize(fileCount: number, totalSize: number): number {
    // Process small batches for large total sizes
    if (totalSize > this.SIZE_THRESHOLDS.LARGE) {
      return Math.max(1, Math.floor(fileCount / 4));
    }

    if (totalSize > this.SIZE_THRESHOLDS.MEDIUM) {
      return Math.max(2, Math.floor(fileCount / 2));
    }

    // Process all at once for smaller total sizes
    return fileCount;
  }

  /**
   * Check whether a file appears to be a valid PDF
   */
  static async isValidPDF(file: File): Promise<boolean> {
    if (file.type !== 'application/pdf' && !file.name.toLowerCase().endsWith('.pdf')) {
      return false;
    }

    try {
      // Read the first few bytes to check the PDF header
      const header = file.slice(0, 8);
      const headerBytes = new Uint8Array(await header.arrayBuffer());
      const headerString = String.fromCharCode(...headerBytes);

      return headerString.startsWith('%PDF-');
    } catch (error) {
      return false;
    }
  }
}
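A hedged usage sketch (the calling function is hypothetical; FileAnalyzer and its types come from this file): validate a file, then let the analysis pick the strategy before any heavy work starts.

import { FileAnalyzer } from './fileAnalyzer';

// Hypothetical driver: describe a file and its recommended strategy.
async function describeFile(file: File) {
  if (!(await FileAnalyzer.isValidPDF(file))) {
    throw new Error(`${file.name} is not a PDF`);
  }
  const analysis = await FileAnalyzer.analyzeFile(file);
  console.log(
    `${file.name}: ~${analysis.estimatedPageCount ?? '?'} pages, ` +
    `strategy=${analysis.recommendedStrategy}, eta=${analysis.estimatedProcessingTime}ms`
  );
  return analysis;
}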
frontend/src/services/pdfProcessingService.ts (new file, 188 lines)
@@ -0,0 +1,188 @@
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
import { ProcessedFile, ProcessingState, PDFPage } from '../types/processing';
import { ProcessingCache } from './processingCache';

// Set up PDF.js worker
GlobalWorkerOptions.workerSrc = '/pdf.worker.js';

export class PDFProcessingService {
  private static instance: PDFProcessingService;
  private cache = new ProcessingCache();
  private processing = new Map<string, ProcessingState>();
  private processingListeners = new Set<(states: Map<string, ProcessingState>) => void>();

  private constructor() {}

  static getInstance(): PDFProcessingService {
    if (!PDFProcessingService.instance) {
      PDFProcessingService.instance = new PDFProcessingService();
    }
    return PDFProcessingService.instance;
  }

  async getProcessedFile(file: File): Promise<ProcessedFile | null> {
    const fileKey = this.generateFileKey(file);

    // Check cache first
    const cached = this.cache.get(fileKey);
    if (cached) {
      console.log('Cache hit for:', file.name);
      return cached;
    }

    // Check if already processing
    if (this.processing.has(fileKey)) {
      console.log('Already processing:', file.name);
      return null; // Will be available when processing completes
    }

    // Start processing
    this.startProcessing(file, fileKey);
    return null;
  }

  private async startProcessing(file: File, fileKey: string): Promise<void> {
    // Set initial state
    const state: ProcessingState = {
      fileKey,
      fileName: file.name,
      status: 'processing',
      progress: 0,
      strategy: 'immediate_full', // ProcessingState requires a strategy; this basic service always processes fully
      startedAt: Date.now()
    };

    this.processing.set(fileKey, state);
    this.notifyListeners();

    try {
      // Process the file with progress updates
      const processedFile = await this.processFileWithProgress(file, (progress) => {
        state.progress = progress;
        this.notifyListeners();
      });

      // Cache the result
      this.cache.set(fileKey, processedFile);

      // Update state to completed
      state.status = 'completed';
      state.progress = 100;
      state.completedAt = Date.now();
      this.notifyListeners();

      // Remove from processing map after a brief delay
      setTimeout(() => {
        this.processing.delete(fileKey);
        this.notifyListeners();
      }, 2000);

    } catch (error) {
      console.error('Processing failed for', file.name, ':', error);
      state.status = 'error';
      // ProcessingState.error expects a ProcessingError object, not a bare string
      state.error = {
        type: 'parsing',
        message: error instanceof Error ? error.message : 'Unknown error',
        recoverable: false,
        retryCount: 0,
        maxRetries: 0,
        originalError: error instanceof Error ? error : undefined
      };
      this.notifyListeners();

      // Remove failed processing after a delay
      setTimeout(() => {
        this.processing.delete(fileKey);
        this.notifyListeners();
      }, 5000);
    }
  }

  private async processFileWithProgress(
    file: File,
    onProgress: (progress: number) => void
  ): Promise<ProcessedFile> {
    const arrayBuffer = await file.arrayBuffer();
    const pdf = await getDocument({ data: arrayBuffer }).promise;
    const totalPages = pdf.numPages;

    onProgress(10); // PDF loaded

    const pages: PDFPage[] = [];

    for (let i = 1; i <= totalPages; i++) {
      const page = await pdf.getPage(i);
      const viewport = page.getViewport({ scale: 0.5 });
      const canvas = document.createElement('canvas');
      canvas.width = viewport.width;
      canvas.height = viewport.height;

      const context = canvas.getContext('2d');
      if (context) {
        await page.render({ canvasContext: context, viewport }).promise;
        const thumbnail = canvas.toDataURL();

        pages.push({
          id: `${file.name}-page-${i}`,
          pageNumber: i,
          thumbnail,
          rotation: 0,
          selected: false
        });
      }

      // Update progress
      const progress = 10 + (i / totalPages) * 85; // 10-95%
      onProgress(progress);
    }

    pdf.destroy();
    onProgress(100);

    return {
      id: `${Date.now()}-${Math.random().toString(36).slice(2, 11)}`, // slice replaces deprecated substr
      pages,
      totalPages,
      metadata: {
        title: file.name,
        createdAt: new Date().toISOString(),
        modifiedAt: new Date().toISOString()
      }
    };
  }

  // State subscription for components
  onProcessingChange(callback: (states: Map<string, ProcessingState>) => void): () => void {
    this.processingListeners.add(callback);
    return () => this.processingListeners.delete(callback);
  }

  getProcessingStates(): Map<string, ProcessingState> {
    return new Map(this.processing);
  }

  private notifyListeners(): void {
    this.processingListeners.forEach(callback => callback(this.processing));
  }

  generateFileKey(file: File): string {
    return `${file.name}-${file.size}-${file.lastModified}`;
  }

  // Cleanup method for activeFiles changes
  cleanup(removedFiles: File[]): void {
    removedFiles.forEach(file => {
      const key = this.generateFileKey(file);
      this.cache.delete(key);
      this.processing.delete(key);
    });
    this.notifyListeners();
  }

  // Get cache stats (for debugging)
  getCacheStats() {
    return this.cache.getStats();
  }

  // Clear all cache and processing
  clearAll(): void {
    this.cache.clear();
    this.processing.clear();
    this.notifyListeners();
  }
}

// Export singleton instance
export const pdfProcessingService = PDFProcessingService.getInstance();
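Since getProcessedFile returns null while work is in flight, a caller must subscribe or poll; a minimal sketch under that assumption (the helper below is hypothetical, not part of the commit):

import { pdfProcessingService } from './pdfProcessingService';

// Hypothetical helper: poll until the processed result is cached.
// A real caller should also watch onProcessingChange for 'error' states
// to avoid polling forever on a failed file.
async function waitForProcessed(file: File, intervalMs = 250) {
  let result = await pdfProcessingService.getProcessedFile(file);
  while (result === null) {
    await new Promise(r => setTimeout(r, intervalMs));
    result = await pdfProcessingService.getProcessedFile(file);
  }
  return result;
}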
frontend/src/services/processingCache.ts (new file, 138 lines)
@@ -0,0 +1,138 @@
import { ProcessedFile, CacheConfig, CacheEntry, CacheStats } from '../types/processing';

export class ProcessingCache {
  private cache = new Map<string, CacheEntry>();
  private totalSize = 0;

  constructor(private config: CacheConfig = {
    maxFiles: 20,
    maxSizeBytes: 2 * 1024 * 1024 * 1024, // 2GB
    ttlMs: 30 * 60 * 1000                 // 30 minutes
  }) {}

  set(key: string, data: ProcessedFile): void {
    // Remove expired entries first
    this.cleanup();

    // Calculate entry size (rough estimate)
    const size = this.calculateSize(data);

    // Make room if needed
    this.makeRoom(size);

    this.cache.set(key, {
      data,
      size,
      lastAccessed: Date.now(),
      createdAt: Date.now()
    });

    this.totalSize += size;
  }

  get(key: string): ProcessedFile | null {
    const entry = this.cache.get(key);
    if (!entry) return null;

    // Check TTL
    if (Date.now() - entry.createdAt > this.config.ttlMs) {
      this.delete(key);
      return null;
    }

    // Update last accessed
    entry.lastAccessed = Date.now();
    return entry.data;
  }

  has(key: string): boolean {
    const entry = this.cache.get(key);
    if (!entry) return false;

    // Check TTL
    if (Date.now() - entry.createdAt > this.config.ttlMs) {
      this.delete(key);
      return false;
    }

    return true;
  }

  private makeRoom(neededSize: number): void {
    // Remove the least-recently-accessed entries until we have space
    while (
      this.cache.size >= this.config.maxFiles ||
      this.totalSize + neededSize > this.config.maxSizeBytes
    ) {
      const oldestKey = this.findOldestEntry();
      if (oldestKey) {
        this.delete(oldestKey);
      } else break;
    }
  }

  private findOldestEntry(): string | null {
    let oldest: { key: string; lastAccessed: number } | null = null;

    for (const [key, entry] of this.cache) {
      if (!oldest || entry.lastAccessed < oldest.lastAccessed) {
        oldest = { key, lastAccessed: entry.lastAccessed };
      }
    }

    return oldest?.key || null;
  }

  private cleanup(): void {
    const now = Date.now();
    for (const [key, entry] of this.cache) {
      if (now - entry.createdAt > this.config.ttlMs) {
        this.delete(key);
      }
    }
  }

  private calculateSize(data: ProcessedFile): number {
    // Rough size estimation
    let size = 0;

    // Estimate the size of thumbnails (the main memory consumer)
    data.pages.forEach(page => {
      if (page.thumbnail) {
        // A base64 thumbnail is roughly 50KB each
        size += 50 * 1024;
      }
    });

    // Add some overhead for other data
    size += 10 * 1024; // 10KB overhead

    return size;
  }

  delete(key: string): void {
    const entry = this.cache.get(key);
    if (entry) {
      this.totalSize -= entry.size;
      this.cache.delete(key);
    }
  }

  clear(): void {
    this.cache.clear();
    this.totalSize = 0;
  }

  getStats(): CacheStats {
    return {
      entries: this.cache.size,
      totalSizeBytes: this.totalSize,
      maxSizeBytes: this.config.maxSizeBytes
    };
  }

  // Get all cached keys (for debugging and cleanup)
  getKeys(): string[] {
    return Array.from(this.cache.keys());
  }
}
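A minimal sketch of standalone use, assuming a tighter budget than the defaults (the config values and the demo entry are illustrative only):

import { ProcessingCache } from './processingCache';

// Hypothetical: a small cache for a memory-constrained context.
const cache = new ProcessingCache({
  maxFiles: 5,
  maxSizeBytes: 256 * 1024 * 1024, // 256MB
  ttlMs: 5 * 60 * 1000             // 5 minutes
});

// set() evicts least-recently-accessed entries until the new entry fits;
// get() returns null for missing or expired entries and refreshes lastAccessed.
cache.set('demo-key', {
  id: 'demo',
  pages: [],
  totalPages: 0,
  metadata: { title: 'empty.pdf', createdAt: new Date().toISOString(), modifiedAt: new Date().toISOString() }
});
console.log(cache.getStats()); // { entries: 1, totalSizeBytes: ..., maxSizeBytes: ... }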
frontend/src/services/processingErrorHandler.ts (new file, 282 lines)
@@ -0,0 +1,282 @@
import { ProcessingError } from '../types/processing';

export class ProcessingErrorHandler {
  private static readonly DEFAULT_MAX_RETRIES = 3;
  private static readonly RETRY_DELAYS = [1000, 2000, 4000]; // Progressive backoff in ms

  /**
   * Create a ProcessingError from an unknown error
   */
  static createProcessingError(
    error: unknown,
    retryCount: number = 0,
    maxRetries: number = this.DEFAULT_MAX_RETRIES
  ): ProcessingError {
    const originalError = error instanceof Error ? error : new Error(String(error));
    const message = originalError.message;

    // Determine error type based on the error message and properties
    const errorType = this.determineErrorType(originalError, message);

    // Determine whether the error is recoverable
    const recoverable = this.isRecoverable(errorType, retryCount, maxRetries);

    return {
      type: errorType,
      message: this.formatErrorMessage(errorType, message),
      recoverable,
      retryCount,
      maxRetries,
      originalError
    };
  }

  /**
   * Determine the type of error based on its characteristics
   */
  private static determineErrorType(error: Error, message: string): ProcessingError['type'] {
    const lowerMessage = message.toLowerCase();

    // Network-related errors
    if (lowerMessage.includes('network') ||
        lowerMessage.includes('fetch') ||
        lowerMessage.includes('connection')) {
      return 'network';
    }

    // Memory-related errors
    if (lowerMessage.includes('memory') ||
        lowerMessage.includes('quota') ||
        lowerMessage.includes('allocation') ||
        error.name === 'QuotaExceededError') {
      return 'memory';
    }

    // Timeout errors. Note that 'aborted'/AbortError matches here first,
    // so the cancellation branch below only catches explicit 'cancel' messages.
    if (lowerMessage.includes('timeout') ||
        lowerMessage.includes('aborted') ||
        error.name === 'AbortError') {
      return 'timeout';
    }

    // Cancellation
    if (lowerMessage.includes('cancel') ||
        lowerMessage.includes('abort') ||
        error.name === 'AbortError') {
      return 'cancelled';
    }

    // PDF corruption/parsing errors
    if (lowerMessage.includes('pdf') ||
        lowerMessage.includes('parse') ||
        lowerMessage.includes('invalid') ||
        lowerMessage.includes('corrupt') ||
        lowerMessage.includes('malformed')) {
      return 'corruption';
    }

    // Default to parsing error
    return 'parsing';
  }

  /**
   * Determine whether an error is recoverable based on type and retry count
   */
  private static isRecoverable(
    errorType: ProcessingError['type'],
    retryCount: number,
    maxRetries: number
  ): boolean {
    // Never recoverable
    if (errorType === 'cancelled' || errorType === 'corruption') {
      return false;
    }

    // Not recoverable once the retry budget is exhausted
    if (retryCount >= maxRetries) {
      return false;
    }

    // Memory errors are usually not recoverable
    if (errorType === 'memory') {
      return retryCount < 1; // Only one retry for memory errors
    }

    // Network, timeout, and parsing errors are usually recoverable
    return errorType === 'network' || errorType === 'timeout' || errorType === 'parsing';
  }

  /**
   * Format an error message for user display
   */
  private static formatErrorMessage(errorType: ProcessingError['type'], originalMessage: string): string {
    switch (errorType) {
      case 'network':
        return 'Network connection failed. Please check your internet connection and try again.';

      case 'memory':
        return 'Insufficient memory to process this file. Try closing other applications or processing a smaller file.';

      case 'timeout':
        return 'Processing timed out. This file may be too large or complex to process.';

      case 'cancelled':
        return 'Processing was cancelled by the user.';

      case 'corruption':
        return 'This PDF file appears to be corrupted or encrypted. Please try a different file.';

      case 'parsing':
        return `Failed to process PDF: ${originalMessage}`;

      default:
        return `Processing failed: ${originalMessage}`;
    }
  }

  /**
   * Execute an operation with automatic retry logic
   */
  static async executeWithRetry<T>(
    operation: () => Promise<T>,
    onError?: (error: ProcessingError) => void,
    maxRetries: number = this.DEFAULT_MAX_RETRIES
  ): Promise<T> {
    let lastError: ProcessingError | null = null;

    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        return await operation();
      } catch (error) {
        lastError = this.createProcessingError(error, attempt, maxRetries);

        // Notify the error handler
        if (onError) {
          onError(lastError);
        }

        // Don't retry if not recoverable
        if (!lastError.recoverable) {
          break;
        }

        // Don't retry on the last attempt
        if (attempt === maxRetries) {
          break;
        }

        // Wait before retrying, with progressive backoff
        const delay = this.RETRY_DELAYS[Math.min(attempt, this.RETRY_DELAYS.length - 1)];
        await this.delay(delay);

        console.log(`Retrying operation (attempt ${attempt + 2}/${maxRetries + 1}) after ${delay}ms delay`);
      }
    }

    // All retries exhausted
    throw lastError || new Error('Operation failed after all retries');
  }

  /**
   * Create a timeout wrapper for operations
   */
  static withTimeout<T>(
    operation: () => Promise<T>,
    timeoutMs: number,
    timeoutMessage: string = 'Operation timed out'
  ): Promise<T> {
    return new Promise<T>((resolve, reject) => {
      const timeoutId = setTimeout(() => {
        reject(new Error(timeoutMessage));
      }, timeoutMs);

      operation()
        .then(result => {
          clearTimeout(timeoutId);
          resolve(result);
        })
        .catch(error => {
          clearTimeout(timeoutId);
          reject(error);
        });
    });
  }

  /**
   * Create an AbortController that times out after the specified duration
   */
  static createTimeoutController(timeoutMs: number): AbortController {
    const controller = new AbortController();

    setTimeout(() => {
      controller.abort();
    }, timeoutMs);

    return controller;
  }

  /**
   * Check whether an error indicates the operation should be retried
   */
  static shouldRetry(error: ProcessingError): boolean {
    return error.recoverable && error.retryCount < error.maxRetries;
  }

  /**
   * Get user-friendly suggestions based on error type
   */
  static getErrorSuggestions(error: ProcessingError): string[] {
    switch (error.type) {
      case 'network':
        return [
          'Check your internet connection',
          'Try refreshing the page',
          'Try again in a few moments'
        ];

      case 'memory':
        return [
          'Close other browser tabs or applications',
          'Try processing a smaller file',
          'Restart your browser',
          'Use a device with more memory'
        ];

      case 'timeout':
        return [
          'Try processing a smaller file',
          'Break large files into smaller sections',
          'Check your internet connection speed'
        ];

      case 'corruption':
        return [
          'Verify the PDF file opens in other applications',
          'Try re-downloading the file',
          'Try a different PDF file',
          'Contact the file creator if it appears corrupted'
        ];

      case 'parsing':
        return [
          'Verify this is a valid PDF file',
          'Try a different PDF file',
          'Contact support if the problem persists'
        ];

      default:
        return [
          'Try refreshing the page',
          'Try again in a few moments',
          'Contact support if the problem persists'
        ];
    }
  }

  /**
   * Utility function for delays
   */
  private static delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
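A hedged sketch of composing the two wrappers (the fetch-based operation is hypothetical; the handler methods are from this file): timeout inside, retry with progressive backoff outside.

import { ProcessingErrorHandler } from './processingErrorHandler';

// Hypothetical: wrap a flaky download in a 10s timeout plus two retries.
async function fetchWithRecovery(url: string) {
  return ProcessingErrorHandler.executeWithRetry(
    () => ProcessingErrorHandler.withTimeout(
      () => fetch(url).then(r => r.arrayBuffer()),
      10_000,
      'Download timed out'
    ),
    (err) => console.warn(err.message, ProcessingErrorHandler.getErrorSuggestions(err)),
    2 // at most two retries
  );
}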
@@ -1,7 +1,7 @@
 export interface PDFPage {
   id: string;
   pageNumber: number;
-  thumbnail: string;
+  thumbnail: string | null;
   rotation: number;
   selected: boolean;
   splitBefore?: boolean;
frontend/src/types/processing.ts (new file, 91 lines)
@@ -0,0 +1,91 @@
export interface ProcessingError {
  type: 'network' | 'parsing' | 'memory' | 'corruption' | 'timeout' | 'cancelled';
  message: string;
  recoverable: boolean;
  retryCount: number;
  maxRetries: number;
  originalError?: Error;
}

export interface ProcessingState {
  fileKey: string;
  fileName: string;
  status: 'pending' | 'processing' | 'completed' | 'error' | 'cancelled';
  progress: number; // 0-100
  strategy: ProcessingStrategy;
  error?: ProcessingError;
  startedAt: number;
  completedAt?: number;
  estimatedTimeRemaining?: number;
  currentPage?: number;
  cancellationToken?: AbortController;
}

export interface ProcessedFile {
  id: string;
  pages: PDFPage[];
  totalPages: number;
  metadata: {
    title: string;
    createdAt: string;
    modifiedAt: string;
  };
}

export interface PDFPage {
  id: string;
  pageNumber: number;
  thumbnail: string | null;
  rotation: number;
  selected: boolean;
  splitBefore?: boolean;
}

export interface CacheConfig {
  maxFiles: number;
  maxSizeBytes: number;
  ttlMs: number;
}

export interface CacheEntry {
  data: ProcessedFile;
  size: number;
  lastAccessed: number;
  createdAt: number;
}

export interface CacheStats {
  entries: number;
  totalSizeBytes: number;
  maxSizeBytes: number;
}

export type ProcessingStrategy = 'immediate_full' | 'progressive_chunked' | 'metadata_only' | 'priority_pages';

export interface ProcessingConfig {
  strategy: ProcessingStrategy;
  chunkSize: number; // Pages per chunk
  thumbnailQuality: 'low' | 'medium' | 'high';
  priorityPageCount: number; // Number of priority pages to process first
  useWebWorker: boolean;
  maxRetries: number;
  timeoutMs: number;
}

export interface FileAnalysis {
  fileSize: number;
  estimatedPageCount?: number;
  isEncrypted: boolean;
  isCorrupted: boolean;
  recommendedStrategy: ProcessingStrategy;
  estimatedProcessingTime: number; // milliseconds
}

export interface ProcessingMetrics {
  totalFiles: number;
  completedFiles: number;
  failedFiles: number;
  averageProcessingTime: number;
  cacheHitRate: number;
  memoryUsage: number;
}
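For orientation, an example of a config literal these types admit (the variable, values, and import path are illustrative only, not from the commit):

import { ProcessingConfig } from '../types/processing';

// Hypothetical override for very large PDFs: chunked, low-quality thumbnails.
const largeFileConfig: ProcessingConfig = {
  strategy: 'progressive_chunked',
  chunkSize: 20,             // pages per chunk
  thumbnailQuality: 'low',
  priorityPageCount: 10,
  useWebWorker: true,
  maxRetries: 3,
  timeoutMs: 10 * 60 * 1000  // 10 minutes
};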
frontend/src/utils/fileHash.ts (new file, 127 lines)
@@ -0,0 +1,127 @@
/**
 * File hashing utilities for cache key generation
 */

export class FileHasher {
  private static readonly CHUNK_SIZE = 64 * 1024; // 64KB chunks for hashing

  /**
   * Generate a content-based hash for a file.
   * Uses first + middle + last chunks to create a reasonably unique hash
   * without reading the entire file (which would be expensive for large files).
   */
  static async generateContentHash(file: File): Promise<string> {
    const chunks = await this.getFileChunks(file);
    const combined = await this.combineChunks(chunks);
    return await this.hashArrayBuffer(combined);
  }

  /**
   * Generate a fast hash based on file metadata.
   * Faster but less collision-resistant than a content hash.
   */
  static generateMetadataHash(file: File): string {
    const data = `${file.name}-${file.size}-${file.lastModified}-${file.type}`;
    return this.simpleHash(data);
  }

  /**
   * Generate a hybrid hash that balances speed and uniqueness.
   * Uses metadata plus a small content sample.
   */
  static async generateHybridHash(file: File): Promise<string> {
    const metadataHash = this.generateMetadataHash(file);

    // For small files, use the full content hash
    if (file.size <= 1024 * 1024) { // 1MB
      const contentHash = await this.generateContentHash(file);
      return `${metadataHash}-${contentHash}`;
    }

    // For large files, use the first chunk only
    const firstChunk = file.slice(0, this.CHUNK_SIZE);
    const firstChunkBuffer = await firstChunk.arrayBuffer();
    const firstChunkHash = await this.hashArrayBuffer(firstChunkBuffer);

    return `${metadataHash}-${firstChunkHash}`;
  }

  private static async getFileChunks(file: File): Promise<ArrayBuffer[]> {
    const chunks: ArrayBuffer[] = [];

    // First chunk
    if (file.size > 0) {
      const firstChunk = file.slice(0, Math.min(this.CHUNK_SIZE, file.size));
      chunks.push(await firstChunk.arrayBuffer());
    }

    // Middle chunk (if the file is large enough)
    if (file.size > this.CHUNK_SIZE * 2) {
      const middleStart = Math.floor(file.size / 2) - Math.floor(this.CHUNK_SIZE / 2);
      const middleEnd = middleStart + this.CHUNK_SIZE;
      const middleChunk = file.slice(middleStart, middleEnd);
      chunks.push(await middleChunk.arrayBuffer());
    }

    // Last chunk (if the file is large enough and distinct from the first)
    if (file.size > this.CHUNK_SIZE) {
      const lastStart = Math.max(file.size - this.CHUNK_SIZE, this.CHUNK_SIZE);
      const lastChunk = file.slice(lastStart);
      chunks.push(await lastChunk.arrayBuffer());
    }

    return chunks;
  }

  private static async combineChunks(chunks: ArrayBuffer[]): Promise<ArrayBuffer> {
    const totalLength = chunks.reduce((sum, chunk) => sum + chunk.byteLength, 0);
    const combined = new Uint8Array(totalLength);

    let offset = 0;
    for (const chunk of chunks) {
      combined.set(new Uint8Array(chunk), offset);
      offset += chunk.byteLength;
    }

    return combined.buffer;
  }

  private static async hashArrayBuffer(buffer: ArrayBuffer): Promise<string> {
    // Use the Web Crypto API for proper hashing
    if (crypto.subtle) {
      const hashBuffer = await crypto.subtle.digest('SHA-256', buffer);
      const hashArray = Array.from(new Uint8Array(hashBuffer));
      return hashArray.map(b => b.toString(16).padStart(2, '0')).join('');
    }

    // Fallback for environments without crypto.subtle
    return this.simpleHash(Array.from(new Uint8Array(buffer)).join(''));
  }

  private static simpleHash(str: string): string {
    let hash = 0;
    if (str.length === 0) return hash.toString();

    for (let i = 0; i < str.length; i++) {
      const char = str.charCodeAt(i);
      hash = ((hash << 5) - hash) + char;
      hash = hash & hash; // Convert to a 32-bit integer
    }

    return Math.abs(hash).toString(16);
  }

  /**
   * Validate that a file matches its expected hash.
   * Useful for detecting file corruption or changes.
   */
  static async validateFileHash(file: File, expectedHash: string): Promise<boolean> {
    try {
      const actualHash = await this.generateHybridHash(file);
      return actualHash === expectedHash;
    } catch (error) {
      console.error('Hash validation failed:', error);
      return false;
    }
  }
}
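A brief usage sketch (the wrapper function is hypothetical; FileHasher comes from this file): derive a cache key once, then reuse it to detect whether the file changed.

import { FileHasher } from './fileHash';

// Hypothetical: key a file for the processing cache and verify it later.
async function keyAndVerify(file: File) {
  const key = await FileHasher.generateHybridHash(file); // metadata + content sample
  const stillSame = await FileHasher.validateFileHash(file, key);
  console.log(key, stillSame); // stillSame === true unless the file mutated
  return key;
}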