mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2026-02-17 13:52:14 +01:00
Stirling 2.0 (#3928)
# Description of Changes <!-- File context for managing files between tools and views Optimisation for large files Updated Split to work with new file system and match Matts stepped design closer --> --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details. --------- Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
This commit is contained in:
546
frontend/src/services/enhancedPDFProcessingService.ts
Normal file
546
frontend/src/services/enhancedPDFProcessingService.ts
Normal file
@@ -0,0 +1,546 @@
|
||||
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
|
||||
import { ProcessedFile, ProcessingState, PDFPage, ProcessingStrategy, ProcessingConfig, ProcessingMetrics } from '../types/processing';
|
||||
import { ProcessingCache } from './processingCache';
|
||||
import { FileHasher } from '../utils/fileHash';
|
||||
import { FileAnalyzer } from './fileAnalyzer';
|
||||
import { ProcessingErrorHandler } from './processingErrorHandler';
|
||||
|
||||
// Set up PDF.js worker
|
||||
GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
|
||||
|
||||
/** Document handle produced by `getDocument(...).promise`. */
type LoadedPDF = Awaited<ReturnType<typeof getDocument>['promise']>;

/**
 * Singleton service that converts PDF `File`s into `ProcessedFile`s (page list
 * plus optional JPEG thumbnails), choosing a processing strategy per file,
 * caching results, and publishing per-file progress to subscribers.
 */
export class EnhancedPDFProcessingService {
  private static instance: EnhancedPDFProcessingService;

  private cache = new ProcessingCache();
  /** In-flight (and recently finished) processing states, keyed by file key. */
  private processing = new Map<string, ProcessingState>();
  private processingListeners = new Set<(states: Map<string, ProcessingState>) => void>();
  private metrics: ProcessingMetrics = {
    totalFiles: 0,
    completedFiles: 0,
    failedFiles: 0,
    averageProcessingTime: 0,
    cacheHitRate: 0,
    memoryUsage: 0
  };
  /** Number of cache hits seen so far; together with `metrics.totalFiles` it backs `cacheHitRate`. */
  private cacheHits = 0;

  private defaultConfig: ProcessingConfig = {
    strategy: 'immediate_full',
    chunkSize: 20,
    thumbnailQuality: 'medium',
    priorityPageCount: 10,
    useWebWorker: false,
    maxRetries: 3,
    timeoutMs: 300000 // 5 minutes
  };

  private constructor() {}

  /** Returns the shared instance, creating it on first use. */
  static getInstance(): EnhancedPDFProcessingService {
    if (!EnhancedPDFProcessingService.instance) {
      EnhancedPDFProcessingService.instance = new EnhancedPDFProcessingService();
    }
    return EnhancedPDFProcessingService.instance;
  }

  /**
   * Process a file with intelligent strategy selection.
   *
   * @param file - The PDF to process.
   * @param customConfig - Overrides applied on top of the defaults and the
   *   strategy recommended by `FileAnalyzer`.
   * @returns The cached `ProcessedFile` when one exists; otherwise `null`
   *   (processing was started, or is already running, in the background —
   *   subscribe via `onProcessingChange` to follow it).
   * @throws Error when the analyzer reports the file as corrupted.
   */
  async processFile(file: File, customConfig?: Partial<ProcessingConfig>): Promise<ProcessedFile | null> {
    const fileKey = await this.generateFileKey(file);

    // Check cache first
    const cached = this.cache.get(fileKey);
    if (cached) {
      this.updateMetrics('cacheHit');
      return cached;
    }

    // Check if already processing
    if (this.processing.has(fileKey)) {
      return null;
    }

    // Analyze file to determine optimal strategy
    const analysis = await FileAnalyzer.analyzeFile(file);
    if (analysis.isCorrupted) {
      throw new Error(`File ${file.name} appears to be corrupted`);
    }

    // The analysis above is awaited, so a concurrent call for the same file may
    // have finished or started processing in the meantime. Re-check both so
    // the same file is never processed twice in parallel.
    const cachedMeanwhile = this.cache.get(fileKey);
    if (cachedMeanwhile) {
      this.updateMetrics('cacheHit');
      return cachedMeanwhile;
    }
    if (this.processing.has(fileKey)) {
      return null;
    }

    // Create processing config
    const config: ProcessingConfig = {
      ...this.defaultConfig,
      strategy: analysis.recommendedStrategy,
      ...customConfig
    };

    // Start processing in the background; startProcessing handles its own
    // errors and registers the state synchronously before its first await.
    void this.startProcessing(file, fileKey, config, analysis.estimatedProcessingTime);
    return null;
  }

  /**
   * Run one file through its strategy, tracking state, metrics and cache.
   * Never rejects: failures are recorded on the state object instead.
   */
  private async startProcessing(
    file: File,
    fileKey: string,
    config: ProcessingConfig,
    estimatedTime: number
  ): Promise<void> {
    // Create cancellation token
    const cancellationToken = ProcessingErrorHandler.createTimeoutController(config.timeoutMs);

    // Set initial state
    const state: ProcessingState = {
      fileKey,
      fileName: file.name,
      status: 'processing',
      progress: 0,
      strategy: config.strategy,
      startedAt: Date.now(),
      estimatedTimeRemaining: estimatedTime,
      cancellationToken
    };

    this.processing.set(fileKey, state);
    this.notifyListeners();
    this.updateMetrics('started');

    try {
      // Execute processing with retry logic
      const processedFile = await ProcessingErrorHandler.executeWithRetry(
        () => this.executeProcessingStrategy(file, config, state),
        (error) => {
          state.error = error;
          this.notifyListeners();
        },
        config.maxRetries
      );

      // Cache the result
      this.cache.set(fileKey, processedFile);

      // Update state to completed
      state.status = 'completed';
      state.progress = 100;
      state.completedAt = Date.now();
      this.notifyListeners();
      this.updateMetrics('completed', Date.now() - state.startedAt);

      // Remove from processing map after brief delay
      this.scheduleRemoval(fileKey, state, 2000);
    } catch (error) {
      console.error('Processing failed for', file.name, ':', error);

      const processingError = ProcessingErrorHandler.createProcessingError(error);
      // cancelProcessing() already marked the state 'cancelled'; do not
      // relabel a deliberate cancel as an error.
      if (state.status !== 'cancelled') {
        state.status = 'error';
      }
      state.error = processingError;
      this.notifyListeners();
      this.updateMetrics('failed');

      // Remove failed processing after delay
      this.scheduleRemoval(fileKey, state, 10000);
    }
  }

  /**
   * Drop `state` from the processing map after `delayMs`, but only if it is
   * still the state registered under `fileKey` (the key may have been cleared
   * and reused by a newer run in the meantime).
   */
  private scheduleRemoval(fileKey: string, state: ProcessingState, delayMs: number): void {
    setTimeout(() => {
      if (this.processing.get(fileKey) === state) {
        this.processing.delete(fileKey);
        this.notifyListeners();
      }
    }, delayMs);
  }

  /**
   * Dispatch to the strategy implementation named by `config.strategy`;
   * unknown strategies fall back to full processing.
   */
  private async executeProcessingStrategy(
    file: File,
    config: ProcessingConfig,
    state: ProcessingState
  ): Promise<ProcessedFile> {
    // Skip the file read entirely when the run was cancelled before it began
    // (e.g. between retries).
    if (state.cancellationToken?.signal.aborted) {
      throw new Error('Processing cancelled');
    }

    switch (config.strategy) {
      case 'immediate_full':
        return this.processImmediateFull(file, config, state);
      case 'priority_pages':
        return this.processPriorityPages(file, config, state);
      case 'progressive_chunked':
        return this.processProgressiveChunked(file, config, state);
      case 'metadata_only':
        return this.processMetadataOnly(file, config, state);
      default:
        return this.processImmediateFull(file, config, state);
    }
  }

  /**
   * Open `file` with pdf.js, run `work` on the document, and always destroy
   * the document afterwards — including when `work` throws.
   */
  private async withDocument<T>(file: File, work: (pdf: LoadedPDF) => Promise<T>): Promise<T> {
    const arrayBuffer = await file.arrayBuffer();
    const pdf = await getDocument({ data: arrayBuffer }).promise;
    try {
      return await work(pdf);
    } finally {
      void pdf.destroy();
    }
  }

  /** Build one page entry; `thumbnail` is `null` for pages that were not rendered. */
  private createPage(file: File, pageNumber: number, thumbnail: string | null): PDFPage {
    return {
      id: `${file.name}-page-${pageNumber}`,
      pageNumber,
      thumbnail,
      rotation: 0,
      selected: false
    };
  }

  /** Page entries without thumbnails for pages `firstPage..lastPage` (inclusive). */
  private createPlaceholders(file: File, firstPage: number, lastPage: number): PDFPage[] {
    const pages: PDFPage[] = [];
    for (let pageNumber = firstPage; pageNumber <= lastPage; pageNumber++) {
      pages.push(this.createPage(file, pageNumber, null));
    }
    return pages;
  }

  /**
   * Render thumbnails for pages `1..lastPage`, updating `state` after each page.
   *
   * @param progressFor - Maps the page just rendered to the progress value to publish.
   * @param yieldEvery - When > 0, pause 10 ms after every N-th page so the UI can breathe.
   * @throws Error('Processing cancelled') when the state's token is aborted.
   */
  private async renderPages(
    pdf: LoadedPDF,
    file: File,
    config: ProcessingConfig,
    state: ProcessingState,
    lastPage: number,
    progressFor: (pageNumber: number) => number,
    yieldEvery = 0
  ): Promise<PDFPage[]> {
    const pages: PDFPage[] = [];

    for (let pageNumber = 1; pageNumber <= lastPage; pageNumber++) {
      // Check for cancellation
      if (state.cancellationToken?.signal.aborted) {
        throw new Error('Processing cancelled');
      }

      const page = await pdf.getPage(pageNumber);
      const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality);
      pages.push(this.createPage(file, pageNumber, thumbnail));

      // Update progress
      state.progress = progressFor(pageNumber);
      state.currentPage = pageNumber;
      this.notifyListeners();

      // Small delay to prevent UI blocking
      if (yieldEvery > 0 && pageNumber % yieldEvery === 0) {
        await new Promise(resolve => setTimeout(resolve, 10));
      }
    }

    return pages;
  }

  /** Publish 100% progress and wrap the pages into a `ProcessedFile`. */
  private finishProcessing(file: File, pages: PDFPage[], totalPages: number, state: ProcessingState): ProcessedFile {
    state.progress = 100;
    this.notifyListeners();
    return this.createProcessedFile(file, pages, totalPages);
  }

  /**
   * Process all pages immediately (for small files)
   */
  private async processImmediateFull(
    file: File,
    config: ProcessingConfig,
    state: ProcessingState
  ): Promise<ProcessedFile> {
    return this.withDocument(file, async (pdf) => {
      const totalPages = pdf.numPages;

      state.progress = 10;
      this.notifyListeners();

      const pages = await this.renderPages(
        pdf, file, config, state, totalPages,
        (pageNumber) => 10 + (pageNumber / totalPages) * 85
      );

      return this.finishProcessing(file, pages, totalPages, state);
    });
  }

  /**
   * Render the first `config.priorityPageCount` pages; the remaining pages are
   * returned as placeholders with a `null` thumbnail.
   */
  private async processPriorityPages(
    file: File,
    config: ProcessingConfig,
    state: ProcessingState
  ): Promise<ProcessedFile> {
    return this.withDocument(file, async (pdf) => {
      const totalPages = pdf.numPages;

      state.progress = 10;
      this.notifyListeners();

      const priorityCount = Math.min(config.priorityPageCount, totalPages);

      // Process priority pages first
      const rendered = await this.renderPages(
        pdf, file, config, state, priorityCount,
        (pageNumber) => 10 + (pageNumber / priorityCount) * 60
      );

      // Create placeholder pages for remaining pages
      const pages = rendered.concat(this.createPlaceholders(file, priorityCount + 1, totalPages));

      return this.finishProcessing(file, pages, totalPages, state);
    });
  }

  /**
   * Render the first chunk of `config.chunkSize` pages, pausing briefly every
   * five pages; the remaining pages are returned as placeholders.
   */
  private async processProgressiveChunked(
    file: File,
    config: ProcessingConfig,
    state: ProcessingState
  ): Promise<ProcessedFile> {
    return this.withDocument(file, async (pdf) => {
      const totalPages = pdf.numPages;

      state.progress = 10;
      this.notifyListeners();

      // Process first chunk immediately
      const firstChunkEnd = Math.min(config.chunkSize, totalPages);
      const rendered = await this.renderPages(
        pdf, file, config, state, firstChunkEnd,
        (pageNumber) => 10 + (pageNumber / totalPages) * 70,
        5
      );

      // Create placeholders for remaining pages
      const pages = rendered.concat(this.createPlaceholders(file, firstChunkEnd + 1, totalPages));

      return this.finishProcessing(file, pages, totalPages, state);
    });
  }

  /**
   * Process metadata only (for very large files): no thumbnails are rendered,
   * every page is a placeholder.
   */
  private async processMetadataOnly(
    file: File,
    config: ProcessingConfig,
    state: ProcessingState
  ): Promise<ProcessedFile> {
    return this.withDocument(file, async (pdf) => {
      const totalPages = pdf.numPages;

      state.progress = 50;
      this.notifyListeners();

      // Create placeholder pages without thumbnails
      const pages = this.createPlaceholders(file, 1, totalPages);

      return this.finishProcessing(file, pages, totalPages, state);
    });
  }

  /**
   * Render a page thumbnail with specified quality.
   *
   * @param page - pdf.js page object (kept as `any`, as in the original signature).
   * @param quality - Selects the viewport scale: low 0.2, medium 0.5, high 0.8.
   * @returns A JPEG data URL.
   * @throws Error when no 2D canvas context is available.
   */
  private async renderPageThumbnail(page: any, quality: 'low' | 'medium' | 'high'): Promise<string> {
    const scales = { low: 0.2, medium: 0.5, high: 0.8 }; // Reduced low quality for page editor
    const scale = scales[quality];

    const viewport = page.getViewport({ scale });
    const canvas = document.createElement('canvas');
    canvas.width = viewport.width;
    canvas.height = viewport.height;

    const context = canvas.getContext('2d');
    if (!context) {
      throw new Error('Could not get canvas context');
    }

    try {
      await page.render({ canvasContext: context, viewport }).promise;
      return canvas.toDataURL('image/jpeg', 0.8); // Use JPEG for better compression
    } finally {
      // Shrink the canvas so its pixel buffer can be released right away
      // instead of waiting for garbage collection.
      canvas.width = 0;
      canvas.height = 0;
    }
  }

  /**
   * Create a ProcessedFile object with a fresh random id and the file name as title.
   */
  private createProcessedFile(file: File, pages: PDFPage[], totalPages: number): ProcessedFile {
    const timestamp = new Date().toISOString();
    return {
      id: `${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
      pages,
      totalPages,
      metadata: {
        title: file.name,
        createdAt: timestamp,
        modifiedAt: timestamp
      }
    };
  }

  /**
   * Generate the cache key for a file (delegates to `FileHasher.generateHybridHash`).
   */
  private async generateFileKey(file: File): Promise<string> {
    return await FileHasher.generateHybridHash(file);
  }

  /**
   * Cancel processing for a specific file. No-op when the key is unknown or
   * its state carries no cancellation token.
   */
  cancelProcessing(fileKey: string): void {
    const state = this.processing.get(fileKey);
    if (state && state.cancellationToken) {
      state.cancellationToken.abort();
      state.status = 'cancelled';
      this.notifyListeners();
    }
  }

  /**
   * Update processing metrics.
   *
   * @param event - What happened. 'started' doubles as "cache miss" for the hit rate.
   * @param processingTime - Duration in ms; only used for 'completed'.
   */
  private updateMetrics(event: 'started' | 'completed' | 'failed' | 'cacheHit', processingTime?: number): void {
    switch (event) {
      case 'started':
        this.metrics.totalFiles++;
        this.refreshCacheHitRate();
        break;
      case 'completed': {
        this.metrics.completedFiles++;
        // `!== undefined` rather than truthiness: a 0 ms run is a valid sample.
        if (processingTime !== undefined) {
          const completed = this.metrics.completedFiles;
          const totalProcessingTime = this.metrics.averageProcessingTime * (completed - 1) + processingTime;
          this.metrics.averageProcessingTime = totalProcessingTime / completed;
        }
        break;
      }
      case 'failed':
        this.metrics.failedFiles++;
        break;
      case 'cacheHit':
        this.cacheHits++;
        this.refreshCacheHitRate();
        break;
    }
  }

  /** Recompute `cacheHitRate` as hits / (hits + started files); 0 before any lookup. */
  private refreshCacheHitRate(): void {
    const lookups = this.cacheHits + this.metrics.totalFiles;
    this.metrics.cacheHitRate = lookups === 0 ? 0 : this.cacheHits / lookups;
  }

  /**
   * Get a copy of the processing metrics.
   */
  getMetrics(): ProcessingMetrics {
    return { ...this.metrics };
  }

  /**
   * State subscription for components.
   *
   * @returns A function that removes the subscription.
   */
  onProcessingChange(callback: (states: Map<string, ProcessingState>) => void): () => void {
    this.processingListeners.add(callback);
    return () => {
      this.processingListeners.delete(callback);
    };
  }

  /** Returns a copy of the current processing map. */
  getProcessingStates(): Map<string, ProcessingState> {
    return new Map(this.processing);
  }

  /**
   * Send every listener a snapshot of the processing map. A listener that
   * throws is logged and skipped so it cannot break processing or starve the
   * other listeners.
   */
  private notifyListeners(): void {
    const snapshot = new Map(this.processing);
    this.processingListeners.forEach((callback) => {
      try {
        callback(snapshot);
      } catch (error) {
        console.error('Processing listener failed:', error);
      }
    });
  }

  /** Abort the cancellation token of every tracked state. */
  private abortAll(): void {
    this.processing.forEach((state) => {
      if (state.cancellationToken) {
        state.cancellationToken.abort();
      }
    });
  }

  /**
   * Cleanup method for removed files: evicts each file from the cache, cancels
   * it and forgets its state. Key generation is asynchronous, so the work
   * finishes after this method returns; listeners are notified once all
   * removals have settled.
   */
  cleanup(removedFiles: File[]): void {
    const removals = removedFiles.map(async (file) => {
      const key = await this.generateFileKey(file);
      this.cache.delete(key);
      this.cancelProcessing(key);
      this.processing.delete(key);
    });

    void Promise.allSettled(removals).then((results) => {
      for (const result of results) {
        if (result.status === 'rejected') {
          console.error('Cleanup failed for a removed file:', result.reason);
        }
      }
      this.notifyListeners();
    });
  }

  /**
   * Clear all processing for view switches (the cache is kept).
   */
  clearAllProcessing(): void {
    // Cancel all ongoing processing
    this.abortAll();

    // Clear processing states
    this.processing.clear();
    this.notifyListeners();

    // Force memory cleanup hint; `gc` is non-standard and usually absent.
    if (typeof window !== 'undefined') {
      const host = window as Window & { gc?: () => void };
      if (host.gc) {
        setTimeout(() => host.gc?.(), 100);
      }
    }
  }

  /**
   * Get cache statistics
   */
  getCacheStats() {
    return this.cache.getStats();
  }

  /**
   * Clear all cache and processing. In-flight work is aborted first so it
   * does not keep running after its state has been dropped.
   */
  clearAll(): void {
    this.abortAll();
    this.cache.clear();
    this.processing.clear();
    this.notifyListeners();
  }
}
|
||||
|
||||
// Export singleton instance
|
||||
export const enhancedPDFProcessingService = EnhancedPDFProcessingService.getInstance();
|
||||
240
frontend/src/services/fileAnalyzer.ts
Normal file
240
frontend/src/services/fileAnalyzer.ts
Normal file
@@ -0,0 +1,240 @@
|
||||
import { getDocument } from 'pdfjs-dist';
|
||||
import { FileAnalysis, ProcessingStrategy } from '../types/processing';
|
||||
|
||||
/**
 * Static helpers that inspect PDF files up front (size, page count,
 * encryption/corruption) and recommend how they should be processed.
 */
export class FileAnalyzer {
  private static readonly SIZE_THRESHOLDS = {
    SMALL: 10 * 1024 * 1024, // 10MB
    MEDIUM: 50 * 1024 * 1024, // 50MB
    LARGE: 200 * 1024 * 1024, // 200MB
  };

  private static readonly PAGE_THRESHOLDS = {
    FEW: 10, // up to 10 pages - immediate full processing
    MANY: 50, // up to 50 pages - priority pages
    MASSIVE: 100, // up to 100 pages - progressive chunked
    // more than 100 pages = metadata only
  };

  /** Pages assumed to be rendered up front by the 'priority_pages' strategy. */
  private static readonly PRIORITY_PAGE_ESTIMATE = 10;
  /** Pages assumed to be rendered up front by the 'progressive_chunked' strategy. */
  private static readonly FIRST_CHUNK_ESTIMATE = 20;

  /**
   * Analyze a file to determine optimal processing strategy.
   *
   * Never rejects: if analysis itself fails, the file is reported as
   * corrupted with the 'metadata_only' strategy.
   */
  static async analyzeFile(file: File): Promise<FileAnalysis> {
    const analysis: FileAnalysis = {
      fileSize: file.size,
      isEncrypted: false,
      isCorrupted: false,
      recommendedStrategy: 'metadata_only',
      estimatedProcessingTime: 0,
    };

    try {
      // Quick validation and page count estimation
      const quickAnalysis = await this.quickPDFAnalysis(file);
      analysis.estimatedPageCount = quickAnalysis.pageCount;
      analysis.isEncrypted = quickAnalysis.isEncrypted;
      analysis.isCorrupted = quickAnalysis.isCorrupted;

      // Determine strategy based on file characteristics
      analysis.recommendedStrategy = this.determineStrategy(file.size, quickAnalysis.pageCount);

      // Estimate processing time
      analysis.estimatedProcessingTime = this.estimateProcessingTime(
        file.size,
        quickAnalysis.pageCount,
        analysis.recommendedStrategy
      );
    } catch (error) {
      console.error('File analysis failed:', error);
      analysis.isCorrupted = true;
      analysis.recommendedStrategy = 'metadata_only';
    }

    return analysis;
  }

  /**
   * Open the PDF just far enough to read its page count and encryption flag.
   *
   * Any failure to open is classified as "encrypted" when the error looks like
   * a password problem, and as "corrupted" otherwise.
   */
  private static async quickPDFAnalysis(file: File): Promise<{
    pageCount: number;
    isEncrypted: boolean;
    isCorrupted: boolean;
  }> {
    try {
      // The whole file is read regardless of size (PDF.js needs the complete structure)
      const arrayBuffer = await file.arrayBuffer();

      const pdf = await getDocument({
        data: arrayBuffer,
        stopAtErrors: false, // Don't stop at minor errors
        verbosity: 0 // Suppress PDF.js warnings
      }).promise;

      const pageCount = pdf.numPages;
      const isEncrypted = pdf.isEncrypted;

      // Clean up
      pdf.destroy();

      return {
        pageCount,
        isEncrypted,
        isCorrupted: false
      };
    } catch (error) {
      // Try to determine if it's corruption vs encryption. The error name is
      // checked in addition to the message so detection does not hinge on
      // message wording alone.
      // NOTE(review): assumes pdf.js reports missing/incorrect passwords with
      // an error named 'PasswordException' - confirm for the bundled version.
      const errorName = error instanceof Error ? error.name : '';
      const errorMessage = error instanceof Error ? error.message.toLowerCase() : '';
      const isEncrypted =
        errorName === 'PasswordException' ||
        errorMessage.includes('password') ||
        errorMessage.includes('encrypted');

      return {
        pageCount: 0,
        isEncrypted,
        isCorrupted: !isEncrypted // If not encrypted, probably corrupted
      };
    }
  }

  /**
   * Determine the best processing strategy based on file characteristics.
   * Both the size and the page limit of a tier must hold for it to be chosen.
   */
  private static determineStrategy(fileSize: number, pageCount?: number): ProcessingStrategy {
    // Unknown or zero page count (e.g. corrupted or encrypted files)
    if (!pageCount) {
      return 'metadata_only';
    }

    const sizes = this.SIZE_THRESHOLDS;
    const pageLimits = this.PAGE_THRESHOLDS;

    // Small files with few pages - process everything immediately
    if (fileSize <= sizes.SMALL && pageCount <= pageLimits.FEW) {
      return 'immediate_full';
    }

    // Medium files with a moderate page count - priority pages first
    if (fileSize <= sizes.MEDIUM && pageCount <= pageLimits.MANY) {
      return 'priority_pages';
    }

    // Large files with many pages - chunked processing
    if (fileSize <= sizes.LARGE && pageCount <= pageLimits.MASSIVE) {
      return 'progressive_chunked';
    }

    // Very large files - metadata only
    return 'metadata_only';
  }

  /**
   * Estimate processing time (ms) based on page count and strategy.
   * `fileSize` is accepted for interface stability but not used in the estimate.
   */
  private static estimateProcessingTime(
    fileSize: number,
    pageCount: number = 0,
    strategy: ProcessingStrategy
  ): number {
    const baseTimes = {
      immediate_full: 200, // 200ms per page
      priority_pages: 150, // 150ms per page (optimized)
      progressive_chunked: 100, // 100ms per page (chunked)
      metadata_only: 50 // 50ms total
    };

    const baseTime = baseTimes[strategy];

    switch (strategy) {
      case 'metadata_only':
        return baseTime;

      case 'immediate_full':
        return pageCount * baseTime;

      case 'priority_pages':
        // Only the priority pages are rendered up front
        return Math.min(pageCount, this.PRIORITY_PAGE_ESTIMATE) * baseTime;

      case 'progressive_chunked':
        // Only the first chunk is rendered up front
        return Math.min(pageCount, this.FIRST_CHUNK_ESTIMATE) * baseTime;

      default:
        return pageCount * baseTime;
    }
  }

  /**
   * Get processing recommendations for a set of files.
   * Files are analyzed one at a time; each analysis reads the whole file.
   */
  static async analyzeMultipleFiles(files: File[]): Promise<{
    analyses: Map<File, FileAnalysis>;
    recommendations: {
      totalEstimatedTime: number;
      suggestedBatchSize: number;
      shouldUseWebWorker: boolean;
      memoryWarning: boolean;
    };
  }> {
    const analyses = new Map<File, FileAnalysis>();
    let totalEstimatedTime = 0;
    let totalSize = 0;
    let totalPages = 0;

    // Analyze each file
    for (const file of files) {
      const analysis = await this.analyzeFile(file);
      analyses.set(file, analysis);
      totalEstimatedTime += analysis.estimatedProcessingTime;
      totalSize += file.size;
      totalPages += analysis.estimatedPageCount || 0;
    }

    // Generate recommendations
    const tooManyPages = totalPages > this.PAGE_THRESHOLDS.MASSIVE;
    const recommendations = {
      totalEstimatedTime,
      suggestedBatchSize: this.calculateBatchSize(files.length, totalSize),
      shouldUseWebWorker: tooManyPages || totalSize > this.SIZE_THRESHOLDS.MEDIUM,
      memoryWarning: totalSize > this.SIZE_THRESHOLDS.LARGE || tooManyPages
    };

    return { analyses, recommendations };
  }

  /**
   * Calculate optimal batch size for processing multiple files.
   * The result never exceeds `fileCount`.
   */
  private static calculateBatchSize(fileCount: number, totalSize: number): number {
    let batchSize = fileCount; // Process all at once for smaller total sizes

    if (totalSize > this.SIZE_THRESHOLDS.LARGE) {
      // Process small batches for large total sizes
      batchSize = Math.max(1, Math.floor(fileCount / 4));
    } else if (totalSize > this.SIZE_THRESHOLDS.MEDIUM) {
      batchSize = Math.max(2, Math.floor(fileCount / 2));
    }

    // The minimums above can overshoot when only one file was supplied.
    return Math.min(fileCount, batchSize);
  }

  /**
   * Check if a file appears to be a valid PDF: it must have the PDF MIME type
   * or a `.pdf` extension, and its first bytes must start with `%PDF-`.
   */
  static async isValidPDF(file: File): Promise<boolean> {
    if (file.type !== 'application/pdf' && !file.name.toLowerCase().endsWith('.pdf')) {
      return false;
    }

    try {
      // Read first few bytes to check PDF header
      const header = file.slice(0, 8);
      const headerBytes = new Uint8Array(await header.arrayBuffer());
      const headerString = String.fromCharCode(...headerBytes);

      return headerString.startsWith('%PDF-');
    } catch {
      // Unreadable file: treat as not a PDF
      return false;
    }
  }
}
|
||||
@@ -12,12 +12,12 @@ export class PDFExportService {
|
||||
* Export PDF document with applied operations
|
||||
*/
|
||||
async exportPDF(
|
||||
pdfDocument: PDFDocument,
|
||||
pdfDocument: PDFDocument,
|
||||
selectedPageIds: string[] = [],
|
||||
options: ExportOptions = {}
|
||||
): Promise<{ blob: Blob; filename: string } | { blobs: Blob[]; filenames: string[] }> {
|
||||
const { selectedOnly = false, filename, splitDocuments = false } = options;
|
||||
|
||||
|
||||
try {
|
||||
// Determine which pages to export
|
||||
const pagesToExport = selectedOnly && selectedPageIds.length > 0
|
||||
@@ -57,16 +57,16 @@ export class PDFExportService {
|
||||
for (const page of pages) {
|
||||
// Get the original page from source document
|
||||
const sourcePageIndex = page.pageNumber - 1;
|
||||
|
||||
|
||||
if (sourcePageIndex >= 0 && sourcePageIndex < sourceDoc.getPageCount()) {
|
||||
// Copy the page
|
||||
const [copiedPage] = await newDoc.copyPages(sourceDoc, [sourcePageIndex]);
|
||||
|
||||
|
||||
// Apply rotation
|
||||
if (page.rotation !== 0) {
|
||||
copiedPage.setRotation(degrees(page.rotation));
|
||||
}
|
||||
|
||||
|
||||
newDoc.addPage(copiedPage);
|
||||
}
|
||||
}
|
||||
@@ -108,20 +108,20 @@ export class PDFExportService {
|
||||
|
||||
for (const endIndex of splitPoints) {
|
||||
const segmentPages = pages.slice(startIndex, endIndex);
|
||||
|
||||
|
||||
if (segmentPages.length > 0) {
|
||||
const newDoc = await PDFLibDocument.create();
|
||||
|
||||
|
||||
for (const page of segmentPages) {
|
||||
const sourcePageIndex = page.pageNumber - 1;
|
||||
|
||||
|
||||
if (sourcePageIndex >= 0 && sourcePageIndex < sourceDoc.getPageCount()) {
|
||||
const [copiedPage] = await newDoc.copyPages(sourceDoc, [sourcePageIndex]);
|
||||
|
||||
|
||||
if (page.rotation !== 0) {
|
||||
copiedPage.setRotation(degrees(page.rotation));
|
||||
}
|
||||
|
||||
|
||||
newDoc.addPage(copiedPage);
|
||||
}
|
||||
}
|
||||
@@ -130,16 +130,16 @@ export class PDFExportService {
|
||||
newDoc.setCreator('Stirling PDF');
|
||||
newDoc.setProducer('Stirling PDF');
|
||||
newDoc.setTitle(`${baseFilename} - Part ${partNumber}`);
|
||||
|
||||
|
||||
const pdfBytes = await newDoc.save();
|
||||
const blob = new Blob([pdfBytes], { type: 'application/pdf' });
|
||||
const filename = this.generateSplitFilename(baseFilename, partNumber);
|
||||
|
||||
|
||||
blobs.push(blob);
|
||||
filenames.push(filename);
|
||||
partNumber++;
|
||||
}
|
||||
|
||||
|
||||
startIndex = endIndex;
|
||||
}
|
||||
|
||||
@@ -172,11 +172,11 @@ export class PDFExportService {
|
||||
link.href = url;
|
||||
link.download = filename;
|
||||
link.style.display = 'none';
|
||||
|
||||
|
||||
document.body.appendChild(link);
|
||||
link.click();
|
||||
document.body.removeChild(link);
|
||||
|
||||
|
||||
// Clean up the URL after a short delay
|
||||
setTimeout(() => URL.revokeObjectURL(url), 1000);
|
||||
}
|
||||
@@ -185,8 +185,7 @@ export class PDFExportService {
|
||||
* Download multiple files as a ZIP
|
||||
*/
|
||||
async downloadAsZip(blobs: Blob[], filenames: string[], zipFilename: string): Promise<void> {
|
||||
// For now, download files individually
|
||||
// TODO: Implement ZIP creation when needed
|
||||
// For now, download files individually
|
||||
blobs.forEach((blob, index) => {
|
||||
setTimeout(() => {
|
||||
this.downloadFile(blob, filenames[index]);
|
||||
@@ -208,7 +207,7 @@ export class PDFExportService {
|
||||
errors.push('No pages available to export');
|
||||
}
|
||||
|
||||
const pagesToExport = selectedOnly
|
||||
const pagesToExport = selectedOnly
|
||||
? pdfDocument.pages.filter(page => selectedPageIds.includes(page.id))
|
||||
: pdfDocument.pages;
|
||||
|
||||
@@ -227,7 +226,7 @@ export class PDFExportService {
|
||||
splitCount: number;
|
||||
estimatedSize: string;
|
||||
} {
|
||||
const pagesToExport = selectedOnly
|
||||
const pagesToExport = selectedOnly
|
||||
? pdfDocument.pages.filter(page => selectedPageIds.includes(page.id))
|
||||
: pdfDocument.pages;
|
||||
|
||||
@@ -260,4 +259,4 @@ export class PDFExportService {
|
||||
}
|
||||
|
||||
// Export singleton instance
|
||||
export const pdfExportService = new PDFExportService();
|
||||
export const pdfExportService = new PDFExportService();
|
||||
|
||||
188
frontend/src/services/pdfProcessingService.ts
Normal file
188
frontend/src/services/pdfProcessingService.ts
Normal file
@@ -0,0 +1,188 @@
|
||||
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
|
||||
import { ProcessedFile, ProcessingState, PDFPage } from '../types/processing';
|
||||
import { ProcessingCache } from './processingCache';
|
||||
|
||||
// Set up PDF.js worker
|
||||
GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
|
||||
|
||||
export class PDFProcessingService {
|
||||
private static instance: PDFProcessingService;
|
||||
private cache = new ProcessingCache();
|
||||
private processing = new Map<string, ProcessingState>();
|
||||
private processingListeners = new Set<(states: Map<string, ProcessingState>) => void>();
|
||||
|
||||
private constructor() {}
|
||||
|
||||
/** Returns the shared PDFProcessingService, creating it on the first call. */
static getInstance(): PDFProcessingService {
  const existing = PDFProcessingService.instance;
  if (existing) {
    return existing;
  }

  const created = new PDFProcessingService();
  PDFProcessingService.instance = created;
  return created;
}
|
||||
|
||||
async getProcessedFile(file: File): Promise<ProcessedFile | null> {
|
||||
const fileKey = this.generateFileKey(file);
|
||||
|
||||
// Check cache first
|
||||
const cached = this.cache.get(fileKey);
|
||||
if (cached) {
|
||||
console.log('Cache hit for:', file.name);
|
||||
return cached;
|
||||
}
|
||||
|
||||
// Check if already processing
|
||||
if (this.processing.has(fileKey)) {
|
||||
console.log('Already processing:', file.name);
|
||||
return null; // Will be available when processing completes
|
||||
}
|
||||
|
||||
// Start processing
|
||||
this.startProcessing(file, fileKey);
|
||||
return null;
|
||||
}
|
||||
|
||||
private async startProcessing(file: File, fileKey: string): Promise<void> {
|
||||
// Set initial state
|
||||
const state: ProcessingState = {
|
||||
fileKey,
|
||||
fileName: file.name,
|
||||
status: 'processing',
|
||||
progress: 0,
|
||||
startedAt: Date.now()
|
||||
};
|
||||
|
||||
this.processing.set(fileKey, state);
|
||||
this.notifyListeners();
|
||||
|
||||
try {
|
||||
// Process the file with progress updates
|
||||
const processedFile = await this.processFileWithProgress(file, (progress) => {
|
||||
state.progress = progress;
|
||||
this.notifyListeners();
|
||||
});
|
||||
|
||||
// Cache the result
|
||||
this.cache.set(fileKey, processedFile);
|
||||
|
||||
// Update state to completed
|
||||
state.status = 'completed';
|
||||
state.progress = 100;
|
||||
state.completedAt = Date.now();
|
||||
this.notifyListeners();
|
||||
|
||||
// Remove from processing map after brief delay
|
||||
setTimeout(() => {
|
||||
this.processing.delete(fileKey);
|
||||
this.notifyListeners();
|
||||
}, 2000);
|
||||
|
||||
} catch (error) {
|
||||
console.error('Processing failed for', file.name, ':', error);
|
||||
state.status = 'error';
|
||||
state.error = error instanceof Error ? error.message : 'Unknown error';
|
||||
this.notifyListeners();
|
||||
|
||||
// Remove failed processing after delay
|
||||
setTimeout(() => {
|
||||
this.processing.delete(fileKey);
|
||||
this.notifyListeners();
|
||||
}, 5000);
|
||||
}
|
||||
}
|
||||
|
||||
private async processFileWithProgress(
|
||||
file: File,
|
||||
onProgress: (progress: number) => void
|
||||
): Promise<ProcessedFile> {
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
const pdf = await getDocument({ data: arrayBuffer }).promise;
|
||||
const totalPages = pdf.numPages;
|
||||
|
||||
onProgress(10); // PDF loaded
|
||||
|
||||
const pages: PDFPage[] = [];
|
||||
|
||||
for (let i = 1; i <= totalPages; i++) {
|
||||
const page = await pdf.getPage(i);
|
||||
const viewport = page.getViewport({ scale: 0.5 });
|
||||
const canvas = document.createElement('canvas');
|
||||
canvas.width = viewport.width;
|
||||
canvas.height = viewport.height;
|
||||
|
||||
const context = canvas.getContext('2d');
|
||||
if (context) {
|
||||
await page.render({ canvasContext: context, viewport }).promise;
|
||||
const thumbnail = canvas.toDataURL();
|
||||
|
||||
pages.push({
|
||||
id: `${file.name}-page-${i}`,
|
||||
pageNumber: i,
|
||||
thumbnail,
|
||||
rotation: 0,
|
||||
selected: false
|
||||
});
|
||||
}
|
||||
|
||||
// Update progress
|
||||
const progress = 10 + (i / totalPages) * 85; // 10-95%
|
||||
onProgress(progress);
|
||||
}
|
||||
|
||||
pdf.destroy();
|
||||
onProgress(100);
|
||||
|
||||
return {
|
||||
id: `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
|
||||
pages,
|
||||
totalPages,
|
||||
metadata: {
|
||||
title: file.name,
|
||||
createdAt: new Date().toISOString(),
|
||||
modifiedAt: new Date().toISOString()
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// State subscription for components
|
||||
onProcessingChange(callback: (states: Map<string, ProcessingState>) => void): () => void {
|
||||
this.processingListeners.add(callback);
|
||||
return () => this.processingListeners.delete(callback);
|
||||
}
|
||||
|
||||
getProcessingStates(): Map<string, ProcessingState> {
|
||||
return new Map(this.processing);
|
||||
}
|
||||
|
||||
private notifyListeners(): void {
|
||||
this.processingListeners.forEach(callback => callback(this.processing));
|
||||
}
|
||||
|
||||
generateFileKey(file: File): string {
|
||||
return `${file.name}-${file.size}-${file.lastModified}`;
|
||||
}
|
||||
|
||||
// Cleanup method for activeFiles changes
|
||||
cleanup(removedFiles: File[]): void {
|
||||
removedFiles.forEach(file => {
|
||||
const key = this.generateFileKey(file);
|
||||
this.cache.delete(key);
|
||||
this.processing.delete(key);
|
||||
});
|
||||
this.notifyListeners();
|
||||
}
|
||||
|
||||
// Get cache stats (for debugging)
|
||||
getCacheStats() {
|
||||
return this.cache.getStats();
|
||||
}
|
||||
|
||||
// Clear all cache and processing
|
||||
clearAll(): void {
|
||||
this.cache.clear();
|
||||
this.processing.clear();
|
||||
this.notifyListeners();
|
||||
}
|
||||
}
|
||||
|
||||
// Export singleton instance
|
||||
export const pdfProcessingService = PDFProcessingService.getInstance();
|
||||
138
frontend/src/services/processingCache.ts
Normal file
138
frontend/src/services/processingCache.ts
Normal file
@@ -0,0 +1,138 @@
|
||||
import { ProcessedFile, CacheConfig, CacheEntry, CacheStats } from '../types/processing';
|
||||
|
||||
/**
 * In-memory cache of processed files with three limits: a maximum entry count,
 * a maximum estimated size, and a time-to-live measured from insertion.
 * When room is needed, the least recently accessed entry is evicted first.
 */
export class ProcessingCache {
  private cache = new Map<string, CacheEntry>();
  private totalSize = 0;

  /**
   * @param config Cache limits. Defaults to 20 files, 2GB and a 30 minute TTL.
   */
  constructor(private config: CacheConfig = {
    maxFiles: 20,
    maxSizeBytes: 2 * 1024 * 1024 * 1024, // 2GB
    ttlMs: 30 * 60 * 1000 // 30 minutes
  }) {}

  /**
   * Stores `data` under `key`, replacing any existing entry for that key.
   * Expired entries are purged and least-recently-used entries are evicted
   * until the new entry fits within the configured limits.
   */
  set(key: string, data: ProcessedFile): void {
    // Remove expired entries first
    this.cleanup();

    // Release any previous entry for this key before accounting for the new
    // one; otherwise its size would be counted twice and it would also count
    // toward maxFiles, evicting an unrelated entry.
    this.delete(key);

    // Calculate entry size (rough estimate)
    const size = this.calculateSize(data);

    // Make room if needed
    this.makeRoom(size);

    const now = Date.now();
    this.cache.set(key, {
      data,
      size,
      lastAccessed: now,
      createdAt: now
    });

    this.totalSize += size;
  }

  /**
   * Returns the cached value for `key`, or `null` when it is missing or has
   * outlived the TTL (expired entries are removed). A hit refreshes the
   * entry's last-accessed time.
   */
  get(key: string): ProcessedFile | null {
    const entry = this.cache.get(key);
    if (!entry) return null;

    if (this.isExpired(entry)) {
      this.delete(key);
      return null;
    }

    // Update last accessed
    entry.lastAccessed = Date.now();
    return entry.data;
  }

  /**
   * Reports whether a non-expired entry exists for `key`. Expired entries are
   * removed. Unlike `get`, this does not refresh the last-accessed time.
   */
  has(key: string): boolean {
    const entry = this.cache.get(key);
    if (!entry) return false;

    if (this.isExpired(entry)) {
      this.delete(key);
      return false;
    }

    return true;
  }

  /** True when `entry` was created more than `ttlMs` before `now`. */
  private isExpired(entry: CacheEntry, now: number = Date.now()): boolean {
    return now - entry.createdAt > this.config.ttlMs;
  }

  /**
   * Evicts least-recently-accessed entries until one more entry of
   * `neededSize` bytes fits, or the cache is empty.
   */
  private makeRoom(neededSize: number): void {
    while (
      this.cache.size >= this.config.maxFiles ||
      this.totalSize + neededSize > this.config.maxSizeBytes
    ) {
      const oldestKey = this.findOldestEntry();
      if (oldestKey === null) break; // nothing left to evict
      this.delete(oldestKey);
    }
  }

  /** Returns the key with the smallest last-accessed time, or `null` if empty. */
  private findOldestEntry(): string | null {
    let oldestKey: string | null = null;
    let oldestAccess = Infinity;

    for (const [key, entry] of this.cache) {
      if (oldestKey === null || entry.lastAccessed < oldestAccess) {
        oldestKey = key;
        oldestAccess = entry.lastAccessed;
      }
    }

    return oldestKey;
  }

  /** Removes every entry whose TTL has elapsed. */
  private cleanup(): void {
    const now = Date.now();
    for (const [key, entry] of this.cache) {
      if (this.isExpired(entry, now)) {
        this.delete(key);
      }
    }
  }

  /**
   * Rough size estimate for an entry: a flat 50KB per page that has a
   * thumbnail, plus 10KB of overhead. The actual thumbnail length is not
   * measured.
   */
  private calculateSize(data: ProcessedFile): number {
    let size = 0;

    // Estimate size of thumbnails (main memory consumer)
    data.pages.forEach(page => {
      if (page.thumbnail) {
        // Base64 thumbnail is roughly 50KB each
        size += 50 * 1024;
      }
    });

    // Add some overhead for other data
    size += 10 * 1024; // 10KB overhead

    return size;
  }

  /** Removes `key` (if present) and subtracts its size from the running total. */
  delete(key: string): void {
    const entry = this.cache.get(key);
    if (entry) {
      this.totalSize -= entry.size;
      this.cache.delete(key);
    }
  }

  /** Removes all entries and resets the size total. */
  clear(): void {
    this.cache.clear();
    this.totalSize = 0;
  }

  /** Returns the entry count, estimated total size and configured size limit. */
  getStats(): CacheStats {
    return {
      entries: this.cache.size,
      totalSizeBytes: this.totalSize,
      maxSizeBytes: this.config.maxSizeBytes
    };
  }

  /** Returns all cached keys (for debugging and cleanup). */
  getKeys(): string[] {
    return Array.from(this.cache.keys());
  }
}
|
||||
282
frontend/src/services/processingErrorHandler.ts
Normal file
282
frontend/src/services/processingErrorHandler.ts
Normal file
@@ -0,0 +1,282 @@
|
||||
import { ProcessingError } from '../types/processing';
|
||||
|
||||
/**
 * Static helpers for classifying processing failures, deciding whether they can
 * be retried, formatting user-facing messages, and running operations with
 * retry/timeout behaviour.
 *
 * All members are referenced through the class name rather than `this`, so the
 * static methods keep working when passed around as detached callbacks.
 */
export class ProcessingErrorHandler {
  private static readonly DEFAULT_MAX_RETRIES = 3;
  private static readonly RETRY_DELAYS = [1000, 2000, 4000]; // Progressive backoff in ms

  /**
   * Create a ProcessingError from an unknown error.
   *
   * @param error Anything that was thrown; non-Error values are wrapped via `String(error)`.
   * @param retryCount Number of retries already performed.
   * @param maxRetries Retry budget used to decide recoverability.
   */
  static createProcessingError(
    error: unknown,
    retryCount: number = 0,
    maxRetries: number = ProcessingErrorHandler.DEFAULT_MAX_RETRIES
  ): ProcessingError {
    const originalError = error instanceof Error ? error : new Error(String(error));
    const message = originalError.message;

    const errorType = ProcessingErrorHandler.determineErrorType(originalError, message);
    const recoverable = ProcessingErrorHandler.isRecoverable(errorType, retryCount, maxRetries);

    return {
      type: errorType,
      message: ProcessingErrorHandler.formatErrorMessage(errorType, message),
      recoverable,
      retryCount,
      maxRetries,
      originalError
    };
  }

  /**
   * Determine the type of error from its message (case-insensitive substring
   * match) and `name`. Checks run in order; the first match wins.
   */
  private static determineErrorType(error: Error, message: string): ProcessingError['type'] {
    const lowerMessage = message.toLowerCase();
    const mentions = (...needles: string[]): boolean =>
      needles.some(needle => lowerMessage.includes(needle));

    // Network-related errors
    if (mentions('network', 'fetch', 'connection')) {
      return 'network';
    }

    // Memory-related errors
    if (mentions('memory', 'quota', 'allocation') || error.name === 'QuotaExceededError') {
      return 'memory';
    }

    // Timeout errors. AbortError is classified here, so it never reaches the
    // cancellation check below.
    if (mentions('timeout', 'aborted') || error.name === 'AbortError') {
      return 'timeout';
    }

    // Cancellation (by message only; see note on AbortError above)
    if (mentions('cancel', 'abort')) {
      return 'cancelled';
    }

    // PDF corruption/parsing errors
    if (mentions('pdf', 'parse', 'invalid', 'corrupt', 'malformed')) {
      return 'corruption';
    }

    // Default to parsing error
    return 'parsing';
  }

  /**
   * Determine if an error is recoverable based on type and retry count.
   */
  private static isRecoverable(
    errorType: ProcessingError['type'],
    retryCount: number,
    maxRetries: number
  ): boolean {
    // Never recoverable
    if (errorType === 'cancelled' || errorType === 'corruption') {
      return false;
    }

    // Retry budget exhausted
    if (retryCount >= maxRetries) {
      return false;
    }

    // Memory errors get a single retry only
    if (errorType === 'memory') {
      return retryCount < 1;
    }

    return errorType === 'network' || errorType === 'timeout' || errorType === 'parsing';
  }

  /**
   * Format error message for user display. Only 'parsing' (and the default
   * branch) include the original message text.
   */
  private static formatErrorMessage(errorType: ProcessingError['type'], originalMessage: string): string {
    switch (errorType) {
      case 'network':
        return 'Network connection failed. Please check your internet connection and try again.';

      case 'memory':
        return 'Insufficient memory to process this file. Try closing other applications or processing a smaller file.';

      case 'timeout':
        return 'Processing timed out. This file may be too large or complex to process.';

      case 'cancelled':
        return 'Processing was cancelled by user.';

      case 'corruption':
        return 'This PDF file appears to be corrupted or encrypted. Please try a different file.';

      case 'parsing':
        return `Failed to process PDF: ${originalMessage}`;

      default:
        return `Processing failed: ${originalMessage}`;
    }
  }

  /**
   * Execute an operation with automatic retry logic.
   *
   * Runs `operation` up to `maxRetries + 1` times, waiting with progressive
   * backoff between attempts. `onError` is called for every failure. Stops
   * early when a failure is classified as not recoverable.
   *
   * @throws The last ProcessingError (a plain object, not an Error instance)
   *   once attempts are exhausted or the error is not recoverable.
   */
  static async executeWithRetry<T>(
    operation: () => Promise<T>,
    onError?: (error: ProcessingError) => void,
    maxRetries: number = ProcessingErrorHandler.DEFAULT_MAX_RETRIES
  ): Promise<T> {
    const delays = ProcessingErrorHandler.RETRY_DELAYS;
    let lastError: ProcessingError | null = null;

    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        return await operation();
      } catch (error) {
        lastError = ProcessingErrorHandler.createProcessingError(error, attempt, maxRetries);

        // Notify error handler
        if (onError) {
          onError(lastError);
        }

        // Stop when the error cannot be retried or this was the final attempt
        if (!lastError.recoverable || attempt === maxRetries) {
          break;
        }

        // Wait before retry; attempts beyond the table reuse the last delay
        const delay = delays[Math.min(attempt, delays.length - 1)];
        await ProcessingErrorHandler.delay(delay);

        console.log(`Retrying operation (attempt ${attempt + 2}/${maxRetries + 1}) after ${delay}ms delay`);
      }
    }

    // All retries exhausted
    throw lastError ?? new Error('Operation failed after all retries');
  }

  /**
   * Create a timeout wrapper for operations.
   *
   * Rejects with `new Error(timeoutMessage)` if `operation` has not settled
   * within `timeoutMs`. The operation itself is not cancelled. The timer is
   * cleared as soon as the operation settles or throws synchronously.
   */
  static withTimeout<T>(
    operation: () => Promise<T>,
    timeoutMs: number,
    timeoutMessage: string = 'Operation timed out'
  ): Promise<T> {
    return new Promise<T>((resolve, reject) => {
      const timeoutId = setTimeout(() => {
        reject(new Error(timeoutMessage));
      }, timeoutMs);

      let pending: Promise<T>;
      try {
        pending = operation();
      } catch (error) {
        // Synchronous throw: do not leave the timer armed.
        clearTimeout(timeoutId);
        reject(error);
        return;
      }

      pending.then(
        result => {
          clearTimeout(timeoutId);
          resolve(result);
        },
        error => {
          clearTimeout(timeoutId);
          reject(error);
        }
      );
    });
  }

  /**
   * Create an AbortController that times out after specified duration.
   * If the controller is aborted earlier, the pending timer is cancelled.
   */
  static createTimeoutController(timeoutMs: number): AbortController {
    const controller = new AbortController();

    const timeoutId = setTimeout(() => {
      controller.abort();
    }, timeoutMs);

    controller.signal.addEventListener('abort', () => clearTimeout(timeoutId), { once: true });

    return controller;
  }

  /**
   * Check if an error indicates the operation should be retried.
   */
  static shouldRetry(error: ProcessingError): boolean {
    return error.recoverable && error.retryCount < error.maxRetries;
  }

  /**
   * Get user-friendly suggestions based on error type.
   */
  static getErrorSuggestions(error: ProcessingError): string[] {
    switch (error.type) {
      case 'network':
        return [
          'Check your internet connection',
          'Try refreshing the page',
          'Try again in a few moments'
        ];

      case 'memory':
        return [
          'Close other browser tabs or applications',
          'Try processing a smaller file',
          'Restart your browser',
          'Use a device with more memory'
        ];

      case 'timeout':
        return [
          'Try processing a smaller file',
          'Break large files into smaller sections',
          'Check your internet connection speed'
        ];

      case 'corruption':
        return [
          'Verify the PDF file opens in other applications',
          'Try re-downloading the file',
          'Try a different PDF file',
          'Contact the file creator if it appears corrupted'
        ];

      case 'parsing':
        return [
          'Verify this is a valid PDF file',
          'Try a different PDF file',
          'Contact support if the problem persists'
        ];

      default:
        return [
          'Try refreshing the page',
          'Try again in a few moments',
          'Contact support if the problem persists'
        ];
    }
  }

  /**
   * Utility function for delays.
   */
  private static delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
|
||||
450
frontend/src/services/thumbnailGenerationService.ts
Normal file
450
frontend/src/services/thumbnailGenerationService.ts
Normal file
@@ -0,0 +1,450 @@
|
||||
/**
|
||||
* High-performance thumbnail generation service using Web Workers
|
||||
*/
|
||||
|
||||
/** Outcome of rendering one page. */
interface ThumbnailResult {
  pageNumber: number;
  /** Thumbnail data URL; the main-thread path sets '' when rendering failed. */
  thumbnail: string;
  success: boolean;
  error?: string;
}

/** Options accepted by `generateThumbnails`. */
interface ThumbnailGenerationOptions {
  /** Render scale passed to the page viewport (default 0.2). */
  scale?: number;
  /** Image quality (default 0.8); the main-thread path uses it as JPEG quality. */
  quality?: number;
  /** Accepted for compatibility; not read by the current implementation. */
  batchSize?: number;
  /** Accepted for compatibility; not read by the current implementation. */
  parallelBatches?: number;
}

/** Cache entry for a single page thumbnail. */
interface CachedThumbnail {
  thumbnail: string;
  /** Timestamp (ms) of the last insert or cache read. */
  lastUsed: number;
  /** Estimated size in bytes. */
  sizeBytes: number;
}

/** Progress payload forwarded to `onProgress` callbacks. */
type ThumbnailProgress = { completed: number; total: number; thumbnails: ThumbnailResult[] };

/** Callbacks registered for a job that is running on a worker. */
interface ThumbnailJob {
  resolve: (thumbnails: ThumbnailResult[]) => void;
  reject: (error: unknown) => void;
  onProgress?: (progress: ThumbnailProgress) => void;
}

/**
 * Generates page thumbnails using a pool of Web Workers, with a main-thread
 * fallback, and keeps a size-limited thumbnail cache keyed by page id.
 */
export class ThumbnailGenerationService {
  private workers: Worker[] = [];
  private activeJobs = new Map<string, ThumbnailJob>();
  private jobCounter = 0;
  private isGenerating = false;

  // Session-based thumbnail cache
  private thumbnailCache = new Map<string, CachedThumbnail>();
  private maxCacheSizeBytes = 1024 * 1024 * 1024; // 1GB cache limit
  private currentCacheSize = 0;

  /**
   * @param maxWorkers Number of workers to try to start. Workers come up
   *   asynchronously; until at least one is ready, generation runs on the main thread.
   */
  constructor(private maxWorkers: number = 3) {
    this.initializeWorkers();
  }

  /** Starts all workers and records the ones that answered the PING handshake. */
  private initializeWorkers(): void {
    const workerPromises: Promise<Worker | null>[] = [];

    for (let i = 0; i < this.maxWorkers; i++) {
      workerPromises.push(this.createWorker(i));
    }

    // Wait for all workers to initialize or fail
    void Promise.all(workerPromises).then((workers) => {
      this.workers = workers.filter((w): w is Worker => w !== null);
      const successCount = this.workers.length;
      const failCount = this.maxWorkers - successCount;

      console.log(`🔧 Worker initialization complete: ${successCount}/${this.maxWorkers} workers ready`);

      if (failCount > 0) {
        console.warn(`⚠️ ${failCount} workers failed to initialize - will use main thread fallback`);
      }

      if (successCount === 0) {
        console.warn('🚨 No Web Workers available - all thumbnail generation will use main thread');
      }
    });
  }

  /**
   * Creates one worker and resolves with it once it answers PING with PONG.
   * Resolves with `null` when creation fails, the worker errors, or no PONG
   * arrives within 3 seconds.
   */
  private createWorker(index: number): Promise<Worker | null> {
    return new Promise<Worker | null>((resolve) => {
      try {
        console.log(`Attempting to create worker ${index}...`);
        const worker = new Worker('/thumbnailWorker.js');
        let workerReady = false;

        // Test worker with timeout
        const pingTimeout = setTimeout(() => {
          if (!workerReady) {
            console.warn(`✗ Worker ${index} timed out (no PONG response)`);
            worker.terminate();
            resolve(null);
          }
        }, 3000); // Reduced timeout for faster feedback

        worker.onmessage = (e) => {
          const { type, data, jobId } = e.data;

          // Handle PONG response to confirm worker is ready
          if (type === 'PONG') {
            workerReady = true;
            clearTimeout(pingTimeout);
            console.log(`✓ Worker ${index} is ready and responsive`);
            resolve(worker);
            return;
          }

          // Messages for unknown (finished, timed out or stopped) jobs are ignored.
          const job = this.activeJobs.get(jobId);
          if (!job) return;

          switch (type) {
            case 'PROGRESS':
              if (job.onProgress) {
                job.onProgress(data);
              }
              break;

            case 'COMPLETE':
              job.resolve(data.thumbnails);
              this.activeJobs.delete(jobId);
              break;

            case 'ERROR':
              job.reject(new Error(data.error));
              this.activeJobs.delete(jobId);
              break;
          }
        };

        worker.onerror = (error) => {
          console.error(`✗ Worker ${index} failed with error:`, error);
          clearTimeout(pingTimeout);
          worker.terminate();
          // A worker that crashes after initialization must leave the pool,
          // otherwise jobs sent to it can only end by timing out.
          this.workers = this.workers.filter(w => w !== worker);
          resolve(null);
        };

        // Send PING to test worker
        try {
          worker.postMessage({ type: 'PING' });
        } catch (pingError) {
          console.error(`✗ Failed to send PING to worker ${index}:`, pingError);
          clearTimeout(pingTimeout);
          worker.terminate();
          resolve(null);
        }
      } catch (error) {
        console.error(`✗ Failed to create worker ${index}:`, error);
        resolve(null);
      }
    });
  }

  /**
   * Generate thumbnails for multiple pages using Web Workers.
   *
   * Falls back to the main thread when no worker is available or when the
   * worker run fails. Only one generation may run at a time.
   *
   * @returns Results sorted by page number (worker path) or the successful
   *   results in processing order (main-thread path).
   * @throws Error when another generation is already in progress.
   */
  async generateThumbnails(
    pdfArrayBuffer: ArrayBuffer,
    pageNumbers: number[],
    options: ThumbnailGenerationOptions = {},
    onProgress?: (progress: { completed: number; total: number; thumbnails: ThumbnailResult[] }) => void
  ): Promise<ThumbnailResult[]> {
    if (this.isGenerating) {
      console.warn('🚨 ThumbnailService: Thumbnail generation already in progress, rejecting new request');
      throw new Error('Thumbnail generation already in progress');
    }

    console.log(`🎬 ThumbnailService: Starting thumbnail generation for ${pageNumbers.length} pages`);
    this.isGenerating = true;

    const { scale = 0.2, quality = 0.8 } = options;

    try {
      // Check if workers are available, fallback to main thread if not
      if (this.workers.length === 0) {
        console.warn('No Web Workers available, falling back to main thread processing');
        return await this.generateThumbnailsMainThread(pdfArrayBuffer, pageNumbers, scale, quality, onProgress);
      }

      try {
        return await this.generateThumbnailsWithWorkers(pdfArrayBuffer, pageNumbers, scale, quality, onProgress);
      } catch (error) {
        // Only worker failures reach this fallback; a main-thread failure is not retried.
        console.error('Web Worker thumbnail generation failed, falling back to main thread:', error);
        return await this.generateThumbnailsMainThread(pdfArrayBuffer, pageNumbers, scale, quality, onProgress);
      }
    } finally {
      console.log('🔄 ThumbnailService: Resetting isGenerating flag');
      this.isGenerating = false;
    }
  }

  /**
   * Splits the pages across the ready workers, posts one job per non-empty
   * batch and waits for all of them. Each job is rejected after one minute.
   * On exit (success or failure) every job started here is unregistered and
   * its timer cleared, so late worker messages are ignored.
   */
  private async generateThumbnailsWithWorkers(
    pdfArrayBuffer: ArrayBuffer,
    pageNumbers: number[],
    scale: number,
    quality: number,
    onProgress?: (progress: ThumbnailProgress) => void
  ): Promise<ThumbnailResult[]> {
    // Split pages across workers
    const workerBatches = this.distributeWork(pageNumbers, this.workers.length);
    console.log(`🔧 ThumbnailService: Distributing ${pageNumbers.length} pages across ${this.workers.length} workers:`, workerBatches.map(batch => batch.length));

    const startedJobs: { jobId: string; timeout: ReturnType<typeof setTimeout> }[] = [];
    const jobPromises: Promise<ThumbnailResult[]>[] = [];

    try {
      for (let i = 0; i < workerBatches.length; i++) {
        const batch = workerBatches[i];
        if (batch.length === 0) continue;

        const worker = this.workers[i % this.workers.length];
        const jobId = `job-${++this.jobCounter}`;
        console.log(`🔧 ThumbnailService: Sending job ${jobId} with ${batch.length} pages to worker ${i}:`, batch);

        const promise = new Promise<ThumbnailResult[]>((resolve, reject) => {
          // Add timeout for worker jobs
          const timeout = setTimeout(() => {
            console.error(`⏰ ThumbnailService: Worker job ${jobId} timed out`);
            this.activeJobs.delete(jobId);
            reject(new Error(`Worker job ${jobId} timed out`));
          }, 60000); // 1 minute timeout

          startedJobs.push({ jobId, timeout });

          // Create job with timeout handling
          this.activeJobs.set(jobId, {
            resolve: (result) => {
              console.log(`✅ ThumbnailService: Job ${jobId} completed with ${result.length} thumbnails`);
              clearTimeout(timeout);
              resolve(result);
            },
            reject: (error) => {
              console.error(`❌ ThumbnailService: Job ${jobId} failed:`, error);
              clearTimeout(timeout);
              reject(error);
            },
            onProgress: onProgress ? (progressData) => {
              console.log(`📊 ThumbnailService: Job ${jobId} progress - ${progressData.completed}/${progressData.total} (${progressData.thumbnails.length} new)`);
              onProgress(progressData);
            } : undefined
          });

          worker.postMessage({
            type: 'GENERATE_THUMBNAILS',
            jobId,
            data: {
              pdfArrayBuffer,
              pageNumbers: batch,
              scale,
              quality
            }
          });
        });

        jobPromises.push(promise);
      }

      // Wait for all workers to complete
      const results = await Promise.all(jobPromises);

      // Flatten and sort results by page number
      const allThumbnails = results.flat().sort((a, b) => a.pageNumber - b.pageNumber);
      console.log(`🎯 ThumbnailService: All workers completed, returning ${allThumbnails.length} thumbnails`);

      return allThumbnails;
    } finally {
      // Drop bookkeeping for jobs still in flight (e.g. after one job failed)
      // so they cannot report progress or fire timeouts during a fallback.
      for (const { jobId, timeout } of startedJobs) {
        clearTimeout(timeout);
        this.activeJobs.delete(jobId);
      }
    }
  }

  /**
   * Fallback thumbnail generation on main thread.
   *
   * Renders pages one at a time in batches of 5, reporting progress after each
   * batch and yielding briefly between batches. Pages that fail to render are
   * logged and omitted from the returned array.
   */
  private async generateThumbnailsMainThread(
    pdfArrayBuffer: ArrayBuffer,
    pageNumbers: number[],
    scale: number,
    quality: number,
    onProgress?: (progress: { completed: number; total: number; thumbnails: ThumbnailResult[] }) => void
  ): Promise<ThumbnailResult[]> {
    console.log(`🔧 ThumbnailService: Fallback to main thread for ${pageNumbers.length} pages`);

    // Import PDF.js dynamically for main thread
    const { getDocument } = await import('pdfjs-dist');

    // Load PDF once
    const pdf = await getDocument({ data: pdfArrayBuffer }).promise;
    console.log(`✓ ThumbnailService: PDF loaded on main thread`);

    try {
      const allResults: ThumbnailResult[] = [];
      let completed = 0;
      const batchSize = 5; // Small batches for UI responsiveness

      // Process pages in small batches
      for (let i = 0; i < pageNumbers.length; i += batchSize) {
        const batch = pageNumbers.slice(i, i + batchSize);

        // Process batch sequentially (to avoid canvas conflicts)
        for (const pageNumber of batch) {
          try {
            const page = await pdf.getPage(pageNumber);
            const viewport = page.getViewport({ scale });

            const canvas = document.createElement('canvas');
            canvas.width = viewport.width;
            canvas.height = viewport.height;

            const context = canvas.getContext('2d');
            if (!context) {
              throw new Error('Could not get canvas context');
            }

            await page.render({ canvasContext: context, viewport }).promise;
            const thumbnail = canvas.toDataURL('image/jpeg', quality);

            allResults.push({ pageNumber, thumbnail, success: true });
          } catch (error) {
            console.error(`Failed to generate thumbnail for page ${pageNumber}:`, error);
            allResults.push({
              pageNumber,
              thumbnail: '',
              success: false,
              error: error instanceof Error ? error.message : 'Unknown error'
            });
          }
        }

        completed += batch.length;

        // Report progress with this batch's successful thumbnails only
        if (onProgress) {
          onProgress({
            completed,
            total: pageNumbers.length,
            thumbnails: allResults.slice(-batch.length).filter(r => r.success)
          });
        }

        // Small delay to keep UI responsive
        if (i + batchSize < pageNumbers.length) {
          await new Promise(resolve => setTimeout(resolve, 10));
        }
      }

      return allResults.filter(r => r.success);
    } finally {
      // Clean up even when a progress callback throws
      void pdf.destroy();
    }
  }

  /**
   * Distribute work evenly across workers (round-robin by position).
   */
  private distributeWork(pageNumbers: number[], numWorkers: number): number[][] {
    const batches: number[][] = Array.from({ length: numWorkers }, () => []);

    pageNumbers.forEach((pageNum, index) => {
      batches[index % numWorkers].push(pageNum);
    });

    return batches;
  }

  /**
   * Generate a single thumbnail (fallback for individual pages).
   *
   * @throws Error when no successful thumbnail was produced for the page.
   */
  async generateSingleThumbnail(
    pdfArrayBuffer: ArrayBuffer,
    pageNumber: number,
    options: ThumbnailGenerationOptions = {}
  ): Promise<string> {
    const results = await this.generateThumbnails(pdfArrayBuffer, [pageNumber], options);

    if (results.length === 0 || !results[0].success) {
      throw new Error(`Failed to generate thumbnail for page ${pageNumber}`);
    }

    return results[0].thumbnail;
  }

  /**
   * Add thumbnail to cache with size management.
   * Re-adding an existing page id replaces the entry and its accounted size.
   */
  addThumbnailToCache(pageId: string, thumbnail: string): void {
    const thumbnailSizeBytes = thumbnail.length * 0.75; // Rough base64 size estimate
    const now = Date.now();

    // Release the size of an entry being replaced so it is not counted twice
    const previous = this.thumbnailCache.get(pageId);
    if (previous) {
      this.currentCacheSize -= previous.sizeBytes;
    }

    this.thumbnailCache.set(pageId, {
      thumbnail,
      lastUsed: now,
      sizeBytes: thumbnailSizeBytes
    });

    this.currentCacheSize += thumbnailSizeBytes;

    // If we exceed the limit, trigger cleanup
    if (this.currentCacheSize > this.maxCacheSizeBytes) {
      this.cleanupThumbnailCache();
    }
  }

  /**
   * Get thumbnail from cache and update last used timestamp.
   * Returns `null` when the page id is not cached.
   */
  getThumbnailFromCache(pageId: string): string | null {
    const cached = this.thumbnailCache.get(pageId);
    if (!cached) return null;

    // Update last used timestamp
    cached.lastUsed = Date.now();

    return cached.thumbnail;
  }

  /**
   * Clean up cache using LRU eviction: rebuilds the cache from the most
   * recently used entries until 80% of the size limit is reached.
   */
  private cleanupThumbnailCache(): void {
    // Most recently used first
    const byRecency = Array.from(this.thumbnailCache.entries())
      .sort(([, a], [, b]) => b.lastUsed - a.lastUsed);

    this.thumbnailCache.clear();
    this.currentCacheSize = 0;
    const targetSize = this.maxCacheSizeBytes * 0.8; // Clean to 80% of limit

    for (const [key, value] of byRecency) {
      if (this.currentCacheSize >= targetSize) break;
      this.thumbnailCache.set(key, value);
      this.currentCacheSize += value.sizeBytes;
    }
  }

  /**
   * Clear all cached thumbnails.
   */
  clearThumbnailCache(): void {
    this.thumbnailCache.clear();
    this.currentCacheSize = 0;
  }

  /**
   * Get cache statistics.
   */
  getCacheStats() {
    return {
      entries: this.thumbnailCache.size,
      totalSizeBytes: this.currentCacheSize,
      maxSizeBytes: this.maxCacheSizeBytes
    };
  }

  /**
   * Stop generation but keep cache and workers alive.
   * Registered jobs are forgotten, so later worker messages for them are ignored.
   */
  stopGeneration(): void {
    this.activeJobs.clear();
    this.isGenerating = false;
  }

  /**
   * Terminate all workers and clear cache (only on explicit cleanup).
   */
  destroy(): void {
    this.workers.forEach(worker => worker.terminate());
    this.workers = [];
    this.activeJobs.clear();
    this.isGenerating = false;
    this.clearThumbnailCache();
  }
}
|
||||
|
||||
// Export singleton instance
|
||||
// Module-level instance constructed when this module is first evaluated; every importer of this binding shares it.
export const thumbnailGenerationService = new ThumbnailGenerationService();
|
||||
300
frontend/src/services/zipFileService.ts
Normal file
300
frontend/src/services/zipFileService.ts
Normal file
@@ -0,0 +1,300 @@
|
||||
import JSZip from 'jszip';
|
||||
|
||||
/**
 * Outcome of extracting the PDF entries of a ZIP archive.
 */
export interface ZipExtractionResult {
  /** True when at least one PDF was extracted. */
  success: boolean;
  /** Number of PDF entries found in the archive. */
  totalFiles: number;
  /** Number of entries that were extracted and passed the PDF header check. */
  extractedCount: number;
  /** The extracted PDFs as `File` objects. */
  extractedFiles: File[];
  /** Human-readable messages for validation failures and for entries that could not be extracted. */
  errors: string[];
}
|
||||
|
||||
/**
 * Outcome of inspecting a ZIP archive without extracting it.
 */
export interface ZipValidationResult {
  /** True when no problems were found and the archive holds at least one PDF. */
  isValid: boolean;
  /** True when at least one entry name ends in a PDF extension. */
  containsPDFs: boolean;
  /** Number of non-directory entries in the archive. */
  fileCount: number;
  /** Sum of the uncompressed sizes recorded for the non-directory entries, in bytes. */
  totalSizeBytes: number;
  /** Human-readable descriptions of every problem found. */
  errors: string[];
}
|
||||
|
||||
/**
 * Progress notification emitted while PDF entries are being extracted.
 */
export interface ZipExtractionProgress {
  /** Archive entry name about to be extracted; empty string on the final report. */
  currentFile: string;
  /** Total number of PDF entries to process. */
  totalFiles: number;
  /**
   * Entries handled so far. During extraction this is the index of the current
   * entry (failed entries included); on the final report it is the number of
   * PDFs actually extracted.
   */
  extractedCount: number;
  /** Completion percentage, from 0 to 100. */
  progress: number;
}
|
||||
|
||||
/**
 * Validates ZIP archives and extracts the PDF files they contain.
 *
 * Both public methods report problems through the `errors` array of their
 * result object instead of rejecting.
 */
export class ZipFileService {
  private readonly maxFileSize = 100 * 1024 * 1024; // 100MB per file
  private readonly maxTotalSize = 500 * 1024 * 1024; // 500MB total extraction limit
  private readonly supportedExtensions = ['.pdf'];

  /**
   * Validate a ZIP file without extracting it.
   *
   * Checks the archive size, the file type, the size recorded for each entry,
   * the summed size of all entries, and that at least one entry looks like a PDF.
   *
   * @param file the candidate ZIP archive
   * @returns the validation outcome; `isValid` is true only when `errors` is
   *          empty and at least one PDF entry was found
   */
  async validateZipFile(file: File): Promise<ZipValidationResult> {
    const result: ZipValidationResult = {
      isValid: false,
      fileCount: 0,
      totalSizeBytes: 0,
      containsPDFs: false,
      errors: []
    };

    try {
      // Check file size
      if (file.size > this.maxTotalSize) {
        result.errors.push(`ZIP file too large: ${this.formatFileSize(file.size)} (max: ${this.formatFileSize(this.maxTotalSize)})`);
        return result;
      }

      // Check file type
      if (!this.isZipFile(file)) {
        result.errors.push('File is not a valid ZIP archive');
        return result;
      }

      // Load and validate ZIP contents
      const zip = new JSZip();
      const zipContents = await zip.loadAsync(file);

      let totalSize = 0;
      let fileCount = 0;
      let containsPDFs = false;

      // Analyze ZIP contents
      for (const [filename, zipEntry] of Object.entries(zipContents.files)) {
        if (zipEntry.dir) {
          continue; // Skip directories
        }

        fileCount++;
        const uncompressedSize = this.getDeclaredUncompressedSize(zipEntry);
        totalSize += uncompressedSize;

        // Check if file is a PDF
        if (this.isPdfFile(filename)) {
          containsPDFs = true;
        }

        // Check individual file size
        if (uncompressedSize > this.maxFileSize) {
          result.errors.push(`File "${filename}" too large: ${this.formatFileSize(uncompressedSize)} (max: ${this.formatFileSize(this.maxFileSize)})`);
        }
      }

      // Check total uncompressed size
      if (totalSize > this.maxTotalSize) {
        result.errors.push(`Total uncompressed size too large: ${this.formatFileSize(totalSize)} (max: ${this.formatFileSize(this.maxTotalSize)})`);
      }

      result.fileCount = fileCount;
      result.totalSizeBytes = totalSize;
      result.containsPDFs = containsPDFs;
      // Computed before the "no PDFs" message is pushed; the containsPDFs term
      // already makes the result invalid in that case.
      result.isValid = result.errors.length === 0 && containsPDFs;

      if (!containsPDFs) {
        result.errors.push('ZIP file does not contain any PDF files');
      }

      return result;
    } catch (error) {
      result.errors.push(`Failed to validate ZIP file: ${error instanceof Error ? error.message : 'Unknown error'}`);
      return result;
    }
  }

  /**
   * Extract PDF files from a ZIP archive.
   *
   * The archive is validated first; if validation fails, its errors are
   * returned and nothing is extracted. Each PDF entry is then extracted in
   * turn, and a failure on one entry is recorded without stopping the rest.
   *
   * @param file the ZIP archive to read
   * @param onProgress optional callback invoked before each entry is extracted
   *                   and once more, with `progress: 100`, after the last one
   * @returns the extraction outcome; `success` is true when at least one PDF
   *          was extracted
   */
  async extractPdfFiles(
    file: File,
    onProgress?: (progress: ZipExtractionProgress) => void
  ): Promise<ZipExtractionResult> {
    const result: ZipExtractionResult = {
      success: false,
      extractedFiles: [],
      errors: [],
      totalFiles: 0,
      extractedCount: 0
    };

    try {
      // Validate ZIP file first
      const validation = await this.validateZipFile(file);
      if (!validation.isValid) {
        result.errors = validation.errors;
        return result;
      }

      // Load ZIP contents
      const zip = new JSZip();
      const zipContents = await zip.loadAsync(file);

      // Get all PDF files
      const pdfFiles = Object.entries(zipContents.files).filter(([filename, zipEntry]) =>
        !zipEntry.dir && this.isPdfFile(filename)
      );

      result.totalFiles = pdfFiles.length;

      // Extract each PDF file
      for (let i = 0; i < pdfFiles.length; i++) {
        const [filename, zipEntry] = pdfFiles[i];

        try {
          // Report progress
          if (onProgress) {
            onProgress({
              currentFile: filename,
              extractedCount: i,
              totalFiles: pdfFiles.length,
              progress: (i / pdfFiles.length) * 100
            });
          }

          // Extract file content
          const content = await zipEntry.async('uint8array');

          // Create File object
          const extractedFile = new File([content], this.sanitizeFilename(filename), {
            type: 'application/pdf',
            lastModified: zipEntry.date?.getTime() || Date.now()
          });

          // Validate extracted PDF
          if (await this.isValidPdfFile(extractedFile)) {
            result.extractedFiles.push(extractedFile);
            result.extractedCount++;
          } else {
            result.errors.push(`File "${filename}" is not a valid PDF`);
          }
        } catch (error) {
          result.errors.push(`Failed to extract "${filename}": ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
      }

      // Final progress report
      if (onProgress) {
        onProgress({
          currentFile: '',
          extractedCount: result.extractedCount,
          totalFiles: result.totalFiles,
          progress: 100
        });
      }

      result.success = result.extractedCount > 0;
      return result;
    } catch (error) {
      result.errors.push(`Failed to extract ZIP file: ${error instanceof Error ? error.message : 'Unknown error'}`);
      return result;
    }
  }

  /**
   * Read the uncompressed size recorded for an archive entry.
   *
   * NOTE(review): `_data` is not part of JSZip's declared typings; it is read
   * here as an undocumented internal, so the access is kept in one place and
   * anything that is not a positive number is treated as 0.
   *
   * @param zipEntry an entry from a loaded JSZip archive
   * @returns the recorded size in bytes, or 0 when it is unavailable
   */
  private getDeclaredUncompressedSize(zipEntry: unknown): number {
    const internal = (zipEntry as { _data?: { uncompressedSize?: unknown } } | null | undefined)?._data;
    const size = internal?.uncompressedSize;
    return typeof size === 'number' && size > 0 ? size : 0;
  }

  /**
   * Check if a file is a ZIP file based on type and extension.
   *
   * Either a recognised MIME type or a `.zip` extension is sufficient.
   */
  private isZipFile(file: File): boolean {
    const validTypes = [
      'application/zip',
      'application/x-zip-compressed',
      'application/x-zip',
      'application/octet-stream' // Some browsers use this for ZIP files
    ];

    const validExtensions = ['.zip'];
    const hasValidType = validTypes.includes(file.type);
    const lowerName = file.name.toLowerCase();
    const hasValidExtension = validExtensions.some(ext => lowerName.endsWith(ext));

    return hasValidType || hasValidExtension;
  }

  /**
   * Check if a filename indicates a PDF file (case-insensitive match against
   * `supportedExtensions`).
   */
  private isPdfFile(filename: string): boolean {
    const lowerName = filename.toLowerCase();
    return this.supportedExtensions.some(ext => lowerName.endsWith(ext));
  }

  /**
   * Validate that a file is actually a PDF by checking its header.
   *
   * @returns true when the file starts with the bytes `%PDF-`; false
   *          otherwise, including when the file cannot be read
   */
  private async isValidPdfFile(file: File): Promise<boolean> {
    try {
      // Read first few bytes to check PDF header
      const buffer = await file.slice(0, 8).arrayBuffer();
      const bytes = new Uint8Array(buffer);

      // Check for PDF header: %PDF-
      return bytes[0] === 0x25 && // %
             bytes[1] === 0x50 && // P
             bytes[2] === 0x44 && // D
             bytes[3] === 0x46 && // F
             bytes[4] === 0x2D;   // -
    } catch {
      return false;
    }
  }

  /**
   * Sanitize filename for safe use.
   *
   * Keeps only the part after the last `/`, replaces unsafe characters and
   * whitespace runs with underscores, collapses repeated underscores and trims
   * a leading or trailing underscore.
   */
  private sanitizeFilename(filename: string): string {
    // Remove directory path and get just the filename
    const basename = filename.split('/').pop() || filename;

    // Remove or replace unsafe characters
    return basename
      .replace(/[<>:"/\\|?*]/g, '_') // Replace unsafe chars with underscore
      .replace(/\s+/g, '_') // Replace spaces with underscores
      .replace(/_{2,}/g, '_') // Replace multiple underscores with single
      .replace(/^_|_$/g, ''); // Remove leading/trailing underscores
  }

  /**
   * Format file size for display.
   *
   * The unit index is clamped to the available units, so sizes of 1 TB and
   * above are shown in GB and sizes below one byte are shown in B.
   *
   * @param bytes size in bytes; zero, negative and NaN values yield `'0 B'`
   */
  private formatFileSize(bytes: number): string {
    if (!(bytes > 0)) return '0 B';
    const k = 1024;
    const sizes = ['B', 'KB', 'MB', 'GB'];
    const rawIndex = Math.floor(Math.log(bytes) / Math.log(k));
    const i = Math.min(Math.max(rawIndex, 0), sizes.length - 1);
    return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
  }

  /**
   * Get file extension from filename.
   *
   * @returns the lower-cased extension including the leading dot, or an empty
   *          string when the last path segment contains no dot
   */
  private getFileExtension(filename: string): string {
    // Only the last path segment counts, so a dot in a directory name is ignored.
    const basename = filename.split('/').pop() ?? filename;
    const dotIndex = basename.lastIndexOf('.');
    return dotIndex === -1 ? '' : basename.substring(dotIndex).toLowerCase();
  }

  /**
   * Check if ZIP file contains password protection.
   *
   * No per-entry encryption check is performed here; detection relies on
   * loading the archive and inspecting the message of any error it raises.
   *
   * @returns true when loading fails with a message mentioning "password" or
   *          "encrypted" (matched case-insensitively); false otherwise
   */
  private async isPasswordProtected(file: File): Promise<boolean> {
    try {
      const zip = new JSZip();
      await zip.loadAsync(file);

      return false; // JSZip will throw an error if password is required
    } catch (error) {
      // If we can't load the ZIP, it might be password protected.
      // Lower-case first so capitalised messages ("Encrypted ...") also match.
      const errorMessage = error instanceof Error ? error.message.toLowerCase() : '';
      return errorMessage.includes('password') || errorMessage.includes('encrypted');
    }
  }
}
|
||||
|
||||
// Export singleton instance
|
||||
// Module-level instance constructed when this module is first evaluated; every importer of this binding shares it.
export const zipFileService = new ZipFileService();
|
||||
Reference in New Issue
Block a user