Stirling 2.0 (#3928)

# Description of Changes

<!--

File context for managing files between tools and views
Optimisation for large files
Updated Split to work with the new file system and match Matt's stepped
design more closely

-->

---

## Checklist

### General

- [ ] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.

---------

Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
This commit is contained in:
Reece Browne
2025-07-16 17:53:50 +01:00
committed by GitHub
parent 584e2ecee7
commit 922bbc9076
66 changed files with 8728 additions and 2519 deletions

View File

@@ -0,0 +1,546 @@
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
import { ProcessedFile, ProcessingState, PDFPage, ProcessingStrategy, ProcessingConfig, ProcessingMetrics } from '../types/processing';
import { ProcessingCache } from './processingCache';
import { FileHasher } from '../utils/fileHash';
import { FileAnalyzer } from './fileAnalyzer';
import { ProcessingErrorHandler } from './processingErrorHandler';
// Set up PDF.js worker
GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
/**
 * Singleton service that turns PDF `File`s into `ProcessedFile`s (page list +
 * thumbnails) using a strategy chosen per file by `FileAnalyzer`. Results are
 * cached by content hash; in-flight work is tracked in `processing` and
 * broadcast to subscribers via `onProcessingChange`.
 */
export class EnhancedPDFProcessingService {
  private static instance: EnhancedPDFProcessingService;

  // Completed results, keyed by the hash from generateFileKey().
  private cache = new ProcessingCache();

  // In-flight (and briefly, just-finished) work keyed by file hash.
  private processing = new Map<string, ProcessingState>();

  // Subscribers notified on every state change (progress, completion, error).
  private processingListeners = new Set<(states: Map<string, ProcessingState>) => void>();

  // Running counters exposed via getMetrics().
  private metrics: ProcessingMetrics = {
    totalFiles: 0,
    completedFiles: 0,
    failedFiles: 0,
    averageProcessingTime: 0,
    cacheHitRate: 0,
    memoryUsage: 0
  };

  // Baseline config; per-file strategy and caller overrides are merged on top.
  private defaultConfig: ProcessingConfig = {
    strategy: 'immediate_full',
    chunkSize: 20,
    thumbnailQuality: 'medium',
    priorityPageCount: 10,
    useWebWorker: false,
    maxRetries: 3,
    timeoutMs: 300000 // 5 minutes
  };

  // Singleton: use getInstance() (or the exported enhancedPDFProcessingService).
  private constructor() {}

  /** Lazily create and return the process-wide instance. */
  static getInstance(): EnhancedPDFProcessingService {
    if (!EnhancedPDFProcessingService.instance) {
      EnhancedPDFProcessingService.instance = new EnhancedPDFProcessingService();
    }
    return EnhancedPDFProcessingService.instance;
  }

  /**
   * Process a file with intelligent strategy selection.
   *
   * Returns the cached ProcessedFile on a cache hit; otherwise kicks off
   * background processing and returns null — callers are expected to observe
   * completion via onProcessingChange() and re-query. Also returns null when
   * the file is already being processed.
   *
   * @throws Error when analysis marks the file as corrupted.
   */
  async processFile(file: File, customConfig?: Partial<ProcessingConfig>): Promise<ProcessedFile | null> {
    const fileKey = await this.generateFileKey(file);

    // Check cache first
    const cached = this.cache.get(fileKey);
    if (cached) {
      this.updateMetrics('cacheHit');
      return cached;
    }

    // Check if already processing
    if (this.processing.has(fileKey)) {
      return null;
    }

    // Analyze file to determine optimal strategy
    const analysis = await FileAnalyzer.analyzeFile(file);
    if (analysis.isCorrupted) {
      throw new Error(`File ${file.name} appears to be corrupted`);
    }

    // Create processing config. Note order: caller overrides win over the
    // analyzer-recommended strategy.
    const config: ProcessingConfig = {
      ...this.defaultConfig,
      strategy: analysis.recommendedStrategy,
      ...customConfig
    };

    // Start processing (intentionally not awaited — runs in the background).
    this.startProcessing(file, fileKey, config, analysis.estimatedProcessingTime);
    return null;
  }

  /**
   * Start processing a file with the specified configuration.
   *
   * Owns the full lifecycle of the ProcessingState entry: create → progress
   * updates → completed/error → delayed removal from the processing map
   * (2s on success, 10s on failure, so the UI can show the final state).
   */
  private async startProcessing(
    file: File,
    fileKey: string,
    config: ProcessingConfig,
    estimatedTime: number
  ): Promise<void> {
    // Create cancellation token (aborts automatically after config.timeoutMs).
    const cancellationToken = ProcessingErrorHandler.createTimeoutController(config.timeoutMs);

    // Set initial state
    const state: ProcessingState = {
      fileKey,
      fileName: file.name,
      status: 'processing',
      progress: 0,
      strategy: config.strategy,
      startedAt: Date.now(),
      estimatedTimeRemaining: estimatedTime,
      cancellationToken
    };
    this.processing.set(fileKey, state);
    this.notifyListeners();
    this.updateMetrics('started');

    try {
      // Execute processing with retry logic
      const processedFile = await ProcessingErrorHandler.executeWithRetry(
        () => this.executeProcessingStrategy(file, config, state),
        (error) => {
          // Surface intermediate (retryable) errors to subscribers.
          state.error = error;
          this.notifyListeners();
        },
        config.maxRetries
      );

      // Cache the result
      this.cache.set(fileKey, processedFile);

      // Update state to completed
      state.status = 'completed';
      state.progress = 100;
      state.completedAt = Date.now();
      this.notifyListeners();
      this.updateMetrics('completed', Date.now() - state.startedAt);

      // Remove from processing map after brief delay
      setTimeout(() => {
        this.processing.delete(fileKey);
        this.notifyListeners();
      }, 2000);
    } catch (error) {
      console.error('Processing failed for', file.name, ':', error);
      const processingError = ProcessingErrorHandler.createProcessingError(error);
      state.status = 'error';
      state.error = processingError;
      this.notifyListeners();
      this.updateMetrics('failed');

      // Remove failed processing after delay
      setTimeout(() => {
        this.processing.delete(fileKey);
        this.notifyListeners();
      }, 10000);
    }
  }

  /**
   * Execute the actual processing based on strategy.
   * Unknown strategies fall back to full immediate processing.
   */
  private async executeProcessingStrategy(
    file: File,
    config: ProcessingConfig,
    state: ProcessingState
  ): Promise<ProcessedFile> {
    switch (config.strategy) {
      case 'immediate_full':
        return this.processImmediateFull(file, config, state);
      case 'priority_pages':
        return this.processPriorityPages(file, config, state);
      case 'progressive_chunked':
        return this.processProgressiveChunked(file, config, state);
      case 'metadata_only':
        return this.processMetadataOnly(file, config, state);
      default:
        return this.processImmediateFull(file, config, state);
    }
  }

  /**
   * Process all pages immediately (for small files).
   * Progress: 10% after load, 10–95% across pages, then 100%.
   */
  private async processImmediateFull(
    file: File,
    config: ProcessingConfig,
    state: ProcessingState
  ): Promise<ProcessedFile> {
    const arrayBuffer = await file.arrayBuffer();
    const pdf = await getDocument({ data: arrayBuffer }).promise;
    const totalPages = pdf.numPages;
    state.progress = 10;
    this.notifyListeners();

    const pages: PDFPage[] = [];
    for (let i = 1; i <= totalPages; i++) {
      // Check for cancellation (timeout or explicit cancelProcessing).
      if (state.cancellationToken?.signal.aborted) {
        pdf.destroy();
        throw new Error('Processing cancelled');
      }

      const page = await pdf.getPage(i);
      const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality);
      pages.push({
        id: `${file.name}-page-${i}`,
        pageNumber: i,
        thumbnail,
        rotation: 0,
        selected: false
      });

      // Update progress
      state.progress = 10 + (i / totalPages) * 85;
      state.currentPage = i;
      this.notifyListeners();
    }

    pdf.destroy();
    state.progress = 100;
    this.notifyListeners();
    return this.createProcessedFile(file, pages, totalPages);
  }

  /**
   * Process priority pages first, then queue the rest.
   * Renders thumbnails for the first `priorityPageCount` pages only; the
   * remaining pages get null-thumbnail placeholders to be loaded lazily.
   */
  private async processPriorityPages(
    file: File,
    config: ProcessingConfig,
    state: ProcessingState
  ): Promise<ProcessedFile> {
    const arrayBuffer = await file.arrayBuffer();
    const pdf = await getDocument({ data: arrayBuffer }).promise;
    const totalPages = pdf.numPages;
    state.progress = 10;
    this.notifyListeners();

    const pages: PDFPage[] = [];
    const priorityCount = Math.min(config.priorityPageCount, totalPages);

    // Process priority pages first
    for (let i = 1; i <= priorityCount; i++) {
      if (state.cancellationToken?.signal.aborted) {
        pdf.destroy();
        throw new Error('Processing cancelled');
      }

      const page = await pdf.getPage(i);
      const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality);
      pages.push({
        id: `${file.name}-page-${i}`,
        pageNumber: i,
        thumbnail,
        rotation: 0,
        selected: false
      });

      state.progress = 10 + (i / priorityCount) * 60;
      state.currentPage = i;
      this.notifyListeners();
    }

    // Create placeholder pages for remaining pages
    for (let i = priorityCount + 1; i <= totalPages; i++) {
      pages.push({
        id: `${file.name}-page-${i}`,
        pageNumber: i,
        thumbnail: null, // Will be loaded lazily
        rotation: 0,
        selected: false
      });
    }

    pdf.destroy();
    state.progress = 100;
    this.notifyListeners();
    return this.createProcessedFile(file, pages, totalPages);
  }

  /**
   * Process in chunks with breaks between chunks.
   * Only the first chunk is rendered here (with a 10ms yield every 5 pages to
   * keep the UI responsive); the rest become null-thumbnail placeholders.
   */
  private async processProgressiveChunked(
    file: File,
    config: ProcessingConfig,
    state: ProcessingState
  ): Promise<ProcessedFile> {
    const arrayBuffer = await file.arrayBuffer();
    const pdf = await getDocument({ data: arrayBuffer }).promise;
    const totalPages = pdf.numPages;
    state.progress = 10;
    this.notifyListeners();

    const pages: PDFPage[] = [];
    const chunkSize = config.chunkSize;
    let processedPages = 0;

    // Process first chunk immediately
    const firstChunkEnd = Math.min(chunkSize, totalPages);
    for (let i = 1; i <= firstChunkEnd; i++) {
      if (state.cancellationToken?.signal.aborted) {
        pdf.destroy();
        throw new Error('Processing cancelled');
      }

      const page = await pdf.getPage(i);
      const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality);
      pages.push({
        id: `${file.name}-page-${i}`,
        pageNumber: i,
        thumbnail,
        rotation: 0,
        selected: false
      });

      processedPages++;
      state.progress = 10 + (processedPages / totalPages) * 70;
      state.currentPage = i;
      this.notifyListeners();

      // Small delay to prevent UI blocking
      if (i % 5 === 0) {
        await new Promise(resolve => setTimeout(resolve, 10));
      }
    }

    // Create placeholders for remaining pages
    for (let i = firstChunkEnd + 1; i <= totalPages; i++) {
      pages.push({
        id: `${file.name}-page-${i}`,
        pageNumber: i,
        thumbnail: null,
        rotation: 0,
        selected: false
      });
    }

    pdf.destroy();
    state.progress = 100;
    this.notifyListeners();
    return this.createProcessedFile(file, pages, totalPages);
  }

  /**
   * Process metadata only (for very large files).
   * Loads the PDF just to get the page count; all pages are placeholders.
   */
  private async processMetadataOnly(
    file: File,
    config: ProcessingConfig,
    state: ProcessingState
  ): Promise<ProcessedFile> {
    const arrayBuffer = await file.arrayBuffer();
    const pdf = await getDocument({ data: arrayBuffer }).promise;
    const totalPages = pdf.numPages;
    state.progress = 50;
    this.notifyListeners();

    // Create placeholder pages without thumbnails
    const pages: PDFPage[] = [];
    for (let i = 1; i <= totalPages; i++) {
      pages.push({
        id: `${file.name}-page-${i}`,
        pageNumber: i,
        thumbnail: null,
        rotation: 0,
        selected: false
      });
    }

    pdf.destroy();
    state.progress = 100;
    this.notifyListeners();
    return this.createProcessedFile(file, pages, totalPages);
  }

  /**
   * Render a page thumbnail with specified quality.
   * Returns a JPEG data URL. `page` is a pdf.js PDFPageProxy — typed `any`
   * here; TODO(review): tighten to the pdfjs-dist type.
   */
  private async renderPageThumbnail(page: any, quality: 'low' | 'medium' | 'high'): Promise<string> {
    const scales = { low: 0.2, medium: 0.5, high: 0.8 }; // Reduced low quality for page editor
    const scale = scales[quality];
    const viewport = page.getViewport({ scale });

    const canvas = document.createElement('canvas');
    canvas.width = viewport.width;
    canvas.height = viewport.height;

    const context = canvas.getContext('2d');
    if (!context) {
      throw new Error('Could not get canvas context');
    }

    await page.render({ canvasContext: context, viewport }).promise;
    return canvas.toDataURL('image/jpeg', 0.8); // Use JPEG for better compression
  }

  /**
   * Create a ProcessedFile object wrapping the rendered pages.
   * NOTE(review): `substr` is deprecated — prefer `slice(2, 11)` in a future pass.
   */
  private createProcessedFile(file: File, pages: PDFPage[], totalPages: number): ProcessedFile {
    return {
      id: `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
      pages,
      totalPages,
      metadata: {
        title: file.name,
        createdAt: new Date().toISOString(),
        modifiedAt: new Date().toISOString()
      }
    };
  }

  /**
   * Generate a unique, collision-resistant cache key (content-based hash).
   */
  private async generateFileKey(file: File): Promise<string> {
    return await FileHasher.generateHybridHash(file);
  }

  /**
   * Cancel processing for a specific file.
   * Aborts the token so the per-page cancellation checks throw; the catch in
   * startProcessing then handles state cleanup.
   */
  cancelProcessing(fileKey: string): void {
    const state = this.processing.get(fileKey);
    if (state && state.cancellationToken) {
      state.cancellationToken.abort();
      state.status = 'cancelled';
      this.notifyListeners();
    }
  }

  /**
   * Update processing metrics.
   * NOTE(review): the cacheHit formula assumes totalFiles also counts cache
   * hits, but 'cacheHit' never increments totalFiles — the resulting rate
   * drifts; verify the intended denominator.
   */
  private updateMetrics(event: 'started' | 'completed' | 'failed' | 'cacheHit', processingTime?: number): void {
    switch (event) {
      case 'started':
        this.metrics.totalFiles++;
        break;
      case 'completed':
        this.metrics.completedFiles++;
        if (processingTime) {
          // Update rolling average
          const totalProcessingTime = this.metrics.averageProcessingTime * (this.metrics.completedFiles - 1) + processingTime;
          this.metrics.averageProcessingTime = totalProcessingTime / this.metrics.completedFiles;
        }
        break;
      case 'failed':
        this.metrics.failedFiles++;
        break;
      case 'cacheHit':
        // Update cache hit rate
        const totalAttempts = this.metrics.totalFiles + 1;
        this.metrics.cacheHitRate = (this.metrics.cacheHitRate * this.metrics.totalFiles + 1) / totalAttempts;
        break;
    }
  }

  /**
   * Get processing metrics (shallow copy — safe for callers to mutate).
   */
  getMetrics(): ProcessingMetrics {
    return { ...this.metrics };
  }

  /**
   * State subscription for components.
   * @returns an unsubscribe function.
   */
  onProcessingChange(callback: (states: Map<string, ProcessingState>) => void): () => void {
    this.processingListeners.add(callback);
    return () => this.processingListeners.delete(callback);
  }

  /** Snapshot of current processing states (copied map; state objects shared). */
  getProcessingStates(): Map<string, ProcessingState> {
    return new Map(this.processing);
  }

  // Broadcast the live processing map to all subscribers.
  private notifyListeners(): void {
    this.processingListeners.forEach(callback => callback(this.processing));
  }

  /**
   * Cleanup method for removed files.
   * NOTE(review): forEach with an async callback is fire-and-forget — the key
   * hashing/deletion completes after notifyListeners() runs, and errors are
   * unobserved. Consider Promise.all over mapped promises instead.
   */
  cleanup(removedFiles: File[]): void {
    removedFiles.forEach(async (file) => {
      const key = await this.generateFileKey(file);
      this.cache.delete(key);
      this.cancelProcessing(key);
      this.processing.delete(key);
    });
    this.notifyListeners();
  }

  /**
   * Clear all processing for view switches.
   */
  clearAllProcessing(): void {
    // Cancel all ongoing processing
    this.processing.forEach((state, key) => {
      if (state.cancellationToken) {
        state.cancellationToken.abort();
      }
    });

    // Clear processing states
    this.processing.clear();
    this.notifyListeners();

    // Force memory cleanup hint — window.gc is non-standard (Chrome with
    // --expose-gc only); presumably a no-op elsewhere.
    if (typeof window !== 'undefined' && window.gc) {
      setTimeout(() => window.gc(), 100);
    }
  }

  /**
   * Get cache statistics (delegates to ProcessingCache).
   */
  getCacheStats() {
    return this.cache.getStats();
  }

  /**
   * Clear all cache and processing.
   */
  clearAll(): void {
    this.cache.clear();
    this.processing.clear();
    this.notifyListeners();
  }
}
// Shared singleton instance — import this rather than calling
// EnhancedPDFProcessingService.getInstance() at each use site.
export const enhancedPDFProcessingService = EnhancedPDFProcessingService.getInstance();

View File

@@ -0,0 +1,240 @@
import { getDocument } from 'pdfjs-dist';
import { FileAnalysis, ProcessingStrategy } from '../types/processing';
/**
 * Static analyzer that inspects a PDF `File` and recommends a processing
 * strategy (immediate/priority/chunked/metadata-only) plus a rough time
 * estimate, based on file size and page count.
 */
export class FileAnalyzer {
  /** File-size breakpoints (bytes) used by determineStrategy. */
  private static readonly SIZE_THRESHOLDS = {
    SMALL: 10 * 1024 * 1024, // 10MB
    MEDIUM: 50 * 1024 * 1024, // 50MB
    LARGE: 200 * 1024 * 1024, // 200MB
  };

  /** Page-count breakpoints used by determineStrategy. */
  private static readonly PAGE_THRESHOLDS = {
    FEW: 10, // <= 10 pages - immediate full processing
    MANY: 50, // <= 50 pages - priority pages
    MASSIVE: 100, // <= 100 pages - progressive chunked
    // >100 pages = metadata only
  };

  /**
   * Analyze a file to determine optimal processing strategy.
   * Never throws: on analysis failure the file is reported as corrupted with
   * the conservative 'metadata_only' strategy.
   */
  static async analyzeFile(file: File): Promise<FileAnalysis> {
    const analysis: FileAnalysis = {
      fileSize: file.size,
      isEncrypted: false,
      isCorrupted: false,
      recommendedStrategy: 'metadata_only',
      estimatedProcessingTime: 0,
    };

    try {
      // Quick validation and page count estimation
      const quickAnalysis = await this.quickPDFAnalysis(file);
      analysis.estimatedPageCount = quickAnalysis.pageCount;
      analysis.isEncrypted = quickAnalysis.isEncrypted;
      analysis.isCorrupted = quickAnalysis.isCorrupted;

      // Determine strategy based on file characteristics
      analysis.recommendedStrategy = this.determineStrategy(file.size, quickAnalysis.pageCount);

      // Estimate processing time
      analysis.estimatedProcessingTime = this.estimateProcessingTime(
        file.size,
        quickAnalysis.pageCount,
        analysis.recommendedStrategy
      );
    } catch (error) {
      console.error('File analysis failed:', error);
      analysis.isCorrupted = true;
      analysis.recommendedStrategy = 'metadata_only';
    }

    return analysis;
  }

  /**
   * Quick PDF analysis without full processing.
   *
   * Fix: the previous implementation read `pdf.isEncrypted`, which does not
   * exist on pdf.js' PDFDocumentProxy (it was always undefined). Encryption is
   * instead detected from the load failure: pdf.js rejects password-protected
   * documents with a PasswordException. A document that opens successfully is
   * reported as not encrypted (i.e. readable without a password).
   */
  private static async quickPDFAnalysis(file: File): Promise<{
    pageCount: number;
    isEncrypted: boolean;
    isCorrupted: boolean;
  }> {
    try {
      // For small files, read the whole file
      // For large files, try the whole file first (PDF.js needs the complete structure)
      const arrayBuffer = await file.arrayBuffer();
      const pdf = await getDocument({
        data: arrayBuffer,
        stopAtErrors: false, // Don't stop at minor errors
        verbosity: 0 // Suppress PDF.js warnings
      }).promise;

      const pageCount = pdf.numPages;

      // Clean up
      pdf.destroy();

      return {
        pageCount,
        isEncrypted: false, // opened without a password
        isCorrupted: false
      };
    } catch (error) {
      // Distinguish password protection from corruption.
      const errorName = error instanceof Error ? error.name : '';
      const errorMessage = error instanceof Error ? error.message.toLowerCase() : '';
      const isEncrypted =
        errorName === 'PasswordException' ||
        errorMessage.includes('password') ||
        errorMessage.includes('encrypted');
      return {
        pageCount: 0,
        isEncrypted,
        isCorrupted: !isEncrypted // If not encrypted, probably corrupted
      };
    }
  }

  /**
   * Determine the best processing strategy based on file characteristics.
   * Both the size AND page-count thresholds must hold to pick a faster
   * strategy; otherwise we fall through to the next (more conservative) tier.
   */
  private static determineStrategy(fileSize: number, pageCount?: number): ProcessingStrategy {
    // Handle corrupted or encrypted files
    if (!pageCount || pageCount === 0) {
      return 'metadata_only';
    }

    // Small files with few pages - process everything immediately
    if (fileSize <= this.SIZE_THRESHOLDS.SMALL && pageCount <= this.PAGE_THRESHOLDS.FEW) {
      return 'immediate_full';
    }

    // Medium files or many pages - priority pages first, then progressive
    if (fileSize <= this.SIZE_THRESHOLDS.MEDIUM && pageCount <= this.PAGE_THRESHOLDS.MANY) {
      return 'priority_pages';
    }

    // Large files or massive page counts - chunked processing
    if (fileSize <= this.SIZE_THRESHOLDS.LARGE && pageCount <= this.PAGE_THRESHOLDS.MASSIVE) {
      return 'progressive_chunked';
    }

    // Very large files - metadata only
    return 'metadata_only';
  }

  /**
   * Estimate processing time (ms) based on page count and strategy.
   * Only the pages actually rendered up-front are counted (first 10 for
   * priority_pages, first 20 for progressive_chunked).
   */
  private static estimateProcessingTime(
    fileSize: number,
    pageCount: number = 0,
    strategy: ProcessingStrategy
  ): number {
    const baseTimes = {
      immediate_full: 200, // 200ms per page
      priority_pages: 150, // 150ms per page (optimized)
      progressive_chunked: 100, // 100ms per page (chunked)
      metadata_only: 50 // 50ms total
    };

    const baseTime = baseTimes[strategy];

    switch (strategy) {
      case 'metadata_only':
        return baseTime;
      case 'immediate_full':
        return pageCount * baseTime;
      case 'priority_pages': {
        // Estimate time for priority pages (first 10)
        const priorityPages = Math.min(pageCount, 10);
        return priorityPages * baseTime;
      }
      case 'progressive_chunked': {
        // Estimate time for first chunk (20 pages)
        const firstChunk = Math.min(pageCount, 20);
        return firstChunk * baseTime;
      }
      default:
        return pageCount * baseTime;
    }
  }

  /**
   * Get processing recommendations for a set of files.
   * Files are analyzed sequentially (each analysis loads the whole file) to
   * avoid holding several large buffers in memory at once.
   */
  static async analyzeMultipleFiles(files: File[]): Promise<{
    analyses: Map<File, FileAnalysis>;
    recommendations: {
      totalEstimatedTime: number;
      suggestedBatchSize: number;
      shouldUseWebWorker: boolean;
      memoryWarning: boolean;
    };
  }> {
    const analyses = new Map<File, FileAnalysis>();
    let totalEstimatedTime = 0;
    let totalSize = 0;
    let totalPages = 0;

    // Analyze each file
    for (const file of files) {
      const analysis = await this.analyzeFile(file);
      analyses.set(file, analysis);
      totalEstimatedTime += analysis.estimatedProcessingTime;
      totalSize += file.size;
      totalPages += analysis.estimatedPageCount || 0;
    }

    // Generate recommendations
    const recommendations = {
      totalEstimatedTime,
      suggestedBatchSize: this.calculateBatchSize(files.length, totalSize),
      shouldUseWebWorker: totalPages > 100 || totalSize > this.SIZE_THRESHOLDS.MEDIUM,
      memoryWarning: totalSize > this.SIZE_THRESHOLDS.LARGE || totalPages > this.PAGE_THRESHOLDS.MASSIVE
    };

    return { analyses, recommendations };
  }

  /**
   * Calculate optimal batch size for processing multiple files.
   * Smaller batches for larger aggregate sizes; everything at once otherwise.
   */
  private static calculateBatchSize(fileCount: number, totalSize: number): number {
    // Process small batches for large total sizes
    if (totalSize > this.SIZE_THRESHOLDS.LARGE) {
      return Math.max(1, Math.floor(fileCount / 4));
    }
    if (totalSize > this.SIZE_THRESHOLDS.MEDIUM) {
      return Math.max(2, Math.floor(fileCount / 2));
    }
    // Process all at once for smaller total sizes
    return fileCount;
  }

  /**
   * Check if a file appears to be a valid PDF.
   * Requires a .pdf name or application/pdf MIME type AND the %PDF- magic
   * header in the first bytes. Never throws.
   */
  static async isValidPDF(file: File): Promise<boolean> {
    if (file.type !== 'application/pdf' && !file.name.toLowerCase().endsWith('.pdf')) {
      return false;
    }

    try {
      // Read first few bytes to check PDF header
      const header = file.slice(0, 8);
      const headerBytes = new Uint8Array(await header.arrayBuffer());
      const headerString = String.fromCharCode(...headerBytes);
      return headerString.startsWith('%PDF-');
    } catch (error) {
      return false;
    }
  }
}

View File

@@ -12,12 +12,12 @@ export class PDFExportService {
* Export PDF document with applied operations
*/
async exportPDF(
pdfDocument: PDFDocument,
pdfDocument: PDFDocument,
selectedPageIds: string[] = [],
options: ExportOptions = {}
): Promise<{ blob: Blob; filename: string } | { blobs: Blob[]; filenames: string[] }> {
const { selectedOnly = false, filename, splitDocuments = false } = options;
try {
// Determine which pages to export
const pagesToExport = selectedOnly && selectedPageIds.length > 0
@@ -57,16 +57,16 @@ export class PDFExportService {
for (const page of pages) {
// Get the original page from source document
const sourcePageIndex = page.pageNumber - 1;
if (sourcePageIndex >= 0 && sourcePageIndex < sourceDoc.getPageCount()) {
// Copy the page
const [copiedPage] = await newDoc.copyPages(sourceDoc, [sourcePageIndex]);
// Apply rotation
if (page.rotation !== 0) {
copiedPage.setRotation(degrees(page.rotation));
}
newDoc.addPage(copiedPage);
}
}
@@ -108,20 +108,20 @@ export class PDFExportService {
for (const endIndex of splitPoints) {
const segmentPages = pages.slice(startIndex, endIndex);
if (segmentPages.length > 0) {
const newDoc = await PDFLibDocument.create();
for (const page of segmentPages) {
const sourcePageIndex = page.pageNumber - 1;
if (sourcePageIndex >= 0 && sourcePageIndex < sourceDoc.getPageCount()) {
const [copiedPage] = await newDoc.copyPages(sourceDoc, [sourcePageIndex]);
if (page.rotation !== 0) {
copiedPage.setRotation(degrees(page.rotation));
}
newDoc.addPage(copiedPage);
}
}
@@ -130,16 +130,16 @@ export class PDFExportService {
newDoc.setCreator('Stirling PDF');
newDoc.setProducer('Stirling PDF');
newDoc.setTitle(`${baseFilename} - Part ${partNumber}`);
const pdfBytes = await newDoc.save();
const blob = new Blob([pdfBytes], { type: 'application/pdf' });
const filename = this.generateSplitFilename(baseFilename, partNumber);
blobs.push(blob);
filenames.push(filename);
partNumber++;
}
startIndex = endIndex;
}
@@ -172,11 +172,11 @@ export class PDFExportService {
link.href = url;
link.download = filename;
link.style.display = 'none';
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
// Clean up the URL after a short delay
setTimeout(() => URL.revokeObjectURL(url), 1000);
}
@@ -185,8 +185,7 @@ export class PDFExportService {
* Download multiple files as a ZIP
*/
async downloadAsZip(blobs: Blob[], filenames: string[], zipFilename: string): Promise<void> {
// For now, download files individually
// TODO: Implement ZIP creation when needed
// For now, download files individually
blobs.forEach((blob, index) => {
setTimeout(() => {
this.downloadFile(blob, filenames[index]);
@@ -208,7 +207,7 @@ export class PDFExportService {
errors.push('No pages available to export');
}
const pagesToExport = selectedOnly
const pagesToExport = selectedOnly
? pdfDocument.pages.filter(page => selectedPageIds.includes(page.id))
: pdfDocument.pages;
@@ -227,7 +226,7 @@ export class PDFExportService {
splitCount: number;
estimatedSize: string;
} {
const pagesToExport = selectedOnly
const pagesToExport = selectedOnly
? pdfDocument.pages.filter(page => selectedPageIds.includes(page.id))
: pdfDocument.pages;
@@ -260,4 +259,4 @@ export class PDFExportService {
}
// Export singleton instance
export const pdfExportService = new PDFExportService();
export const pdfExportService = new PDFExportService();

View File

@@ -0,0 +1,188 @@
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
import { ProcessedFile, ProcessingState, PDFPage } from '../types/processing';
import { ProcessingCache } from './processingCache';
// Set up PDF.js worker
GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
/**
 * Legacy singleton PDF processor: renders a thumbnail for every page of a
 * file, caches the result, and reports progress to subscribers. Superseded by
 * EnhancedPDFProcessingService for large files, but kept for callers that use
 * the simple name/size/mtime cache key.
 */
export class PDFProcessingService {
  private static instance: PDFProcessingService;

  // Completed results keyed by generateFileKey().
  private cache = new ProcessingCache();

  // In-flight (and briefly, just-finished) work keyed by file key.
  private processing = new Map<string, ProcessingState>();

  // Subscribers notified on every state change.
  private processingListeners = new Set<(states: Map<string, ProcessingState>) => void>();

  // Singleton: use getInstance() (or the exported pdfProcessingService).
  private constructor() {}

  /** Lazily create and return the process-wide instance. */
  static getInstance(): PDFProcessingService {
    if (!PDFProcessingService.instance) {
      PDFProcessingService.instance = new PDFProcessingService();
    }
    return PDFProcessingService.instance;
  }

  /**
   * Return the processed file from cache, or start background processing and
   * return null. Callers should subscribe via onProcessingChange() and
   * re-query once processing completes.
   */
  async getProcessedFile(file: File): Promise<ProcessedFile | null> {
    const fileKey = this.generateFileKey(file);

    // Check cache first
    const cached = this.cache.get(fileKey);
    if (cached) {
      console.log('Cache hit for:', file.name);
      return cached;
    }

    // Check if already processing
    if (this.processing.has(fileKey)) {
      console.log('Already processing:', file.name);
      return null; // Will be available when processing completes
    }

    // Start processing (intentionally not awaited — runs in the background)
    this.startProcessing(file, fileKey);
    return null;
  }

  /**
   * Drive one file through processing, maintaining its ProcessingState entry.
   * Completed entries linger 2s (failed: 5s) so the UI can show the outcome.
   */
  private async startProcessing(file: File, fileKey: string): Promise<void> {
    // Set initial state
    const state: ProcessingState = {
      fileKey,
      fileName: file.name,
      status: 'processing',
      progress: 0,
      startedAt: Date.now()
    };
    this.processing.set(fileKey, state);
    this.notifyListeners();

    try {
      // Process the file with progress updates
      const processedFile = await this.processFileWithProgress(file, (progress) => {
        state.progress = progress;
        this.notifyListeners();
      });

      // Cache the result
      this.cache.set(fileKey, processedFile);

      // Update state to completed
      state.status = 'completed';
      state.progress = 100;
      state.completedAt = Date.now();
      this.notifyListeners();

      // Remove from processing map after brief delay
      setTimeout(() => {
        this.processing.delete(fileKey);
        this.notifyListeners();
      }, 2000);
    } catch (error) {
      console.error('Processing failed for', file.name, ':', error);
      state.status = 'error';
      state.error = error instanceof Error ? error.message : 'Unknown error';
      this.notifyListeners();

      // Remove failed processing after delay
      setTimeout(() => {
        this.processing.delete(fileKey);
        this.notifyListeners();
      }, 5000);
    }
  }

  /**
   * Render every page at 0.5 scale into a PNG data-URL thumbnail.
   * Progress: 10% after load, 10–95% across pages, 100% at the end.
   *
   * Fix: previously a page was silently omitted from `pages` when a 2D canvas
   * context could not be obtained, leaving holes in the page numbering. Such
   * pages now get a null-thumbnail placeholder instead.
   */
  private async processFileWithProgress(
    file: File,
    onProgress: (progress: number) => void
  ): Promise<ProcessedFile> {
    const arrayBuffer = await file.arrayBuffer();
    const pdf = await getDocument({ data: arrayBuffer }).promise;
    const totalPages = pdf.numPages;
    onProgress(10); // PDF loaded

    const pages: PDFPage[] = [];
    for (let i = 1; i <= totalPages; i++) {
      const page = await pdf.getPage(i);
      const viewport = page.getViewport({ scale: 0.5 });
      const canvas = document.createElement('canvas');
      canvas.width = viewport.width;
      canvas.height = viewport.height;
      const context = canvas.getContext('2d');

      let thumbnail: string | null = null;
      if (context) {
        await page.render({ canvasContext: context, viewport }).promise;
        thumbnail = canvas.toDataURL();
      }
      pages.push({
        id: `${file.name}-page-${i}`,
        pageNumber: i,
        thumbnail,
        rotation: 0,
        selected: false
      });

      // Update progress
      const progress = 10 + (i / totalPages) * 85; // 10-95%
      onProgress(progress);
    }

    pdf.destroy();
    onProgress(100);

    return {
      // slice(2, 11) replaces the deprecated substr(2, 9): same 9-char suffix.
      id: `${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
      pages,
      totalPages,
      metadata: {
        title: file.name,
        createdAt: new Date().toISOString(),
        modifiedAt: new Date().toISOString()
      }
    };
  }

  /**
   * State subscription for components.
   * @returns an unsubscribe function.
   */
  onProcessingChange(callback: (states: Map<string, ProcessingState>) => void): () => void {
    this.processingListeners.add(callback);
    return () => this.processingListeners.delete(callback);
  }

  /** Snapshot of current processing states (copied map; state objects shared). */
  getProcessingStates(): Map<string, ProcessingState> {
    return new Map(this.processing);
  }

  // Broadcast the live processing map to all subscribers.
  private notifyListeners(): void {
    this.processingListeners.forEach(callback => callback(this.processing));
  }

  /**
   * Cheap cache key from name/size/mtime. NOTE: not collision-proof — two
   * different files can share all three fields (see FileHasher in the
   * enhanced service for a content-based key).
   */
  generateFileKey(file: File): string {
    return `${file.name}-${file.size}-${file.lastModified}`;
  }

  /** Drop cache and processing entries for files removed from activeFiles. */
  cleanup(removedFiles: File[]): void {
    removedFiles.forEach(file => {
      const key = this.generateFileKey(file);
      this.cache.delete(key);
      this.processing.delete(key);
    });
    this.notifyListeners();
  }

  /** Get cache stats (for debugging). */
  getCacheStats() {
    return this.cache.getStats();
  }

  /** Clear all cache and processing state. */
  clearAll(): void {
    this.cache.clear();
    this.processing.clear();
    this.notifyListeners();
  }
}
// Shared singleton instance — import this rather than calling
// PDFProcessingService.getInstance() at each use site.
export const pdfProcessingService = PDFProcessingService.getInstance();

View File

@@ -0,0 +1,138 @@
import { ProcessedFile, CacheConfig, CacheEntry, CacheStats } from '../types/processing';
/**
 * In-memory LRU-ish cache for ProcessedFile results with TTL expiry and a
 * size budget. Sizes are rough estimates (~50KB per thumbnail) tracked in
 * `totalSize`; eviction removes least-recently-accessed entries.
 */
export class ProcessingCache {
  private cache = new Map<string, CacheEntry>();

  // Estimated bytes currently held; kept in sync by set()/delete()/clear().
  private totalSize = 0;

  constructor(private config: CacheConfig = {
    maxFiles: 20,
    maxSizeBytes: 2 * 1024 * 1024 * 1024, // 2GB
    ttlMs: 30 * 60 * 1000 // 30 minutes
  }) {}

  /**
   * Insert or replace an entry.
   *
   * Fix: replacing an existing key previously left the old entry's size in
   * `totalSize` (the Map overwrote the entry but the accounting only ever
   * added), inflating the total and causing premature eviction. The stale
   * entry is now removed first so its size is reclaimed.
   */
  set(key: string, data: ProcessedFile): void {
    // Reclaim the size of any entry being replaced.
    this.delete(key);

    // Remove expired entries first
    this.cleanup();

    // Calculate entry size (rough estimate)
    const size = this.calculateSize(data);

    // Make room if needed
    this.makeRoom(size);

    this.cache.set(key, {
      data,
      size,
      lastAccessed: Date.now(),
      createdAt: Date.now()
    });
    this.totalSize += size;
  }

  /**
   * Look up an entry; returns null when absent or past its TTL (expired
   * entries are deleted on access). Refreshes lastAccessed on hit.
   */
  get(key: string): ProcessedFile | null {
    const entry = this.cache.get(key);
    if (!entry) return null;

    // Check TTL
    if (Date.now() - entry.createdAt > this.config.ttlMs) {
      this.delete(key);
      return null;
    }

    // Update last accessed
    entry.lastAccessed = Date.now();
    return entry.data;
  }

  /** True when the key is present and unexpired (expired entries are purged). */
  has(key: string): boolean {
    const entry = this.cache.get(key);
    if (!entry) return false;

    // Check TTL
    if (Date.now() - entry.createdAt > this.config.ttlMs) {
      this.delete(key);
      return false;
    }
    return true;
  }

  // Evict least-recently-accessed entries until both the entry-count and
  // size budgets can accommodate one more entry of `neededSize` bytes.
  private makeRoom(neededSize: number): void {
    // Remove oldest entries until we have space
    while (
      this.cache.size >= this.config.maxFiles ||
      this.totalSize + neededSize > this.config.maxSizeBytes
    ) {
      const oldestKey = this.findOldestEntry();
      if (oldestKey) {
        this.delete(oldestKey);
      } else break; // cache empty; nothing more to evict
    }
  }

  // Key of the least-recently-accessed entry, or null when the cache is empty.
  private findOldestEntry(): string | null {
    let oldest: { key: string; lastAccessed: number } | null = null;
    for (const [key, entry] of this.cache) {
      if (!oldest || entry.lastAccessed < oldest.lastAccessed) {
        oldest = { key, lastAccessed: entry.lastAccessed };
      }
    }
    return oldest?.key || null;
  }

  // Drop every entry whose TTL has elapsed.
  private cleanup(): void {
    const now = Date.now();
    for (const [key, entry] of this.cache) {
      if (now - entry.createdAt > this.config.ttlMs) {
        this.delete(key);
      }
    }
  }

  // Rough size estimate: ~50KB per thumbnail plus 10KB fixed overhead.
  private calculateSize(data: ProcessedFile): number {
    // Rough size estimation
    let size = 0;

    // Estimate size of thumbnails (main memory consumer)
    data.pages.forEach(page => {
      if (page.thumbnail) {
        // Base64 thumbnail is roughly 50KB each
        size += 50 * 1024;
      }
    });

    // Add some overhead for other data
    size += 10 * 1024; // 10KB overhead
    return size;
  }

  /** Remove a key (no-op if absent) and reclaim its accounted size. */
  delete(key: string): void {
    const entry = this.cache.get(key);
    if (entry) {
      this.totalSize -= entry.size;
      this.cache.delete(key);
    }
  }

  /** Drop everything and reset the size accounting. */
  clear(): void {
    this.cache.clear();
    this.totalSize = 0;
  }

  /** Snapshot of entry count and (estimated) byte usage vs. the budget. */
  getStats(): CacheStats {
    return {
      entries: this.cache.size,
      totalSizeBytes: this.totalSize,
      maxSizeBytes: this.config.maxSizeBytes
    };
  }

  // Get all cached keys (for debugging and cleanup)
  getKeys(): string[] {
    return Array.from(this.cache.keys());
  }
}

View File

@@ -0,0 +1,282 @@
import { ProcessingError } from '../types/processing';
export class ProcessingErrorHandler {
  private static readonly DEFAULT_MAX_RETRIES = 3;
  private static readonly RETRY_DELAYS = [1000, 2000, 4000]; // Progressive backoff in ms

  /**
   * Create a ProcessingError from an unknown error.
   *
   * @param error      Anything thrown; non-Error values are wrapped via String().
   * @param retryCount How many attempts have already failed.
   * @param maxRetries Retry budget used to decide recoverability.
   * @returns A ProcessingError with a classified type, a user-facing message
   *          and a recoverability verdict.
   */
  static createProcessingError(
    error: unknown,
    retryCount: number = 0,
    maxRetries: number = this.DEFAULT_MAX_RETRIES
  ): ProcessingError {
    const originalError = error instanceof Error ? error : new Error(String(error));
    const message = originalError.message;
    // Determine error type based on error message and properties
    const errorType = this.determineErrorType(originalError, message);
    // Determine if error is recoverable
    const recoverable = this.isRecoverable(errorType, retryCount, maxRetries);
    return {
      type: errorType,
      message: this.formatErrorMessage(errorType, message),
      recoverable,
      retryCount,
      maxRetries,
      originalError
    };
  }

  /**
   * Classify an error by inspecting its message and name.
   * Order matters: the first matching branch wins.
   */
  private static determineErrorType(error: Error, message: string): ProcessingError['type'] {
    const lowerMessage = message.toLowerCase();
    // Network-related errors
    if (lowerMessage.includes('network') ||
        lowerMessage.includes('fetch') ||
        lowerMessage.includes('connection')) {
      return 'network';
    }
    // Memory-related errors
    if (lowerMessage.includes('memory') ||
        lowerMessage.includes('quota') ||
        lowerMessage.includes('allocation') ||
        error.name === 'QuotaExceededError') {
      return 'memory';
    }
    // Timeout errors. Fix: also match 'timed out', the default message thrown
    // by withTimeout() below, which previously fell through and was
    // misreported as a parsing error. Note that AbortError is claimed by this
    // branch, so aborts raised by createTimeoutController() count as timeouts.
    if (lowerMessage.includes('timeout') ||
        lowerMessage.includes('timed out') ||
        lowerMessage.includes('aborted') ||
        error.name === 'AbortError') {
      return 'timeout';
    }
    // Cancellation. (The AbortError name check that used to live here was
    // dead code: the timeout branch above already returns for that name.)
    if (lowerMessage.includes('cancel') ||
        lowerMessage.includes('abort')) {
      return 'cancelled';
    }
    // PDF corruption/parsing errors
    if (lowerMessage.includes('pdf') ||
        lowerMessage.includes('parse') ||
        lowerMessage.includes('invalid') ||
        lowerMessage.includes('corrupt') ||
        lowerMessage.includes('malformed')) {
      return 'corruption';
    }
    // Default to parsing error
    return 'parsing';
  }

  /**
   * Determine if an error is recoverable based on type and retry count.
   */
  private static isRecoverable(
    errorType: ProcessingError['type'],
    retryCount: number,
    maxRetries: number
  ): boolean {
    // Never recoverable
    if (errorType === 'cancelled' || errorType === 'corruption') {
      return false;
    }
    // Not recoverable once the retry budget is spent
    if (retryCount >= maxRetries) {
      return false;
    }
    // Memory errors are usually not recoverable
    if (errorType === 'memory') {
      return retryCount < 1; // Only one retry for memory errors
    }
    // Network, timeout and parsing errors are worth retrying
    return errorType === 'network' || errorType === 'timeout' || errorType === 'parsing';
  }

  /**
   * Format error message for user display.
   */
  private static formatErrorMessage(errorType: ProcessingError['type'], originalMessage: string): string {
    switch (errorType) {
      case 'network':
        return 'Network connection failed. Please check your internet connection and try again.';
      case 'memory':
        return 'Insufficient memory to process this file. Try closing other applications or processing a smaller file.';
      case 'timeout':
        return 'Processing timed out. This file may be too large or complex to process.';
      case 'cancelled':
        return 'Processing was cancelled by user.';
      case 'corruption':
        return 'This PDF file appears to be corrupted or encrypted. Please try a different file.';
      case 'parsing':
        return `Failed to process PDF: ${originalMessage}`;
      default:
        return `Processing failed: ${originalMessage}`;
    }
  }

  /**
   * Execute an operation with automatic retry logic and progressive backoff.
   *
   * @param operation  Async operation; re-invoked on each retry.
   * @param onError    Optional callback invoked with every failure (including
   *                   non-recoverable and final ones).
   * @param maxRetries Maximum number of retries after the first attempt.
   * @throws The last ProcessingError when all attempts fail. NOTE(review):
   *         this throws the ProcessingError object itself (a plain object,
   *         not an Error instance); callers must not assume `instanceof Error`.
   */
  static async executeWithRetry<T>(
    operation: () => Promise<T>,
    onError?: (error: ProcessingError) => void,
    maxRetries: number = this.DEFAULT_MAX_RETRIES
  ): Promise<T> {
    let lastError: ProcessingError | null = null;
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        return await operation();
      } catch (error) {
        lastError = this.createProcessingError(error, attempt, maxRetries);
        // Notify error handler
        if (onError) {
          onError(lastError);
        }
        // Don't retry if not recoverable
        if (!lastError.recoverable) {
          break;
        }
        // Don't retry on last attempt
        if (attempt === maxRetries) {
          break;
        }
        // Wait before retry with progressive backoff; delays cap at the last
        // entry of RETRY_DELAYS.
        const delay = this.RETRY_DELAYS[Math.min(attempt, this.RETRY_DELAYS.length - 1)];
        await this.delay(delay);
        console.log(`Retrying operation (attempt ${attempt + 2}/${maxRetries + 1}) after ${delay}ms delay`);
      }
    }
    // All retries exhausted
    throw lastError || new Error('Operation failed after all retries');
  }

  /**
   * Race an operation against a timer; rejects with `timeoutMessage` when the
   * timer fires first. The underlying operation is NOT aborted — it keeps
   * running, only its result is ignored.
   */
  static withTimeout<T>(
    operation: () => Promise<T>,
    timeoutMs: number,
    timeoutMessage: string = 'Operation timed out'
  ): Promise<T> {
    return new Promise<T>((resolve, reject) => {
      const timeoutId = setTimeout(() => {
        reject(new Error(timeoutMessage));
      }, timeoutMs);
      operation()
        .then(result => {
          clearTimeout(timeoutId);
          resolve(result);
        })
        .catch(error => {
          clearTimeout(timeoutId);
          reject(error);
        });
    });
  }

  /**
   * Create an AbortController whose signal aborts after `timeoutMs`.
   * The resulting AbortError is classified as a 'timeout' by
   * determineErrorType.
   */
  static createTimeoutController(timeoutMs: number): AbortController {
    const controller = new AbortController();
    setTimeout(() => {
      controller.abort();
    }, timeoutMs);
    return controller;
  }

  /**
   * Check if an error indicates the operation should be retried.
   */
  static shouldRetry(error: ProcessingError): boolean {
    return error.recoverable && error.retryCount < error.maxRetries;
  }

  /**
   * Get user-friendly remediation suggestions based on error type.
   */
  static getErrorSuggestions(error: ProcessingError): string[] {
    switch (error.type) {
      case 'network':
        return [
          'Check your internet connection',
          'Try refreshing the page',
          'Try again in a few moments'
        ];
      case 'memory':
        return [
          'Close other browser tabs or applications',
          'Try processing a smaller file',
          'Restart your browser',
          'Use a device with more memory'
        ];
      case 'timeout':
        return [
          'Try processing a smaller file',
          'Break large files into smaller sections',
          'Check your internet connection speed'
        ];
      case 'corruption':
        return [
          'Verify the PDF file opens in other applications',
          'Try re-downloading the file',
          'Try a different PDF file',
          'Contact the file creator if it appears corrupted'
        ];
      case 'parsing':
        return [
          'Verify this is a valid PDF file',
          'Try a different PDF file',
          'Contact support if the problem persists'
        ];
      default:
        return [
          'Try refreshing the page',
          'Try again in a few moments',
          'Contact support if the problem persists'
        ];
    }
  }

  /**
   * Promise-based sleep used between retries.
   */
  private static delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}

View File

@@ -0,0 +1,450 @@
/**
 * High-performance thumbnail generation service using Web Workers
 */
/** Result of rendering a single PDF page to a thumbnail. */
interface ThumbnailResult {
  /** 1-based page number the thumbnail belongs to. */
  pageNumber: number;
  /** JPEG data URL; empty string when generation failed. */
  thumbnail: string;
  success: boolean;
  /** Failure description, present only when success is false. */
  error?: string;
}
/** Tuning options for batch thumbnail generation. */
interface ThumbnailGenerationOptions {
  /** Render scale passed to PDF.js getViewport (default 0.2). */
  scale?: number;
  /** JPEG quality in [0, 1] passed to canvas.toDataURL (default 0.8). */
  quality?: number;
  /** NOTE(review): accepted but currently unused by generateThumbnails. */
  batchSize?: number;
  /** NOTE(review): accepted but currently unused by generateThumbnails. */
  parallelBatches?: number;
}
/** Cache record for one generated thumbnail (LRU bookkeeping). */
interface CachedThumbnail {
  thumbnail: string;
  /** Timestamp of last read; drives LRU eviction. */
  lastUsed: number;
  /** Approximate decoded byte size of the base64 payload. */
  sizeBytes: number;
}
export class ThumbnailGenerationService {
  /** Worker pool; empty when every worker failed its PING/PONG handshake. */
  private workers: Worker[] = [];
  /** In-flight jobs keyed by jobId; resolved/rejected from worker messages. */
  private activeJobs = new Map<string, { resolve: Function; reject: Function; onProgress?: Function }>();
  /** Monotonic counter used to mint unique job ids. */
  private jobCounter = 0;
  /** Single-flight guard: only one batch generation may run at a time. */
  private isGenerating = false;
  // Session-based thumbnail cache
  private thumbnailCache = new Map<string, CachedThumbnail>();
  private maxCacheSizeBytes = 1024 * 1024 * 1024; // 1GB cache limit
  private currentCacheSize = 0;

  constructor(private maxWorkers: number = 3) {
    this.initializeWorkers();
  }

  /**
   * Spawn up to maxWorkers Web Workers from /thumbnailWorker.js and keep only
   * those that answer a PING with PONG within 3s. Initialization is fully
   * asynchronous and failure-tolerant: with zero live workers, generation
   * falls back to the main thread.
   */
  private initializeWorkers(): void {
    const workerPromises: Promise<Worker | null>[] = [];
    for (let i = 0; i < this.maxWorkers; i++) {
      const workerPromise = new Promise<Worker | null>((resolve) => {
        try {
          console.log(`Attempting to create worker ${i}...`);
          const worker = new Worker('/thumbnailWorker.js');
          let workerReady = false;
          let pingTimeout: NodeJS.Timeout;
          worker.onmessage = (e) => {
            const { type, data, jobId } = e.data;
            // Handle PONG response to confirm worker is ready
            if (type === 'PONG') {
              workerReady = true;
              clearTimeout(pingTimeout);
              console.log(`✓ Worker ${i} is ready and responsive`);
              resolve(worker);
              return;
            }
            // All other messages belong to a previously submitted job.
            const job = this.activeJobs.get(jobId);
            if (!job) return;
            switch (type) {
              case 'PROGRESS':
                if (job.onProgress) {
                  job.onProgress(data);
                }
                break;
              case 'COMPLETE':
                job.resolve(data.thumbnails);
                this.activeJobs.delete(jobId);
                break;
              case 'ERROR':
                job.reject(new Error(data.error));
                this.activeJobs.delete(jobId);
                break;
            }
          };
          worker.onerror = (error) => {
            console.error(`✗ Worker ${i} failed with error:`, error);
            clearTimeout(pingTimeout);
            worker.terminate();
            resolve(null);
          };
          // Test worker with timeout
          pingTimeout = setTimeout(() => {
            if (!workerReady) {
              console.warn(`✗ Worker ${i} timed out (no PONG response)`);
              worker.terminate();
              resolve(null);
            }
          }, 3000); // Reduced timeout for faster feedback
          // Send PING to test worker
          try {
            worker.postMessage({ type: 'PING' });
          } catch (pingError) {
            console.error(`✗ Failed to send PING to worker ${i}:`, pingError);
            clearTimeout(pingTimeout);
            worker.terminate();
            resolve(null);
          }
        } catch (error) {
          console.error(`✗ Failed to create worker ${i}:`, error);
          resolve(null);
        }
      });
      workerPromises.push(workerPromise);
    }
    // Wait for all workers to initialize or fail; keep only the live ones.
    Promise.all(workerPromises).then((workers) => {
      this.workers = workers.filter((w): w is Worker => w !== null);
      const successCount = this.workers.length;
      const failCount = this.maxWorkers - successCount;
      console.log(`🔧 Worker initialization complete: ${successCount}/${this.maxWorkers} workers ready`);
      if (failCount > 0) {
        console.warn(`⚠️ ${failCount} workers failed to initialize - will use main thread fallback`);
      }
      if (successCount === 0) {
        console.warn('🚨 No Web Workers available - all thumbnail generation will use main thread');
      }
    });
  }

  /**
   * Generate thumbnails for multiple pages using Web Workers.
   *
   * Pages are distributed round-robin across the live worker pool; each
   * worker job has a 60s timeout. Falls back to main-thread rendering when
   * no workers are available or the worker path throws.
   *
   * NOTE(review): options.batchSize and options.parallelBatches are
   * destructured but not used by this method.
   *
   * @throws When a generation batch is already in progress (single-flight).
   */
  async generateThumbnails(
    pdfArrayBuffer: ArrayBuffer,
    pageNumbers: number[],
    options: ThumbnailGenerationOptions = {},
    onProgress?: (progress: { completed: number; total: number; thumbnails: ThumbnailResult[] }) => void
  ): Promise<ThumbnailResult[]> {
    if (this.isGenerating) {
      console.warn('🚨 ThumbnailService: Thumbnail generation already in progress, rejecting new request');
      throw new Error('Thumbnail generation already in progress');
    }
    console.log(`🎬 ThumbnailService: Starting thumbnail generation for ${pageNumbers.length} pages`);
    this.isGenerating = true;
    const {
      scale = 0.2,
      quality = 0.8,
      batchSize = 20, // Pages per worker
      parallelBatches = this.maxWorkers
    } = options;
    try {
      // Check if workers are available, fallback to main thread if not
      if (this.workers.length === 0) {
        console.warn('No Web Workers available, falling back to main thread processing');
        return await this.generateThumbnailsMainThread(pdfArrayBuffer, pageNumbers, scale, quality, onProgress);
      }
      // Split pages across workers
      const workerBatches = this.distributeWork(pageNumbers, this.workers.length);
      console.log(`🔧 ThumbnailService: Distributing ${pageNumbers.length} pages across ${this.workers.length} workers:`, workerBatches.map(batch => batch.length));
      const jobPromises: Promise<ThumbnailResult[]>[] = [];
      for (let i = 0; i < workerBatches.length; i++) {
        const batch = workerBatches[i];
        if (batch.length === 0) continue;
        const worker = this.workers[i % this.workers.length];
        const jobId = `job-${++this.jobCounter}`;
        console.log(`🔧 ThumbnailService: Sending job ${jobId} with ${batch.length} pages to worker ${i}:`, batch);
        const promise = new Promise<ThumbnailResult[]>((resolve, reject) => {
          // Add timeout for worker jobs
          const timeout = setTimeout(() => {
            console.error(`⏰ ThumbnailService: Worker job ${jobId} timed out`);
            this.activeJobs.delete(jobId);
            reject(new Error(`Worker job ${jobId} timed out`));
          }, 60000); // 1 minute timeout
          // Create job with timeout handling; resolve/reject clear the timer.
          this.activeJobs.set(jobId, {
            resolve: (result: any) => {
              console.log(`✅ ThumbnailService: Job ${jobId} completed with ${result.length} thumbnails`);
              clearTimeout(timeout);
              resolve(result);
            },
            reject: (error: any) => {
              console.error(`❌ ThumbnailService: Job ${jobId} failed:`, error);
              clearTimeout(timeout);
              reject(error);
            },
            onProgress: onProgress ? (progressData: any) => {
              console.log(`📊 ThumbnailService: Job ${jobId} progress - ${progressData.completed}/${progressData.total} (${progressData.thumbnails.length} new)`);
              onProgress(progressData);
            } : undefined
          });
          // NOTE(review): pdfArrayBuffer is structured-cloned (copied) for
          // each worker rather than transferred — presumably intentional so
          // every worker keeps its own copy; verify for very large PDFs.
          worker.postMessage({
            type: 'GENERATE_THUMBNAILS',
            jobId,
            data: {
              pdfArrayBuffer,
              pageNumbers: batch,
              scale,
              quality
            }
          });
        });
        jobPromises.push(promise);
      }
      // Wait for all workers to complete
      const results = await Promise.all(jobPromises);
      // Flatten and sort results by page number
      const allThumbnails = results.flat().sort((a, b) => a.pageNumber - b.pageNumber);
      console.log(`🎯 ThumbnailService: All workers completed, returning ${allThumbnails.length} thumbnails`);
      return allThumbnails;
    } catch (error) {
      console.error('Web Worker thumbnail generation failed, falling back to main thread:', error);
      return await this.generateThumbnailsMainThread(pdfArrayBuffer, pageNumbers, scale, quality, onProgress);
    } finally {
      console.log('🔄 ThumbnailService: Resetting isGenerating flag');
      this.isGenerating = false;
    }
  }

  /**
   * Fallback thumbnail generation on the main thread (PDF.js + canvas).
   * Processes pages in batches of 5 with a 10ms yield between batches to keep
   * the UI responsive. Unlike the worker path, this returns only successful
   * results (failed pages are filtered out).
   */
  private async generateThumbnailsMainThread(
    pdfArrayBuffer: ArrayBuffer,
    pageNumbers: number[],
    scale: number,
    quality: number,
    onProgress?: (progress: { completed: number; total: number; thumbnails: ThumbnailResult[] }) => void
  ): Promise<ThumbnailResult[]> {
    console.log(`🔧 ThumbnailService: Fallback to main thread for ${pageNumbers.length} pages`);
    // Import PDF.js dynamically for main thread
    const { getDocument } = await import('pdfjs-dist');
    // Load PDF once
    const pdf = await getDocument({ data: pdfArrayBuffer }).promise;
    console.log(`✓ ThumbnailService: PDF loaded on main thread`);
    const allResults: ThumbnailResult[] = [];
    let completed = 0;
    const batchSize = 5; // Small batches for UI responsiveness
    // Process pages in small batches
    for (let i = 0; i < pageNumbers.length; i += batchSize) {
      const batch = pageNumbers.slice(i, i + batchSize);
      // Process batch sequentially (to avoid canvas conflicts)
      for (const pageNumber of batch) {
        try {
          const page = await pdf.getPage(pageNumber);
          const viewport = page.getViewport({ scale });
          const canvas = document.createElement('canvas');
          canvas.width = viewport.width;
          canvas.height = viewport.height;
          const context = canvas.getContext('2d');
          if (!context) {
            throw new Error('Could not get canvas context');
          }
          await page.render({ canvasContext: context, viewport }).promise;
          const thumbnail = canvas.toDataURL('image/jpeg', quality);
          allResults.push({ pageNumber, thumbnail, success: true });
        } catch (error) {
          console.error(`Failed to generate thumbnail for page ${pageNumber}:`, error);
          allResults.push({
            pageNumber,
            thumbnail: '',
            success: false,
            error: error instanceof Error ? error.message : 'Unknown error'
          });
        }
      }
      completed += batch.length;
      // Report progress
      if (onProgress) {
        onProgress({
          completed,
          total: pageNumbers.length,
          thumbnails: allResults.slice(-batch.length).filter(r => r.success)
        });
      }
      // Small delay to keep UI responsive
      if (i + batchSize < pageNumbers.length) {
        await new Promise(resolve => setTimeout(resolve, 10));
      }
    }
    // Clean up
    pdf.destroy();
    return allResults.filter(r => r.success);
  }

  /**
   * Distribute work evenly across workers (round-robin assignment).
   */
  private distributeWork(pageNumbers: number[], numWorkers: number): number[][] {
    const batches: number[][] = Array(numWorkers).fill(null).map(() => []);
    pageNumbers.forEach((pageNum, index) => {
      const workerIndex = index % numWorkers;
      batches[workerIndex].push(pageNum);
    });
    return batches;
  }

  /**
   * Generate a single thumbnail (fallback for individual pages).
   * Subject to the same single-flight guard as generateThumbnails.
   *
   * @throws When generation fails or a batch is already running.
   */
  async generateSingleThumbnail(
    pdfArrayBuffer: ArrayBuffer,
    pageNumber: number,
    options: ThumbnailGenerationOptions = {}
  ): Promise<string> {
    const results = await this.generateThumbnails(pdfArrayBuffer, [pageNumber], options);
    if (results.length === 0 || !results[0].success) {
      throw new Error(`Failed to generate thumbnail for page ${pageNumber}`);
    }
    return results[0].thumbnail;
  }

  /**
   * Add thumbnail to cache with size management.
   * Triggers an LRU cleanup when the 1GB ceiling is exceeded.
   */
  addThumbnailToCache(pageId: string, thumbnail: string): void {
    const thumbnailSizeBytes = thumbnail.length * 0.75; // Rough base64 size estimate
    const now = Date.now();
    // Add new thumbnail (overwriting an existing entry for the same pageId
    // does not subtract its old size — accounting drifts slightly upward).
    this.thumbnailCache.set(pageId, {
      thumbnail,
      lastUsed: now,
      sizeBytes: thumbnailSizeBytes
    });
    this.currentCacheSize += thumbnailSizeBytes;
    // If we exceed 1GB, trigger cleanup
    if (this.currentCacheSize > this.maxCacheSizeBytes) {
      this.cleanupThumbnailCache();
    }
  }

  /**
   * Get thumbnail from cache and update last used timestamp.
   * Returns null on a miss.
   */
  getThumbnailFromCache(pageId: string): string | null {
    const cached = this.thumbnailCache.get(pageId);
    if (!cached) return null;
    // Update last used timestamp
    cached.lastUsed = Date.now();
    return cached.thumbnail;
  }

  /**
   * Clean up cache using LRU eviction: rebuilds the cache keeping the most
   * recently used entries until roughly 80% of the size ceiling is reached.
   */
  private cleanupThumbnailCache(): void {
    const entries = Array.from(this.thumbnailCache.entries());
    // Sort by last used (oldest first)
    entries.sort(([, a], [, b]) => a.lastUsed - b.lastUsed);
    this.thumbnailCache.clear();
    this.currentCacheSize = 0;
    const targetSize = this.maxCacheSizeBytes * 0.8; // Clean to 80% of limit
    // Keep most recently used entries until we hit target size
    for (let i = entries.length - 1; i >= 0 && this.currentCacheSize < targetSize; i--) {
      const [key, value] = entries[i];
      this.thumbnailCache.set(key, value);
      this.currentCacheSize += value.sizeBytes;
    }
  }

  /**
   * Clear all cached thumbnails and reset the size tally.
   */
  clearThumbnailCache(): void {
    this.thumbnailCache.clear();
    this.currentCacheSize = 0;
  }

  /**
   * Get cache statistics (entry count and byte usage vs. ceiling).
   */
  getCacheStats() {
    return {
      entries: this.thumbnailCache.size,
      totalSizeBytes: this.currentCacheSize,
      maxSizeBytes: this.maxCacheSizeBytes
    };
  }

  /**
   * Stop generation but keep cache and workers alive.
   * NOTE(review): pending job promises are dropped without rejection; their
   * per-job timeouts will eventually fire and reject them.
   */
  stopGeneration(): void {
    this.activeJobs.clear();
    this.isGenerating = false;
  }

  /**
   * Terminate all workers and clear cache (only on explicit cleanup).
   */
  destroy(): void {
    this.workers.forEach(worker => worker.terminate());
    this.workers = [];
    this.activeJobs.clear();
    this.isGenerating = false;
    this.clearThumbnailCache();
  }
}
// Export singleton instance
// Shared app-wide instance; spawns its worker pool eagerly at module load.
export const thumbnailGenerationService = new ThumbnailGenerationService();

View File

@@ -0,0 +1,300 @@
import JSZip from 'jszip';
/** Outcome of extracting PDFs from a ZIP archive. */
export interface ZipExtractionResult {
  /** True when at least one PDF was extracted successfully. */
  success: boolean;
  extractedFiles: File[];
  /** Human-readable errors for skipped/failed entries. */
  errors: string[];
  /** Number of PDF entries found in the archive. */
  totalFiles: number;
  /** Number of PDFs actually extracted and validated. */
  extractedCount: number;
}
/** Result of validating a ZIP archive without extracting it. */
export interface ZipValidationResult {
  isValid: boolean;
  /** Count of non-directory entries in the archive. */
  fileCount: number;
  /** Total uncompressed size of all entries. */
  totalSizeBytes: number;
  containsPDFs: boolean;
  errors: string[];
}
/** Progress snapshot reported during extraction. */
export interface ZipExtractionProgress {
  /** Name of the entry being extracted; empty string on the final report. */
  currentFile: string;
  extractedCount: number;
  totalFiles: number;
  /** Percentage in [0, 100]. */
  progress: number;
}
export class ZipFileService {
  /** Per-file uncompressed size ceiling. */
  private readonly maxFileSize = 100 * 1024 * 1024; // 100MB per file
  /** Total uncompressed extraction ceiling (also the max accepted ZIP size). */
  private readonly maxTotalSize = 500 * 1024 * 1024; // 500MB total extraction limit
  // NOTE(review): currently unused; isPdfFile hard-codes '.pdf'.
  private readonly supportedExtensions = ['.pdf'];

  /**
   * Validate a ZIP file without extracting it.
   * Checks size limits, ZIP-ness, and that at least one PDF entry exists.
   */
  async validateZipFile(file: File): Promise<ZipValidationResult> {
    const result: ZipValidationResult = {
      isValid: false,
      fileCount: 0,
      totalSizeBytes: 0,
      containsPDFs: false,
      errors: []
    };
    try {
      // Check file size
      if (file.size > this.maxTotalSize) {
        result.errors.push(`ZIP file too large: ${this.formatFileSize(file.size)} (max: ${this.formatFileSize(this.maxTotalSize)})`);
        return result;
      }
      // Check file type
      if (!this.isZipFile(file)) {
        result.errors.push('File is not a valid ZIP archive');
        return result;
      }
      // Load and validate ZIP contents
      const zip = new JSZip();
      const zipContents = await zip.loadAsync(file);
      let totalSize = 0;
      let fileCount = 0;
      let containsPDFs = false;
      // Analyze ZIP contents
      for (const [filename, zipEntry] of Object.entries(zipContents.files)) {
        if (zipEntry.dir) {
          continue; // Skip directories
        }
        fileCount++;
        // NOTE(review): _data is a JSZip internal, not public API — may
        // report 0 on some JSZip versions; verify against the pinned version.
        const uncompressedSize = zipEntry._data?.uncompressedSize || 0;
        totalSize += uncompressedSize;
        // Check if file is a PDF
        if (this.isPdfFile(filename)) {
          containsPDFs = true;
        }
        // Check individual file size
        if (uncompressedSize > this.maxFileSize) {
          // Fixed: interpolate the entry name (was the literal text "$(unknown)").
          result.errors.push(`File "${filename}" too large: ${this.formatFileSize(uncompressedSize)} (max: ${this.formatFileSize(this.maxFileSize)})`);
        }
      }
      // Check total uncompressed size
      if (totalSize > this.maxTotalSize) {
        result.errors.push(`Total uncompressed size too large: ${this.formatFileSize(totalSize)} (max: ${this.formatFileSize(this.maxTotalSize)})`);
      }
      result.fileCount = fileCount;
      result.totalSizeBytes = totalSize;
      result.containsPDFs = containsPDFs;
      result.isValid = result.errors.length === 0 && containsPDFs;
      if (!containsPDFs) {
        result.errors.push('ZIP file does not contain any PDF files');
      }
      return result;
    } catch (error) {
      result.errors.push(`Failed to validate ZIP file: ${error instanceof Error ? error.message : 'Unknown error'}`);
      return result;
    }
  }

  /**
   * Extract PDF files from a ZIP archive.
   * Validates first, then extracts every `.pdf` entry, verifying each
   * extracted file's %PDF- magic header before accepting it.
   */
  async extractPdfFiles(
    file: File,
    onProgress?: (progress: ZipExtractionProgress) => void
  ): Promise<ZipExtractionResult> {
    const result: ZipExtractionResult = {
      success: false,
      extractedFiles: [],
      errors: [],
      totalFiles: 0,
      extractedCount: 0
    };
    try {
      // Validate ZIP file first
      const validation = await this.validateZipFile(file);
      if (!validation.isValid) {
        result.errors = validation.errors;
        return result;
      }
      // Load ZIP contents
      const zip = new JSZip();
      const zipContents = await zip.loadAsync(file);
      // Get all PDF files (directories excluded)
      const pdfFiles = Object.entries(zipContents.files).filter(([filename, zipEntry]) =>
        !zipEntry.dir && this.isPdfFile(filename)
      );
      result.totalFiles = pdfFiles.length;
      // Extract each PDF file
      for (let i = 0; i < pdfFiles.length; i++) {
        const [filename, zipEntry] = pdfFiles[i];
        try {
          // Report progress
          if (onProgress) {
            onProgress({
              currentFile: filename,
              extractedCount: i,
              totalFiles: pdfFiles.length,
              progress: (i / pdfFiles.length) * 100
            });
          }
          // Extract file content
          const content = await zipEntry.async('uint8array');
          // Create File object
          const extractedFile = new File([content], this.sanitizeFilename(filename), {
            type: 'application/pdf',
            lastModified: zipEntry.date?.getTime() || Date.now()
          });
          // Validate extracted PDF by its magic header
          if (await this.isValidPdfFile(extractedFile)) {
            result.extractedFiles.push(extractedFile);
            result.extractedCount++;
          } else {
            // Fixed: interpolate the entry name (was the literal text "$(unknown)").
            result.errors.push(`File "${filename}" is not a valid PDF`);
          }
        } catch (error) {
          // Fixed: interpolate the entry name (was the literal text "$(unknown)").
          result.errors.push(`Failed to extract "${filename}": ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
      }
      // Final progress report
      if (onProgress) {
        onProgress({
          currentFile: '',
          extractedCount: result.extractedCount,
          totalFiles: result.totalFiles,
          progress: 100
        });
      }
      result.success = result.extractedCount > 0;
      return result;
    } catch (error) {
      result.errors.push(`Failed to extract ZIP file: ${error instanceof Error ? error.message : 'Unknown error'}`);
      return result;
    }
  }

  /**
   * Check if a file is a ZIP file based on MIME type or extension.
   */
  private isZipFile(file: File): boolean {
    const validTypes = [
      'application/zip',
      'application/x-zip-compressed',
      'application/x-zip',
      'application/octet-stream' // Some browsers use this for ZIP files
    ];
    const validExtensions = ['.zip'];
    const hasValidType = validTypes.includes(file.type);
    const hasValidExtension = validExtensions.some(ext =>
      file.name.toLowerCase().endsWith(ext)
    );
    return hasValidType || hasValidExtension;
  }

  /**
   * Check if a filename indicates a PDF file (case-insensitive extension).
   */
  private isPdfFile(filename: string): boolean {
    return filename.toLowerCase().endsWith('.pdf');
  }

  /**
   * Validate that a file is actually a PDF by checking its %PDF- header.
   */
  private async isValidPdfFile(file: File): Promise<boolean> {
    try {
      // Read first few bytes to check PDF header
      const buffer = await file.slice(0, 8).arrayBuffer();
      const bytes = new Uint8Array(buffer);
      // Check for PDF header: %PDF-
      return bytes[0] === 0x25 && // %
             bytes[1] === 0x50 && // P
             bytes[2] === 0x44 && // D
             bytes[3] === 0x46 && // F
             bytes[4] === 0x2D;   // -
    } catch (error) {
      return false;
    }
  }

  /**
   * Sanitize filename for safe use: strips the directory path and replaces
   * unsafe/whitespace characters with underscores.
   */
  private sanitizeFilename(filename: string): string {
    // Remove directory path and get just the filename
    const basename = filename.split('/').pop() || filename;
    // Remove or replace unsafe characters
    return basename
      .replace(/[<>:"/\\|?*]/g, '_') // Replace unsafe chars with underscore
      .replace(/\s+/g, '_')          // Replace spaces with underscores
      .replace(/_{2,}/g, '_')        // Replace multiple underscores with single
      .replace(/^_|_$/g, '');        // Remove leading/trailing underscores
  }

  /**
   * Format a byte count for display, e.g. 1536 -> "1.5 KB".
   */
  private formatFileSize(bytes: number): string {
    if (bytes === 0) return '0 B';
    const k = 1024;
    const sizes = ['B', 'KB', 'MB', 'GB'];
    const i = Math.floor(Math.log(bytes) / Math.log(k));
    return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
  }

  /**
   * Get the lowercase file extension (including the dot), or '' when the
   * filename has no dot. Fixed: previously `lastIndexOf` returning -1 made
   * `substring(-1)` return the whole name instead of an empty string.
   */
  private getFileExtension(filename: string): string {
    const dot = filename.lastIndexOf('.');
    return dot === -1 ? '' : filename.substring(dot).toLowerCase();
  }

  /**
   * Check if ZIP file contains password protection.
   * NOTE(review): JSZip exposes no direct encryption detection — this relies
   * on loadAsync failing with a password/encryption message; the loop body is
   * intentionally a no-op placeholder.
   */
  private async isPasswordProtected(file: File): Promise<boolean> {
    try {
      const zip = new JSZip();
      await zip.loadAsync(file);
      // Check if any files are encrypted
      for (const [filename, zipEntry] of Object.entries(zip.files)) {
        if (zipEntry.options?.compression === 'STORE' && zipEntry._data?.compressedSize === 0) {
          // This might indicate encryption, but JSZip doesn't provide direct encryption detection
          // We'll handle this in the extraction phase
        }
      }
      return false; // JSZip will throw an error if password is required
    } catch (error) {
      // If we can't load the ZIP, it might be password protected
      const errorMessage = error instanceof Error ? error.message : '';
      return errorMessage.includes('password') || errorMessage.includes('encrypted');
    }
  }
}
// Export singleton instance
// Stateless service; safe to share app-wide.
export const zipFileService = new ZipFileService();