Files
Stirling-PDF/frontend/src/services/zipFileService.ts
ConnorYoh ab6edd3196 Feature/v2/toggle_for_auto_unzip (#4584)
## default 
<img width="1012" height="627"
alt="{BF57458D-50A6-4057-94F1-D6AB4628EFD8}"
src="https://github.com/user-attachments/assets/85e550ab-0aed-4341-be95-d5d3bc7146db"
/>

## disabled
<img width="1141" height="620"
alt="{140DB87B-05CF-4E0E-A14A-ED15075BD2EE}"
src="https://github.com/user-attachments/assets/e0f56e84-fb9d-4787-b5cb-ba7c5a54b1e1"
/>

## unzip options
<img width="530" height="255"
alt="{482CE185-73D5-4D90-91BB-B9305C711391}"
src="https://github.com/user-attachments/assets/609b18ee-4eae-4cee-afc1-5db01f9d1088"
/>
<img width="579" height="473"
alt="{4DFCA96D-792D-4370-8C62-4BA42C9F1A5F}"
src="https://github.com/user-attachments/assets/c67fa4af-04ef-41df-9420-65ce4247e25b"
/>

## Pop-up appears and version metadata is maintained
<img width="1071" height="1220"
alt="{7F2A785C-5717-4A79-9D45-74BDA46DF273}"
src="https://github.com/user-attachments/assets/9374cd2a-b7e5-46c4-a722-e141ab42f0de"
/>

---------

Co-authored-by: Connor Yoh <connor@stirlingpdf.com>
2025-10-06 11:29:38 +00:00

564 lines
17 KiB
TypeScript

import JSZip, { JSZipObject } from 'jszip';
import { StirlingFileStub, createStirlingFile } from '../types/fileContext';
import { generateThumbnailForFile } from '../utils/thumbnailUtils';
import { fileStorage } from './fileStorage';
// Undocumented interface in JSZip for JSZipObject._data.
// This mirrors a private field that is not part of JSZip's public typings, so
// it may be missing or shaped differently in other JSZip versions; readers
// should treat every access as optional.
interface CompressedObject {
// Size in bytes of the entry as stored in the archive.
compressedSize: number;
// Size in bytes of the entry once decompressed.
uncompressedSize: number;
// CRC-32 value recorded for the entry.
crc32: number;
// JSZip-internal compression descriptor; treated as opaque in this file.
compression: object;
// Raw compressed payload in whichever representation JSZip holds it.
compressedContent: string|ArrayBuffer|Uint8Array|Buffer;
}
/**
 * Reads the private `_data` record that JSZip attaches to an archive entry.
 *
 * The field is not part of JSZip's public typings, so the entry is viewed
 * through a widened type and the value is returned untouched.
 *
 * @param zipEntry - Entry taken from a loaded JSZip archive.
 * @returns The entry's `_data` value, or `undefined` when the field is absent.
 */
const getData = (zipEntry: JSZipObject): CompressedObject | undefined => {
  const { _data } = zipEntry as JSZipObject & { _data?: CompressedObject };
  return _data;
};
/**
 * Outcome of an extraction run (`extractPdfFiles` / `extractAllFiles`).
 */
export interface ZipExtractionResult {
// True when at least one file was extracted.
success: boolean;
// The files that were extracted successfully.
extractedFiles: File[];
// Human-readable messages for entries (or the whole archive) that failed.
errors: string[];
// Number of archive entries that were candidates for extraction.
totalFiles: number;
// Number of entries that ended up in `extractedFiles`.
extractedCount: number;
}
/**
 * Outcome of `validateZipFile`, which inspects an archive without extracting it.
 */
export interface ZipValidationResult {
// True only when no errors were recorded and the archive contains a PDF.
isValid: boolean;
// Number of non-directory entries found in the archive.
fileCount: number;
// Sum of the uncompressed sizes reported for the non-directory entries.
totalSizeBytes: number;
// Whether any entry name ends with a PDF extension.
containsPDFs: boolean;
// Human-readable reasons the archive was rejected (empty when valid).
errors: string[];
}
/**
 * Payload passed to the optional `onProgress` callback during extraction.
 */
export interface ZipExtractionProgress {
// Name of the entry about to be processed; empty string in the final report.
currentFile: string;
// Per-entry reports carry the index of the current entry; the final report
// carries the number of entries actually extracted.
extractedCount: number;
// Total number of entries being processed in this run.
totalFiles: number;
// Completion percentage in the range 0-100.
progress: number;
}
/**
 * Helper for validating, creating and extracting ZIP archives with JSZip.
 *
 * Extraction methods never throw: failures are collected into the `errors`
 * array of the returned result object. `createZipFromFiles` is the exception
 * and rethrows a wrapped `Error`.
 */
export class ZipFileService {
  /** Largest uncompressed size accepted for a single archive entry (100MB). */
  private readonly maxFileSize = 100 * 1024 * 1024;
  /** Cap on both the archive's own size and the summed uncompressed size (500MB). */
  private readonly maxTotalSize = 500 * 1024 * 1024;
  /** Lower-case filename suffixes that identify a PDF entry. */
  private readonly supportedExtensions = ['.pdf'];

  // ZIP file validation constants
  private static readonly VALID_ZIP_TYPES = [
    'application/zip',
    'application/x-zip-compressed',
    'application/x-zip',
    'application/octet-stream' // Some browsers use this for ZIP files
  ];
  private static readonly VALID_ZIP_EXTENSIONS = ['.zip'];

  /** MIME type reported when an extension is missing or unknown. */
  private static readonly DEFAULT_MIME_TYPE = 'application/octet-stream';

  /**
   * Extension (lower-case, no dot) to MIME type.
   * A Map is used instead of an object literal so that extensions such as
   * "constructor" or "__proto__" cannot resolve to inherited Object members.
   */
  private static readonly MIME_TYPES: ReadonlyMap<string, string> = new Map([
    // Images
    ['png', 'image/png'],
    ['jpg', 'image/jpeg'],
    ['jpeg', 'image/jpeg'],
    ['gif', 'image/gif'],
    ['webp', 'image/webp'],
    ['bmp', 'image/bmp'],
    ['svg', 'image/svg+xml'],
    ['tiff', 'image/tiff'],
    ['tif', 'image/tiff'],
    // Documents
    ['pdf', 'application/pdf'],
    ['txt', 'text/plain'],
    ['html', 'text/html'],
    ['css', 'text/css'],
    ['js', 'application/javascript'],
    ['json', 'application/json'],
    ['xml', 'application/xml'],
    // Office documents
    ['doc', 'application/msword'],
    ['docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
    ['xls', 'application/vnd.ms-excel'],
    ['xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
    // Archives
    ['zip', 'application/zip'],
    ['rar', 'application/x-rar-compressed']
  ]);

  /**
   * Turn a caught value into a message suitable for the `errors` arrays.
   */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : 'Unknown error';
  }

  /**
   * Validate a ZIP file without extracting it.
   *
   * Checks the archive size, that it looks like a ZIP, the per-entry and total
   * uncompressed sizes, and that at least one entry is a PDF.
   *
   * @param file - The candidate ZIP archive.
   * @returns Validation summary; `isValid` is true only when `errors` is empty.
   */
  async validateZipFile(file: File): Promise<ZipValidationResult> {
    const result: ZipValidationResult = {
      isValid: false,
      fileCount: 0,
      totalSizeBytes: 0,
      containsPDFs: false,
      errors: []
    };

    try {
      // Check file size
      if (file.size > this.maxTotalSize) {
        result.errors.push(`ZIP file too large: ${this.formatFileSize(file.size)} (max: ${this.formatFileSize(this.maxTotalSize)})`);
        return result;
      }

      // Check file type
      if (!this.isZipFile(file)) {
        result.errors.push('File is not a valid ZIP archive');
        return result;
      }

      // Load and analyze ZIP contents
      const zipContents = await new JSZip().loadAsync(file);
      let totalSize = 0;
      let fileCount = 0;
      let containsPDFs = false;

      for (const [filename, zipEntry] of Object.entries(zipContents.files)) {
        if (zipEntry.dir) {
          continue; // Skip directories
        }

        fileCount++;
        // The size comes from JSZip's private `_data`; fall back to 0 if it is absent.
        const uncompressedSize = getData(zipEntry)?.uncompressedSize || 0;
        totalSize += uncompressedSize;

        if (this.isPdfFile(filename)) {
          containsPDFs = true;
        }

        // Check individual file size
        if (uncompressedSize > this.maxFileSize) {
          result.errors.push(`File "${filename}" too large: ${this.formatFileSize(uncompressedSize)} (max: ${this.formatFileSize(this.maxFileSize)})`);
        }
      }

      // Check total uncompressed size
      if (totalSize > this.maxTotalSize) {
        result.errors.push(`Total uncompressed size too large: ${this.formatFileSize(totalSize)} (max: ${this.formatFileSize(this.maxTotalSize)})`);
      }

      if (!containsPDFs) {
        result.errors.push('ZIP file does not contain any PDF files');
      }

      result.fileCount = fileCount;
      result.totalSizeBytes = totalSize;
      result.containsPDFs = containsPDFs;
      // A missing PDF has already been recorded as an error above.
      result.isValid = result.errors.length === 0;
      return result;
    } catch (error) {
      result.errors.push(`Failed to validate ZIP file: ${ZipFileService.describeError(error)}`);
      return result;
    }
  }

  /**
   * Create a ZIP file from an array of files.
   *
   * @param files - Files to add; each is stored under its own `name`.
   * @param zipFilename - Name given to the resulting archive `File`.
   * @returns The archive and its size in bytes.
   * @throws Error when reading an input file or generating the archive fails.
   */
  async createZipFromFiles(files: File[], zipFilename: string): Promise<{ zipFile: File; size: number }> {
    try {
      const zip = new JSZip();

      // Add each file to the ZIP
      for (const file of files) {
        const content = await file.arrayBuffer();
        zip.file(file.name, content);
      }

      // Generate ZIP blob
      const zipBlob = await zip.generateAsync({
        type: 'blob',
        compression: 'DEFLATE',
        compressionOptions: { level: 6 }
      });

      const zipFile = new File([zipBlob], zipFilename, {
        type: 'application/zip',
        lastModified: Date.now()
      });

      return { zipFile, size: zipFile.size };
    } catch (error) {
      throw new Error(`Failed to create ZIP file: ${ZipFileService.describeError(error)}`);
    }
  }

  /**
   * Extract PDF files from a ZIP archive.
   *
   * The archive is validated first; entries whose names end in a PDF
   * extension are then extracted, renamed with `sanitizeFilename`, and kept
   * only if their content starts with the PDF header.
   *
   * @param file - The ZIP archive.
   * @param onProgress - Optional callback invoked before each entry and once at the end.
   * @returns Extraction summary; `success` is true when at least one PDF was extracted.
   */
  async extractPdfFiles(
    file: File,
    onProgress?: (progress: ZipExtractionProgress) => void
  ): Promise<ZipExtractionResult> {
    const result: ZipExtractionResult = {
      success: false,
      extractedFiles: [],
      errors: [],
      totalFiles: 0,
      extractedCount: 0
    };

    try {
      // Validate ZIP file first
      const validation = await this.validateZipFile(file);
      if (!validation.isValid) {
        result.errors = validation.errors;
        return result;
      }

      // Load ZIP contents
      const zipContents = await new JSZip().loadAsync(file);

      // Get all PDF files
      const pdfFiles = Object.entries(zipContents.files).filter(([filename, zipEntry]) =>
        !zipEntry.dir && this.isPdfFile(filename)
      );
      result.totalFiles = pdfFiles.length;

      // Extract each PDF file
      for (let i = 0; i < pdfFiles.length; i++) {
        const [filename, zipEntry] = pdfFiles[i];

        try {
          // Report progress
          if (onProgress) {
            onProgress({
              currentFile: filename,
              extractedCount: i,
              totalFiles: pdfFiles.length,
              progress: (i / pdfFiles.length) * 100
            });
          }

          // An ArrayBuffer is a valid BlobPart, so no type cast is needed for File.
          const content = await zipEntry.async('arraybuffer');

          // Create File object
          const extractedFile = new File([content], this.sanitizeFilename(filename), {
            type: 'application/pdf',
            lastModified: zipEntry.date?.getTime() || Date.now()
          });

          // Validate extracted PDF
          if (await this.isValidPdfFile(extractedFile)) {
            result.extractedFiles.push(extractedFile);
            result.extractedCount++;
          } else {
            result.errors.push(`File "${filename}" is not a valid PDF`);
          }
        } catch (error) {
          result.errors.push(`Failed to extract "${filename}": ${ZipFileService.describeError(error)}`);
        }
      }

      // Final progress report
      if (onProgress) {
        onProgress({
          currentFile: '',
          extractedCount: result.extractedCount,
          totalFiles: result.totalFiles,
          progress: 100
        });
      }

      result.success = result.extractedCount > 0;
      return result;
    } catch (error) {
      result.errors.push(`Failed to extract ZIP file: ${ZipFileService.describeError(error)}`);
      return result;
    }
  }

  /**
   * Check if a file is a ZIP file based on type and extension.
   * Either a recognised MIME type or a `.zip` suffix is sufficient.
   */
  public isZipFile(file: File): boolean {
    const hasValidType = ZipFileService.VALID_ZIP_TYPES.includes(file.type);
    const lowerName = file.name.toLowerCase();
    const hasValidExtension = ZipFileService.VALID_ZIP_EXTENSIONS.some(ext => lowerName.endsWith(ext));
    return hasValidType || hasValidExtension;
  }

  /**
   * Check if a StirlingFileStub represents a ZIP file (for UI checks without loading full file).
   * A stub with no `type` is judged by its name alone.
   */
  public isZipFileStub(stub: StirlingFileStub): boolean {
    const hasValidType = !!stub.type && ZipFileService.VALID_ZIP_TYPES.includes(stub.type);
    const lowerName = stub.name.toLowerCase();
    const hasValidExtension = ZipFileService.VALID_ZIP_EXTENSIONS.some(ext => lowerName.endsWith(ext));
    return hasValidType || hasValidExtension;
  }

  /**
   * Check if a filename indicates a PDF file (case-insensitive suffix match
   * against `supportedExtensions`).
   */
  private isPdfFile(filename: string): boolean {
    const lowerName = filename.toLowerCase();
    return this.supportedExtensions.some(ext => lowerName.endsWith(ext));
  }

  /**
   * Validate that a file is actually a PDF by checking its header.
   *
   * @returns true when the content starts with the bytes `%PDF-`; false when
   *          it does not or the file cannot be read.
   */
  private async isValidPdfFile(file: File): Promise<boolean> {
    try {
      // Read first few bytes to check PDF header
      const buffer = await file.slice(0, 8).arrayBuffer();
      const bytes = new Uint8Array(buffer);
      // "%PDF-"; files shorter than the header yield undefined bytes and fail the match.
      const header = [0x25, 0x50, 0x44, 0x46, 0x2D];
      return header.every((expected, index) => bytes[index] === expected);
    } catch {
      return false;
    }
  }

  /**
   * Sanitize filename for safe use.
   * Drops any leading directory path (split on "/"), then replaces unsafe
   * characters and whitespace with single underscores.
   */
  private sanitizeFilename(filename: string): string {
    // Remove directory path and get just the filename
    const basename = filename.split('/').pop() || filename;

    return basename
      .replace(/[<>:"/\\|?*]/g, '_') // Replace unsafe chars with underscore
      .replace(/\s+/g, '_') // Replace spaces with underscores
      .replace(/_{2,}/g, '_') // Replace multiple underscores with single
      .replace(/^_|_$/g, ''); // Remove leading/trailing underscores
  }

  /**
   * Format file size for display, e.g. `1536` -> `"1.5 KB"`.
   *
   * Non-positive or NaN input is shown as `"0 B"`. The unit is capped at the
   * largest known one, so very large values never produce an undefined unit.
   */
  private formatFileSize(bytes: number): string {
    // `!(bytes > 0)` also catches NaN, which every comparison rejects.
    if (!(bytes > 0)) return '0 B';

    const k = 1024;
    const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];

    // Repeated division by a power of two is exact, unlike a log() ratio, and
    // the index is bounded by the unit table.
    let value = bytes;
    let unitIndex = 0;
    while (value >= k && unitIndex < sizes.length - 1) {
      value /= k;
      unitIndex++;
    }

    return parseFloat(value.toFixed(2)) + ' ' + sizes[unitIndex];
  }

  /**
   * Determine if a ZIP file should be extracted based on user preferences
   *
   * @param zipBlob - The ZIP file to check
   * @param autoUnzip - User preference for auto-unzipping
   * @param autoUnzipFileLimit - Maximum number of files to auto-extract
   * @param skipAutoUnzip - Bypass preference check (for automation)
   * @returns true if the ZIP should be extracted, false otherwise
   */
  async shouldUnzip(
    zipBlob: Blob | File,
    autoUnzip: boolean,
    autoUnzipFileLimit: number,
    skipAutoUnzip: boolean = false
  ): Promise<boolean> {
    try {
      // Automation always extracts
      if (skipAutoUnzip) {
        return true;
      }

      // Check if auto-unzip is enabled
      if (!autoUnzip) {
        return false;
      }

      // Load ZIP and count non-directory entries
      const zipContents = await new JSZip().loadAsync(zipBlob);
      const fileCount = Object.values(zipContents.files).filter(entry => !entry.dir).length;

      // Only extract if within limit
      return fileCount <= autoUnzipFileLimit;
    } catch (error) {
      console.error('Error checking shouldUnzip:', error);
      // On error, default to not extracting (safer)
      return false;
    }
  }

  /**
   * Extract all files from a ZIP archive (not limited to PDFs).
   *
   * Unlike `extractPdfFiles`, no validation is performed and each extracted
   * file keeps the entry's full name, including any directory path.
   *
   * @param file - The ZIP archive.
   * @param onProgress - Optional callback invoked before each entry and once at the end.
   * @returns Extraction summary; `success` is true when at least one file was extracted.
   */
  async extractAllFiles(
    file: File | Blob,
    onProgress?: (progress: ZipExtractionProgress) => void
  ): Promise<ZipExtractionResult> {
    const result: ZipExtractionResult = {
      success: false,
      extractedFiles: [],
      errors: [],
      totalFiles: 0,
      extractedCount: 0
    };

    try {
      // Load ZIP contents
      const zipContents = await new JSZip().loadAsync(file);

      // Get all files (not directories)
      const allFiles = Object.entries(zipContents.files).filter(([, zipEntry]) => !zipEntry.dir);
      result.totalFiles = allFiles.length;

      // Extract each file
      for (let i = 0; i < allFiles.length; i++) {
        const [filename, zipEntry] = allFiles[i];

        try {
          // Report progress
          if (onProgress) {
            onProgress({
              currentFile: filename,
              extractedCount: i,
              totalFiles: allFiles.length,
              progress: (i / allFiles.length) * 100
            });
          }

          // Extract file content
          const content = await zipEntry.async('blob');

          // Create File object with appropriate MIME type
          const mimeType = this.getMimeTypeFromExtension(filename);
          const extractedFile = new File([content], filename, { type: mimeType });

          result.extractedFiles.push(extractedFile);
          result.extractedCount++;
        } catch (error) {
          result.errors.push(`Failed to extract "${filename}": ${ZipFileService.describeError(error)}`);
        }
      }

      // Final progress report
      if (onProgress) {
        onProgress({
          currentFile: '',
          extractedCount: result.extractedCount,
          totalFiles: result.totalFiles,
          progress: 100
        });
      }

      result.success = result.extractedFiles.length > 0;
    } catch (error) {
      result.errors.push(`Failed to process ZIP file: ${ZipFileService.describeError(error)}`);
    }

    return result;
  }

  /**
   * Get MIME type based on file extension.
   *
   * The extension is the text after the last "." (case-insensitive). Names
   * without a dot, and unknown extensions, map to `application/octet-stream`.
   */
  private getMimeTypeFromExtension(fileName: string): string {
    const dotIndex = fileName.lastIndexOf('.');
    if (dotIndex === -1) {
      // No extension at all: do not mistake the bare name (e.g. "pdf") for one.
      return ZipFileService.DEFAULT_MIME_TYPE;
    }

    const ext = fileName.slice(dotIndex + 1).toLowerCase();
    return ZipFileService.MIME_TYPES.get(ext) ?? ZipFileService.DEFAULT_MIME_TYPE;
  }

  /**
   * Extract PDF files from ZIP and store them in IndexedDB with preserved history metadata
   * Used by both FileManager and FileEditor to avoid code duplication
   *
   * @param zipFile - The ZIP file to extract from
   * @param zipStub - The StirlingFileStub for the ZIP (contains metadata to preserve)
   * @returns Object with success status, extracted stubs, and any errors
   */
  async extractAndStoreFilesWithHistory(
    zipFile: File,
    zipStub: StirlingFileStub
  ): Promise<{ success: boolean; extractedStubs: StirlingFileStub[]; errors: string[] }> {
    const result = {
      success: false,
      extractedStubs: [] as StirlingFileStub[],
      errors: [] as string[]
    };

    try {
      // Extract PDF files from ZIP
      const extractionResult = await this.extractPdfFiles(zipFile);
      if (!extractionResult.success || extractionResult.extractedFiles.length === 0) {
        result.errors = extractionResult.errors;
        return result;
      }

      // Process each extracted file; a failure on one file does not stop the rest.
      for (const extractedFile of extractionResult.extractedFiles) {
        try {
          // Generate thumbnail
          const thumbnail = await generateThumbnailForFile(extractedFile);

          // Create StirlingFile
          const newStirlingFile = createStirlingFile(extractedFile);

          // Create StirlingFileStub with ZIP's history metadata
          const stub: StirlingFileStub = {
            id: newStirlingFile.fileId,
            name: extractedFile.name,
            size: extractedFile.size,
            type: extractedFile.type,
            lastModified: extractedFile.lastModified,
            quickKey: newStirlingFile.quickKey,
            createdAt: Date.now(),
            isLeaf: true,
            // Preserve ZIP's history - unzipping is NOT a tool operation
            originalFileId: zipStub.originalFileId,
            parentFileId: zipStub.parentFileId,
            versionNumber: zipStub.versionNumber,
            toolHistory: zipStub.toolHistory || [],
            thumbnailUrl: thumbnail
          };

          // Store in IndexedDB
          await fileStorage.storeStirlingFile(newStirlingFile, stub);
          result.extractedStubs.push(stub);
        } catch (error) {
          const errorMessage = ZipFileService.describeError(error);
          result.errors.push(`Failed to process "${extractedFile.name}": ${errorMessage}`);
        }
      }

      result.success = result.extractedStubs.length > 0;
      return result;
    } catch (error) {
      const errorMessage = ZipFileService.describeError(error);
      result.errors.push(`Failed to extract ZIP file: ${errorMessage}`);
      return result;
    }
  }
}
// Export singleton instance: one ZipFileService is constructed when this
// module is first evaluated and shared by every importer.
export const zipFileService = new ZipFileService();