Zip file changes

This commit is contained in:
Connor Yoh 2025-10-10 15:33:33 +01:00
parent b695e3900e
commit b2f0efdcd7
4 changed files with 112 additions and 35 deletions

View File

@ -104,13 +104,22 @@ const FileEditor = ({
// Handle PDF files normally
allExtractedFiles.push(file);
} else if (file.type === 'application/zip' || file.type === 'application/x-zip-compressed' || file.name.toLowerCase().endsWith('.zip')) {
// Handle ZIP files - only expand if they contain PDFs
// Handle ZIP files - extract all files except HTML
try {
// Check if ZIP contains HTML files - if so, don't extract
const containsHtml = await zipFileService.containsHtmlFiles(file);
if (containsHtml) {
// HTML files should stay zipped
allExtractedFiles.push(file);
continue;
}
// Validate ZIP file first
const validation = await zipFileService.validateZipFile(file);
if (validation.isValid && validation.containsPDFs) {
// ZIP contains PDFs - extract them
if (validation.isValid && validation.containsFiles) {
// ZIP contains files - extract them
setZipExtractionProgress({
isExtracting: true,
currentFile: file.name,
@ -119,7 +128,7 @@ const FileEditor = ({
totalFiles: validation.fileCount
});
const extractionResult = await zipFileService.extractPdfFiles(file, (progress) => {
const extractionResult = await zipFileService.extractAllFiles(file, (progress) => {
setZipExtractionProgress({
isExtracting: true,
currentFile: progress.currentFile,
@ -148,7 +157,7 @@ const FileEditor = ({
errors.push(`Failed to extract ZIP file "${file.name}": ${extractionResult.errors.join(', ')}`);
}
} else {
// ZIP doesn't contain PDFs or is invalid - treat as regular file
// ZIP is empty or invalid - treat as regular file
allExtractedFiles.push(file);
}
} catch (zipError) {

View File

@ -27,11 +27,11 @@ export const useToolResources = () => {
// Cleanup on unmount - use ref to avoid dependency on blobUrls state
const blobUrlsRef = useRef<string[]>([]);
useEffect(() => {
blobUrlsRef.current = blobUrls;
}, [blobUrls]);
useEffect(() => {
return () => {
blobUrlsRef.current.forEach(url => {
@ -85,6 +85,15 @@ export const useToolResources = () => {
const extractZipFiles = useCallback(async (zipBlob: Blob, skipAutoUnzip = false): Promise<File[]> => {
try {
// Check if ZIP contains HTML files - if so, keep as ZIP
const zipFile = new File([zipBlob], 'temp.zip', { type: 'application/zip' });
const containsHtml = await zipFileService.containsHtmlFiles(zipFile);
if (containsHtml) {
// HTML files should stay zipped
return [new File([zipBlob], 'result.zip', { type: 'application/zip' })];
}
// Check if we should extract based on preferences
const shouldExtract = await zipFileService.shouldUnzip(
zipBlob,
@ -97,8 +106,7 @@ export const useToolResources = () => {
return [new File([zipBlob], 'result.zip', { type: 'application/zip' })];
}
const zipFile = new File([zipBlob], 'temp.zip', { type: 'application/zip' });
const extractionResult = await zipFileService.extractPdfFiles(zipFile);
const extractionResult = await zipFileService.extractAllFiles(zipFile);
return extractionResult.success ? extractionResult.extractedFiles : [];
} catch (error) {
console.error('useToolResources.extractZipFiles - Error:', error);
@ -108,6 +116,15 @@ export const useToolResources = () => {
const extractAllZipFiles = useCallback(async (zipBlob: Blob, skipAutoUnzip = false): Promise<File[]> => {
try {
// Check if ZIP contains HTML files - if so, keep as ZIP
const zipFile = new File([zipBlob], 'temp.zip', { type: 'application/zip' });
const containsHtml = await zipFileService.containsHtmlFiles(zipFile);
if (containsHtml) {
// HTML files should stay zipped
return [new File([zipBlob], 'result.zip', { type: 'application/zip' })];
}
// Check if we should extract based on preferences
const shouldExtract = await zipFileService.shouldUnzip(
zipBlob,
@ -120,7 +137,6 @@ export const useToolResources = () => {
return [new File([zipBlob], 'result.zip', { type: 'application/zip' })];
}
const zipFile = new File([zipBlob], 'temp.zip', { type: 'application/zip' });
const extractionResult = await zipFileService.extractAllFiles(zipFile);
return extractionResult.success ? extractionResult.extractedFiles : [];
} catch (error) {

View File

@ -29,6 +29,7 @@ export interface ZipValidationResult {
fileCount: number;
totalSizeBytes: number;
containsPDFs: boolean;
containsFiles: boolean;
errors: string[];
}
@ -42,7 +43,6 @@ export interface ZipExtractionProgress {
export class ZipFileService {
private readonly maxFileSize = 100 * 1024 * 1024; // 100MB per file
private readonly maxTotalSize = 500 * 1024 * 1024; // 500MB total extraction limit
private readonly supportedExtensions = ['.pdf'];
// ZIP file validation constants
private static readonly VALID_ZIP_TYPES = [
@ -62,6 +62,7 @@ export class ZipFileService {
fileCount: 0,
totalSizeBytes: 0,
containsPDFs: false,
containsFiles: false,
errors: []
};
@ -115,10 +116,13 @@ export class ZipFileService {
result.fileCount = fileCount;
result.totalSizeBytes = totalSize;
result.containsPDFs = containsPDFs;
result.isValid = result.errors.length === 0 && containsPDFs;
result.containsFiles = fileCount > 0;
if (!containsPDFs) {
result.errors.push('ZIP file does not contain any PDF files');
// ZIP is valid if it has files and no size errors
result.isValid = result.errors.length === 0 && result.containsFiles;
if (!result.containsFiles) {
result.errors.push('ZIP file does not contain any files');
}
return result;
@ -278,6 +282,37 @@ export class ZipFileService {
return filename.toLowerCase().endsWith('.pdf');
}
/**
 * Report whether a filename ends with an HTML extension
 * (.html, .htm or .xhtml). The comparison ignores letter case.
 */
private isHtmlFile(filename: string): boolean {
  const normalized = filename.toLowerCase();
  return ['.html', '.htm', '.xhtml'].some((extension) => normalized.endsWith(extension));
}
/**
 * Determine whether a ZIP archive holds at least one HTML file.
 * Used to decide whether the ZIP should be kept intact (HTML present)
 * or extracted (other files).
 *
 * @param file - The ZIP archive to inspect
 * @returns true when any non-directory entry has an HTML filename; false otherwise,
 *          including when reading the archive throws (the error is logged)
 */
async containsHtmlFiles(file: Blob | File): Promise<boolean> {
  try {
    const archive = await new JSZip().loadAsync(file);

    // Directory entries are ignored; only real files are checked by name
    return Object.entries(archive.files).some(
      ([entryName, entry]) => !entry.dir && this.isHtmlFile(entryName)
    );
  } catch (error) {
    console.error('Error checking for HTML files:', error);
    return false;
  }
}
/**
* Validate that a file is actually a PDF by checking its header
*/
@ -486,9 +521,11 @@ export class ZipFileService {
}
/**
* Extract PDF files from ZIP and store them in IndexedDB with preserved history metadata
* Extract all files from ZIP and store them in IndexedDB with preserved history metadata
* Used by both FileManager and FileEditor to avoid code duplication
*
* Note: HTML files will NOT be extracted - the ZIP is kept intact when HTML is detected
*
* @param zipFile - The ZIP file to extract from
* @param zipStub - The StirlingFileStub for the ZIP (contains metadata to preserve)
* @returns Object with success status, extracted stubs, and any errors
@ -504,8 +541,15 @@ export class ZipFileService {
};
try {
// Extract PDF files from ZIP
const extractionResult = await this.extractPdfFiles(zipFile);
// Check if ZIP contains HTML files - if so, don't extract
const hasHtml = await this.containsHtmlFiles(zipFile);
if (hasHtml) {
result.errors.push('ZIP contains HTML files and will not be auto-extracted. Download the ZIP to access the files.');
return result;
}
// Extract all files from ZIP (not just PDFs)
const extractionResult = await this.extractAllFiles(zipFile);
if (!extractionResult.success || extractionResult.extractedFiles.length === 0) {
result.errors = extractionResult.errors;
@ -515,7 +559,7 @@ export class ZipFileService {
// Process each extracted file
for (const extractedFile of extractionResult.extractedFiles) {
try {
// Generate thumbnail
// Generate thumbnail (works for PDFs and images)
const thumbnail = await generateThumbnailForFile(extractedFile);
// Create StirlingFile

View File

@ -30,6 +30,7 @@ export class AutomationFileProcessor {
/**
* Extract files from a ZIP blob during automation execution, with fallback for non-ZIP files
* Extracts all file types (PDFs, images, etc.) except HTML files which stay zipped
*/
static async extractAutomationZipFiles(blob: Blob): Promise<AutomationProcessingResult> {
try {
@ -40,20 +41,26 @@ export class AutomationFileProcessor {
'application/zip'
);
const result = await zipFileService.extractPdfFiles(zipFile);
if (!result.success || result.extractedFiles.length === 0) {
// Fallback: treat as single PDF file
const fallbackFile = ResourceManager.createTimestampedFile(
blob,
AUTOMATION_CONSTANTS.RESULT_FILE_PREFIX,
'.pdf'
);
// Check if ZIP contains HTML files - if so, keep as ZIP
const containsHtml = await zipFileService.containsHtmlFiles(zipFile);
if (containsHtml) {
// HTML files should stay zipped - return ZIP as-is
return {
success: true,
files: [fallbackFile],
errors: [`ZIP extraction failed, treated as single file: ${result.errors?.join(', ') || 'Unknown error'}`]
files: [zipFile],
errors: []
};
}
// Extract all files (not just PDFs) - handles images from scanner-image-split, etc.
const result = await zipFileService.extractAllFiles(zipFile);
if (!result.success || result.extractedFiles.length === 0) {
// Fallback: keep as ZIP file (might be valid ZIP with extraction issues)
return {
success: true,
files: [zipFile],
errors: [`ZIP extraction failed, kept as ZIP: ${result.errors?.join(', ') || 'Unknown error'}`]
};
}
@ -63,18 +70,19 @@ export class AutomationFileProcessor {
errors: []
};
} catch (error) {
console.warn('Failed to extract automation ZIP files, falling back to single file:', error);
// Fallback: treat as single PDF file
console.warn('Failed to extract automation ZIP files, keeping as ZIP:', error);
// Fallback: keep as ZIP file for next automation step to handle
const fallbackFile = ResourceManager.createTimestampedFile(
blob,
AUTOMATION_CONSTANTS.RESULT_FILE_PREFIX,
'.pdf'
AUTOMATION_CONSTANTS.RESPONSE_ZIP_PREFIX,
'.zip',
'application/zip'
);
return {
success: true,
files: [fallbackFile],
errors: [`ZIP extraction failed, treated as single file: ${error}`]
errors: [`ZIP extraction failed, kept as ZIP: ${error}`]
};
}
}