diff --git a/frontend/src/components/fileEditor/FileEditor.tsx b/frontend/src/components/fileEditor/FileEditor.tsx index 5c76d2248..856c55b83 100644 --- a/frontend/src/components/fileEditor/FileEditor.tsx +++ b/frontend/src/components/fileEditor/FileEditor.tsx @@ -1,6 +1,6 @@ import React, { useState, useCallback, useRef, useMemo, useEffect } from 'react'; import { - Text, Center, Box, LoadingOverlay, Stack, Group + Text, Center, Box, LoadingOverlay, Stack } from '@mantine/core'; import { Dropzone } from '@mantine/dropzone'; import { useFileSelection, useFileState, useFileManagement, useFileActions } from '../../contexts/FileContext'; @@ -10,7 +10,6 @@ import { detectFileExtension } from '../../utils/fileUtils'; import FileEditorThumbnail from './FileEditorThumbnail'; import AddFileCard from './AddFileCard'; import FilePickerModal from '../shared/FilePickerModal'; -import SkeletonLoader from '../shared/SkeletonLoader'; import { FileId, StirlingFile } from '../../types/fileContext'; import { alert } from '../toast'; import { downloadBlob } from '../../utils/downloadUtils'; @@ -68,19 +67,6 @@ const FileEditor = ({ } }, [toolMode]); const [showFilePickerModal, setShowFilePickerModal] = useState(false); - const [zipExtractionProgress, setZipExtractionProgress] = useState<{ - isExtracting: boolean; - currentFile: string; - progress: number; - extractedCount: number; - totalFiles: number; - }>({ - isExtracting: false, - currentFile: '', - progress: 0, - extractedCount: 0, - totalFiles: 0 - }); // Get selected file IDs from context (defensive programming) const contextSelectedIds = Array.isArray(selectedFileIds) ? selectedFileIds : []; @@ -92,106 +78,26 @@ const FileEditor = ({ const localSelectedIds = contextSelectedIds; // Process uploaded files using context + // ZIP extraction is now handled automatically in FileContext based on user preferences const handleFileUpload = useCallback(async (uploadedFiles: File[]) => { _setError(null); try { - const allExtractedFiles: File[] = []; - const errors: string[] = []; - - for (const file of uploadedFiles) { - if (file.type === 'application/pdf') { - // Handle PDF files normally - allExtractedFiles.push(file); - } else if (file.type === 'application/zip' || file.type === 'application/x-zip-compressed' || file.name.toLowerCase().endsWith('.zip')) { - // Handle ZIP files - only expand if they contain PDFs - try { - // Validate ZIP file first - const validation = await zipFileService.validateZipFile(file); - - if (validation.isValid && validation.containsPDFs) { - // ZIP contains PDFs - extract them - setZipExtractionProgress({ - isExtracting: true, - currentFile: file.name, - progress: 0, - extractedCount: 0, - totalFiles: validation.fileCount - }); - - const extractionResult = await zipFileService.extractPdfFiles(file, (progress) => { - setZipExtractionProgress({ - isExtracting: true, - currentFile: progress.currentFile, - progress: progress.progress, - extractedCount: progress.extractedCount, - totalFiles: progress.totalFiles - }); - }); - - // Reset extraction progress - setZipExtractionProgress({ - isExtracting: false, - currentFile: '', - progress: 0, - extractedCount: 0, - totalFiles: 0 - }); - - if (extractionResult.success) { - allExtractedFiles.push(...extractionResult.extractedFiles); - - if (extractionResult.errors.length > 0) { - errors.push(...extractionResult.errors); - } - } else { - errors.push(`Failed to extract ZIP file "${file.name}": ${extractionResult.errors.join(', ')}`); - } - } else { - // ZIP doesn't contain PDFs or is invalid - treat as regular file - allExtractedFiles.push(file); - } - } catch (zipError) { - errors.push(`Failed to process ZIP file "${file.name}": ${zipError instanceof Error ? zipError.message : 'Unknown error'}`); - setZipExtractionProgress({ - isExtracting: false, - currentFile: '', - progress: 0, - extractedCount: 0, - totalFiles: 0 - }); - } - } else { - allExtractedFiles.push(file); - } - } - - // Show any errors - if (errors.length > 0) { - showError(errors.join('\n')); - } - - // Process all extracted files - if (allExtractedFiles.length > 0) { - // Add files to context and select them automatically - await addFiles(allExtractedFiles, { selectFiles: true }); - showStatus(`Added ${allExtractedFiles.length} files`, 'success'); + if (uploadedFiles.length > 0) { + // FileContext will automatically handle ZIP extraction based on user preferences + // - Respects autoUnzip setting + // - Respects autoUnzipFileLimit + // - HTML ZIPs stay intact + // - Non-ZIP files pass through unchanged + await addFiles(uploadedFiles, { selectFiles: true }); + showStatus(`Added ${uploadedFiles.length} file(s)`, 'success'); } } catch (err) { const errorMessage = err instanceof Error ? err.message : 'Failed to process files'; showError(errorMessage); console.error('File processing error:', err); - - // Reset extraction progress on error - setZipExtractionProgress({ - isExtracting: false, - currentFile: '', - progress: 0, - extractedCount: 0, - totalFiles: 0 - }); } - }, [addFiles]); + }, [addFiles, showStatus, showError]); const toggleFile = useCallback((fileId: FileId) => { const currentSelectedIds = contextSelectedIdsRef.current; @@ -394,7 +300,7 @@ const FileEditor = ({ - {activeStirlingFileStubs.length === 0 && !zipExtractionProgress.isExtracting ? ( + {activeStirlingFileStubs.length === 0 ? (
📁 @@ -402,43 +308,6 @@ const FileEditor = ({ Upload PDF files, ZIP archives, or load from storage to get started
- ) : activeStirlingFileStubs.length === 0 && zipExtractionProgress.isExtracting ? ( - - - - {/* ZIP Extraction Progress */} - {zipExtractionProgress.isExtracting && ( - - - Extracting ZIP archive... - {Math.round(zipExtractionProgress.progress)}% - - - {zipExtractionProgress.currentFile || 'Processing files...'} - - - {zipExtractionProgress.extractedCount} of {zipExtractionProgress.totalFiles} files extracted - -
-
-
- - )} - - - - ) : (
=> { - const stirlingFiles = await addFiles({ files, ...options }, stateRef, filesRef, dispatch, lifecycleManager, enablePersistence); + const addRawFiles = useCallback(async (files: File[], options?: { insertAfterPageId?: string; selectFiles?: boolean; skipAutoUnzip?: boolean }): Promise => { + const stirlingFiles = await addFiles( + { + files, + ...options, + // For direct file uploads: ALWAYS unzip (except HTML ZIPs) + // skipAutoUnzip bypasses preference checks - HTML detection still applies + skipAutoUnzip: true + }, + stateRef, + filesRef, + dispatch, + lifecycleManager, + enablePersistence + ); // Auto-select the newly added files if requested if (options?.selectFiles && stirlingFiles.length > 0) { diff --git a/frontend/src/contexts/file/fileActions.ts b/frontend/src/contexts/file/fileActions.ts index 3f3ec07c7..c1b23d408 100644 --- a/frontend/src/contexts/file/fileActions.ts +++ b/frontend/src/contexts/file/fileActions.ts @@ -18,6 +18,7 @@ import { FileLifecycleManager } from './lifecycle'; import { buildQuickKeySet } from './fileSelectors'; import { StirlingFile } from '../../types/fileContext'; import { fileStorage } from '../../services/fileStorage'; +import { zipFileService } from '../../services/zipFileService'; const DEBUG = process.env.NODE_ENV === 'development'; /** @@ -172,6 +173,11 @@ interface AddFileOptions { // Auto-selection after adding selectFiles?: boolean; + + // Auto-unzip control + autoUnzip?: boolean; + autoUnzipFileLimit?: number; + skipAutoUnzip?: boolean; // When true: always unzip (except HTML). Used for file uploads. When false: respect autoUnzip/autoUnzipFileLimit preferences. Used for tool outputs. } /** @@ -198,7 +204,58 @@ export async function addFiles( const { files = [] } = options; if (DEBUG) console.log(`📄 addFiles(raw): Adding ${files.length} files with immediate thumbnail generation`); + // ZIP pre-processing: Extract ZIP files with configurable behavior + // - File uploads: skipAutoUnzip=true → always extract (except HTML) + // - Tool outputs: skipAutoUnzip=false → respect user preferences + const filesToProcess: File[] = []; + const autoUnzip = options.autoUnzip ?? true; // Default to true + const autoUnzipFileLimit = options.autoUnzipFileLimit ?? 4; // Default limit + const skipAutoUnzip = options.skipAutoUnzip ?? false; + for (const file of files) { + // Check if file is a ZIP + if (zipFileService.isZipFile(file)) { + try { + if (DEBUG) console.log(`📄 addFiles: Detected ZIP file: ${file.name}`); + + // Check if ZIP contains HTML files - if so, keep as ZIP + const containsHtml = await zipFileService.containsHtmlFiles(file); + if (containsHtml) { + if (DEBUG) console.log(`📄 addFiles: ZIP contains HTML, keeping as ZIP: ${file.name}`); + filesToProcess.push(file); + continue; + } + + // Apply extraction with preferences + const extractedFiles = await zipFileService.extractWithPreferences(file, { + autoUnzip, + autoUnzipFileLimit, + skipAutoUnzip + }); + + if (extractedFiles.length === 1 && extractedFiles[0] === file) { + // ZIP was not extracted (over limit or autoUnzip disabled) + if (DEBUG) console.log(`📄 addFiles: ZIP not extracted (preferences): ${file.name}`); + } else { + // ZIP was extracted + if (DEBUG) console.log(`📄 addFiles: Extracted ${extractedFiles.length} files from ZIP: ${file.name}`); + } + + filesToProcess.push(...extractedFiles); + } catch (error) { + console.error(`📄 addFiles: Failed to process ZIP file ${file.name}:`, error); + // On error, keep the ZIP file as-is + filesToProcess.push(file); + } + } else { + // Not a ZIP file, add as-is + filesToProcess.push(file); + } + } + + if (DEBUG) console.log(`📄 addFiles: After ZIP processing, ${filesToProcess.length} files to add`); + + for (const file of filesToProcess) { const quickKey = createQuickKey(file); // Soft deduplication: Check if file already exists by metadata diff --git a/frontend/src/hooks/tools/extractImages/useExtractImagesOperation.ts b/frontend/src/hooks/tools/extractImages/useExtractImagesOperation.ts index 27a997e2a..a5ef98357 100644 --- a/frontend/src/hooks/tools/extractImages/useExtractImagesOperation.ts +++ b/frontend/src/hooks/tools/extractImages/useExtractImagesOperation.ts @@ -1,8 +1,9 @@ +import { useCallback } from 'react'; import { useTranslation } from 'react-i18next'; import { useToolOperation, ToolType } from '../shared/useToolOperation'; import { createStandardErrorHandler } from '../../../utils/toolErrorHandler'; import { ExtractImagesParameters, defaultParameters } from './useExtractImagesParameters'; -import JSZip from 'jszip'; +import { useToolResources } from '../shared/useToolResources'; // Static configuration that can be used by both the hook and automation executor export const buildExtractImagesFormData = (parameters: ExtractImagesParameters, file: File): FormData => { @@ -13,39 +14,28 @@ export const buildExtractImagesFormData = (parameters: ExtractImagesParameters, return formData; }; -// Response handler for extract-images which returns a ZIP file -const extractImagesResponseHandler = async (responseData: Blob, _originalFiles: File[]): Promise => { - const zip = new JSZip(); - const zipContent = await zip.loadAsync(responseData); - const extractedFiles: File[] = []; - - for (const [filename, file] of Object.entries(zipContent.files)) { - if (!file.dir) { - const blob = await file.async('blob'); - const extractedFile = new File([blob], filename, { type: blob.type }); - extractedFiles.push(extractedFile); - } - } - - return extractedFiles; -}; - -// Static configuration object +// Static configuration object (without response handler - will be added in hook) export const extractImagesOperationConfig = { toolType: ToolType.singleFile, buildFormData: buildExtractImagesFormData, operationType: 'extractImages', endpoint: '/api/v1/misc/extract-images', defaultParameters, - // Extract-images returns a ZIP file containing multiple image files - responseHandler: extractImagesResponseHandler, } as const; export const useExtractImagesOperation = () => { const { t } = useTranslation(); + const { extractZipFiles } = useToolResources(); + + // Response handler that respects auto-unzip preferences + const responseHandler = useCallback(async (blob: Blob, _originalFiles: File[]): Promise => { + // Extract images returns a ZIP file - use preference-aware extraction + return await extractZipFiles(blob); + }, [extractZipFiles]); return useToolOperation({ ...extractImagesOperationConfig, + responseHandler, getErrorMessage: createStandardErrorHandler(t('extractImages.error.failed', 'An error occurred while extracting images from the PDF.')) }); }; \ No newline at end of file diff --git a/frontend/src/hooks/tools/scannerImageSplit/useScannerImageSplitOperation.ts b/frontend/src/hooks/tools/scannerImageSplit/useScannerImageSplitOperation.ts index 65b4ba1c0..65f7a4b58 100644 --- a/frontend/src/hooks/tools/scannerImageSplit/useScannerImageSplitOperation.ts +++ b/frontend/src/hooks/tools/scannerImageSplit/useScannerImageSplitOperation.ts @@ -27,14 +27,14 @@ export const scannerImageSplitOperationConfig = { export const useScannerImageSplitOperation = () => { const { t } = useTranslation(); - const { extractAllZipFiles } = useToolResources(); + const { extractZipFiles } = useToolResources(); // Custom response handler that extracts ZIP files containing images // Can't add to exported config because it requires access to the hook so must be part of the hook const responseHandler = useCallback(async (blob: Blob, originalFiles: File[]): Promise => { try { // Scanner image split returns ZIP files with multiple images - const extractedFiles = await extractAllZipFiles(blob); + const extractedFiles = await extractZipFiles(blob); // If extraction succeeded and returned files, use them if (extractedFiles.length > 0) { @@ -49,7 +49,7 @@ export const useScannerImageSplitOperation = () => { const baseFileName = inputFileName.replace(/\.[^.]+$/, ''); const singleFile = new File([blob], `${baseFileName}.png`, { type: 'image/png' }); return [singleFile]; - }, [extractAllZipFiles]); + }, [extractZipFiles]); const config: ToolOperationConfig = { ...scannerImageSplitOperationConfig, diff --git a/frontend/src/hooks/tools/shared/useToolOperation.ts b/frontend/src/hooks/tools/shared/useToolOperation.ts index d28e5ce77..f5e575a14 100644 --- a/frontend/src/hooks/tools/shared/useToolOperation.ts +++ b/frontend/src/hooks/tools/shared/useToolOperation.ts @@ -151,7 +151,7 @@ export const useToolOperation = ( const { state, actions } = useToolState(); const { actions: fileActions } = useFileContext(); const { processFiles, cancelOperation: cancelApiCalls } = useToolApiCalls(); - const { generateThumbnails, createDownloadInfo, cleanupBlobUrls, extractZipFiles, extractAllZipFiles } = useToolResources(); + const { generateThumbnails, createDownloadInfo, cleanupBlobUrls, extractZipFiles } = useToolResources(); // Track last operation for undo functionality const lastOperationRef = useRef<{ @@ -259,11 +259,6 @@ export const useToolOperation = ( // Default: assume ZIP response for multi-file endpoints // Note: extractZipFiles will check preferences.autoUnzip setting processedFiles = await extractZipFiles(response.data); - - if (processedFiles.length === 0) { - // Try the generic extraction as fallback - processedFiles = await extractAllZipFiles(response.data); - } } // Assume all inputs succeeded together unless server provided an error earlier successSourceIds = validFiles.map(f => (f as any).fileId) as any; @@ -446,7 +441,7 @@ export const useToolOperation = ( actions.setLoading(false); actions.setProgress(null); } - }, [t, config, actions, addFiles, consumeFiles, processFiles, generateThumbnails, createDownloadInfo, cleanupBlobUrls, extractZipFiles, extractAllZipFiles]); + }, [t, config, actions, addFiles, consumeFiles, processFiles, generateThumbnails, createDownloadInfo, cleanupBlobUrls, extractZipFiles]); const cancelOperation = useCallback(() => { cancelApiCalls(); diff --git a/frontend/src/hooks/tools/shared/useToolResources.ts b/frontend/src/hooks/tools/shared/useToolResources.ts index 366730885..eb5a3f37d 100644 --- a/frontend/src/hooks/tools/shared/useToolResources.ts +++ b/frontend/src/hooks/tools/shared/useToolResources.ts @@ -27,11 +27,11 @@ export const useToolResources = () => { // Cleanup on unmount - use ref to avoid dependency on blobUrls state const blobUrlsRef = useRef([]); - + useEffect(() => { blobUrlsRef.current = blobUrls; }, [blobUrls]); - + useEffect(() => { return () => { blobUrlsRef.current.forEach(url => { @@ -85,50 +85,17 @@ export const useToolResources = () => { const extractZipFiles = useCallback(async (zipBlob: Blob, skipAutoUnzip = false): Promise => { try { - // Check if we should extract based on preferences - const shouldExtract = await zipFileService.shouldUnzip( - zipBlob, - preferences.autoUnzip, - preferences.autoUnzipFileLimit, + return await zipFileService.extractWithPreferences(zipBlob, { + autoUnzip: preferences.autoUnzip, + autoUnzipFileLimit: preferences.autoUnzipFileLimit, skipAutoUnzip - ); - - if (!shouldExtract) { - return [new File([zipBlob], 'result.zip', { type: 'application/zip' })]; - } - - const zipFile = new File([zipBlob], 'temp.zip', { type: 'application/zip' }); - const extractionResult = await zipFileService.extractPdfFiles(zipFile); - return extractionResult.success ? extractionResult.extractedFiles : []; + }); } catch (error) { console.error('useToolResources.extractZipFiles - Error:', error); return []; } }, [preferences.autoUnzip, preferences.autoUnzipFileLimit]); - const extractAllZipFiles = useCallback(async (zipBlob: Blob, skipAutoUnzip = false): Promise => { - try { - // Check if we should extract based on preferences - const shouldExtract = await zipFileService.shouldUnzip( - zipBlob, - preferences.autoUnzip, - preferences.autoUnzipFileLimit, - skipAutoUnzip - ); - - if (!shouldExtract) { - return [new File([zipBlob], 'result.zip', { type: 'application/zip' })]; - } - - const zipFile = new File([zipBlob], 'temp.zip', { type: 'application/zip' }); - const extractionResult = await zipFileService.extractAllFiles(zipFile); - return extractionResult.success ? extractionResult.extractedFiles : []; - } catch (error) { - console.error('useToolResources.extractAllZipFiles - Error:', error); - return []; - } - }, [preferences.autoUnzip, preferences.autoUnzipFileLimit]); - const createDownloadInfo = useCallback(async ( files: File[], operationType: string @@ -152,7 +119,6 @@ export const useToolResources = () => { generateThumbnailsWithMetadata, createDownloadInfo, extractZipFiles, - extractAllZipFiles, cleanupBlobUrls, }; }; diff --git a/frontend/src/services/zipFileService.ts b/frontend/src/services/zipFileService.ts index 45ec39219..2b5162cad 100644 --- a/frontend/src/services/zipFileService.ts +++ b/frontend/src/services/zipFileService.ts @@ -29,6 +29,7 @@ export interface ZipValidationResult { fileCount: number; totalSizeBytes: number; containsPDFs: boolean; + containsFiles: boolean; errors: string[]; } @@ -42,7 +43,6 @@ export interface ZipExtractionProgress { export class ZipFileService { private readonly maxFileSize = 100 * 1024 * 1024; // 100MB per file private readonly maxTotalSize = 500 * 1024 * 1024; // 500MB total extraction limit - private readonly supportedExtensions = ['.pdf']; // ZIP file validation constants private static readonly VALID_ZIP_TYPES = [ @@ -62,6 +62,7 @@ export class ZipFileService { fileCount: 0, totalSizeBytes: 0, containsPDFs: false, + containsFiles: false, errors: [] }; @@ -115,10 +116,13 @@ export class ZipFileService { result.fileCount = fileCount; result.totalSizeBytes = totalSize; result.containsPDFs = containsPDFs; - result.isValid = result.errors.length === 0 && containsPDFs; + result.containsFiles = fileCount > 0; - if (!containsPDFs) { - result.errors.push('ZIP file does not contain any PDF files'); + // ZIP is valid if it has files and no size errors + result.isValid = result.errors.length === 0 && result.containsFiles; + + if (!result.containsFiles) { + result.errors.push('ZIP file does not contain any files'); } return result; @@ -278,6 +282,37 @@ export class ZipFileService { return filename.toLowerCase().endsWith('.pdf'); } + /** + * Check if a filename indicates an HTML file + */ + private isHtmlFile(filename: string): boolean { + const lowerName = filename.toLowerCase(); + return lowerName.endsWith('.html') || lowerName.endsWith('.htm') || lowerName.endsWith('.xhtml'); + } + + /** + * Check if a ZIP file contains HTML files + * Used to determine if the ZIP should be kept intact (HTML) or extracted (other files) + */ + async containsHtmlFiles(file: Blob | File): Promise { + try { + const zip = new JSZip(); + const zipContents = await zip.loadAsync(file); + + // Check if any file is an HTML file + for (const [filename, zipEntry] of Object.entries(zipContents.files)) { + if (!zipEntry.dir && this.isHtmlFile(filename)) { + return true; + } + } + + return false; + } catch (error) { + console.error('Error checking for HTML files:', error); + return false; + } + } + /** * Validate that a file is actually a PDF by checking its header */ @@ -366,6 +401,62 @@ export class ZipFileService { } } + /** + * Extract files from ZIP with HTML detection and preference checking + * This is the unified method that handles the common pattern of: + * 1. Check for HTML files → keep zipped if present + * 2. Check user preferences → respect autoUnzipFileLimit + * 3. Extract files if appropriate + * + * @param zipBlob - The ZIP blob to process + * @param options - Extraction options + * @returns Array of files (either extracted or the ZIP itself) + */ + async extractWithPreferences( + zipBlob: Blob, + options: { + autoUnzip: boolean; + autoUnzipFileLimit: number; + skipAutoUnzip?: boolean; + } + ): Promise { + try { + // Create File object if not already + const zipFile = zipBlob instanceof File + ? zipBlob + : new File([zipBlob], 'result.zip', { type: 'application/zip' }); + + // Check if ZIP contains HTML files - if so, keep as ZIP + const containsHtml = await this.containsHtmlFiles(zipFile); + if (containsHtml) { + return [zipFile]; + } + + // Check if we should extract based on preferences + const shouldExtract = await this.shouldUnzip( + zipBlob, + options.autoUnzip, + options.autoUnzipFileLimit, + options.skipAutoUnzip || false + ); + + if (!shouldExtract) { + return [zipFile]; + } + + // Extract all files + const extractionResult = await this.extractAllFiles(zipFile); + return extractionResult.success ? extractionResult.extractedFiles : [zipFile]; + } catch (error) { + console.error('Error in extractWithPreferences:', error); + // On error, return ZIP as-is + const zipFile = zipBlob instanceof File + ? zipBlob + : new File([zipBlob], 'result.zip', { type: 'application/zip' }); + return [zipFile]; + } + } + /** * Extract all files from a ZIP archive (not limited to PDFs) */ @@ -486,9 +577,11 @@ export class ZipFileService { } /** - * Extract PDF files from ZIP and store them in IndexedDB with preserved history metadata + * Extract all files from ZIP and store them in IndexedDB with preserved history metadata * Used by both FileManager and FileEditor to avoid code duplication * + * Note: HTML files will NOT be extracted - the ZIP is kept intact when HTML is detected + * * @param zipFile - The ZIP file to extract from * @param zipStub - The StirlingFileStub for the ZIP (contains metadata to preserve) * @returns Object with success status, extracted stubs, and any errors @@ -504,8 +597,15 @@ export class ZipFileService { }; try { - // Extract PDF files from ZIP - const extractionResult = await this.extractPdfFiles(zipFile); + // Check if ZIP contains HTML files - if so, don't extract + const hasHtml = await this.containsHtmlFiles(zipFile); + if (hasHtml) { + result.errors.push('ZIP contains HTML files and will not be auto-extracted. Download the ZIP to access the files.'); + return result; + } + + // Extract all files from ZIP (not just PDFs) + const extractionResult = await this.extractAllFiles(zipFile); if (!extractionResult.success || extractionResult.extractedFiles.length === 0) { result.errors = extractionResult.errors; @@ -515,7 +615,7 @@ export class ZipFileService { // Process each extracted file for (const extractedFile of extractionResult.extractedFiles) { try { - // Generate thumbnail + // Generate thumbnail (works for PDFs and images) const thumbnail = await generateThumbnailForFile(extractedFile); // Create StirlingFile diff --git a/frontend/src/utils/automationFileProcessor.ts b/frontend/src/utils/automationFileProcessor.ts index d81dd3a1b..4b7417177 100644 --- a/frontend/src/utils/automationFileProcessor.ts +++ b/frontend/src/utils/automationFileProcessor.ts @@ -30,6 +30,7 @@ export class AutomationFileProcessor { /** * Extract files from a ZIP blob during automation execution, with fallback for non-ZIP files + * Extracts all file types (PDFs, images, etc.) except HTML files which stay zipped */ static async extractAutomationZipFiles(blob: Blob): Promise { try { @@ -40,20 +41,26 @@ export class AutomationFileProcessor { 'application/zip' ); - const result = await zipFileService.extractPdfFiles(zipFile); - - if (!result.success || result.extractedFiles.length === 0) { - // Fallback: treat as single PDF file - const fallbackFile = ResourceManager.createTimestampedFile( - blob, - AUTOMATION_CONSTANTS.RESULT_FILE_PREFIX, - '.pdf' - ); - + // Check if ZIP contains HTML files - if so, keep as ZIP + const containsHtml = await zipFileService.containsHtmlFiles(zipFile); + if (containsHtml) { + // HTML files should stay zipped - return ZIP as-is return { success: true, - files: [fallbackFile], - errors: [`ZIP extraction failed, treated as single file: ${result.errors?.join(', ') || 'Unknown error'}`] + files: [zipFile], + errors: [] + }; + } + + // Extract all files (not just PDFs) - handles images from scanner-image-split, etc. + const result = await zipFileService.extractAllFiles(zipFile); + + if (!result.success || result.extractedFiles.length === 0) { + // Fallback: keep as ZIP file (might be valid ZIP with extraction issues) + return { + success: true, + files: [zipFile], + errors: [`ZIP extraction failed, kept as ZIP: ${result.errors?.join(', ') || 'Unknown error'}`] }; } @@ -63,18 +70,19 @@ export class AutomationFileProcessor { errors: [] }; } catch (error) { - console.warn('Failed to extract automation ZIP files, falling back to single file:', error); - // Fallback: treat as single PDF file + console.warn('Failed to extract automation ZIP files, keeping as ZIP:', error); + // Fallback: keep as ZIP file for next automation step to handle const fallbackFile = ResourceManager.createTimestampedFile( blob, - AUTOMATION_CONSTANTS.RESULT_FILE_PREFIX, - '.pdf' + AUTOMATION_CONSTANTS.RESPONSE_ZIP_PREFIX, + '.zip', + 'application/zip' ); return { success: true, files: [fallbackFile], - errors: [`ZIP extraction failed, treated as single file: ${error}`] + errors: [`ZIP extraction failed, kept as ZIP: ${error}`] }; } }