Fix/V2/unzip_images (#4647)

Method Usage by Context

| Context | Method Used | Respects Preferences | HTML Detection |
|------------------------------|-------------------------------------------------------|------------------------|----------------|
| Tools (via useToolResources) | extractZipFiles() → extractWithPreferences() | Yes | Yes |
| Automation | extractAutomationZipFiles() → extractAllFiles() | No (always extracts) | Yes |
| Manual Unzip | extractAndStoreFilesWithHistory() → extractAllFiles() | No (always extracts) | Yes |
| Auto-Upload | extractAllFiles() directly | No (always extracts) | Yes |

  Detailed Behavior Matrix

| Context | HTML Files | Auto-Unzip OFF | Within Limit | Exceeds Limit | Notes |
|--------------------------|-------------|----------------|--------------|---------------|----------------------------------------|
| Tools (useToolResources) | Keep zipped | Keep zipped | Extract all | Keep zipped | Respects user preferences |
| Automation | Keep zipped | Extract all | Extract all | Extract all | Ignores preferences (automation needs) |
| Manual Unzip | Keep zipped | Extract all | Extract all | Extract all | User explicitly unzipping |
| Auto-Upload | Keep zipped | Extract all | Extract all | Extract all | User dropped files |

  Simplified Decision Flow

  ZIP File Received
      │
      ├─ Contains HTML? → Keep as ZIP (all contexts)
      │
      └─ No HTML
          │
          ├─ Tools Context
          │   ├─ Auto-unzip OFF? → Keep as ZIP
          │   └─ Auto-unzip ON
          │       ├─ File count ≤ limit? → Extract all
          │       └─ File count > limit? → Keep as ZIP
          │
          └─ Automation/Manual/Auto-Upload
              └─ Extract all (ignore preferences)

  Key Changes from Previous Version
  
| Entry Point | Code Path | skipAutoUnzip | Respects Preferences? | HTML Detection? | Extraction Behavior |
|-----------------------------------------------|----------------------------------------------------------------------------------------|---------------|-----------------------|---------------------------|-------------------------------------------------------------------------|
| Direct File Upload (FileEditor, LandingPage) | FileContext.addRawFiles() → fileActions.addFiles() | true | No | Yes | Always extract (except HTML ZIPs) |
| Tool Outputs (Split, Merge, etc.) | useToolResources.extractZipFiles() → zipFileService.extractWithPreferences() | false | Yes | Yes | Conditional: Only if autoUnzip=true AND file count ≤ autoUnzipFileLimit |
| Load from Storage (FileManager) | fileActions.addStirlingFileStubs() | N/A | N/A | N/A | No extraction - files already processed |
| Automation Outputs | AutomationFileProcessor.extractAutomationZipFiles() → zipFileService.extractAllFiles() | N/A | No | Yes | Always extract (except HTML ZIPs) |
| Manual Unzip Action (FileEditor context menu) | zipFileService.extractAndStoreFilesWithHistory() → extractAllFiles() | N/A | No | Yes (blocks extraction) | Always extract (except HTML ZIPs) - explicit user action |

---------

Co-authored-by: Connor Yoh <connor@stirlingpdf.com>
This commit is contained in:
ConnorYoh 2025-10-15 15:17:44 +01:00 committed by GitHub
parent bcd7762594
commit 43887c8179
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 239 additions and 241 deletions

View File

@ -1,6 +1,6 @@
import React, { useState, useCallback, useRef, useMemo, useEffect } from 'react';
import {
Text, Center, Box, LoadingOverlay, Stack, Group
Text, Center, Box, LoadingOverlay, Stack
} from '@mantine/core';
import { Dropzone } from '@mantine/dropzone';
import { useFileSelection, useFileState, useFileManagement, useFileActions } from '../../contexts/FileContext';
@ -10,7 +10,6 @@ import { detectFileExtension } from '../../utils/fileUtils';
import FileEditorThumbnail from './FileEditorThumbnail';
import AddFileCard from './AddFileCard';
import FilePickerModal from '../shared/FilePickerModal';
import SkeletonLoader from '../shared/SkeletonLoader';
import { FileId, StirlingFile } from '../../types/fileContext';
import { alert } from '../toast';
import { downloadBlob } from '../../utils/downloadUtils';
@ -68,19 +67,6 @@ const FileEditor = ({
}
}, [toolMode]);
const [showFilePickerModal, setShowFilePickerModal] = useState(false);
const [zipExtractionProgress, setZipExtractionProgress] = useState<{
isExtracting: boolean;
currentFile: string;
progress: number;
extractedCount: number;
totalFiles: number;
}>({
isExtracting: false,
currentFile: '',
progress: 0,
extractedCount: 0,
totalFiles: 0
});
// Get selected file IDs from context (defensive programming)
const contextSelectedIds = Array.isArray(selectedFileIds) ? selectedFileIds : [];
@ -92,106 +78,26 @@ const FileEditor = ({
const localSelectedIds = contextSelectedIds;
// Process uploaded files using context
// ZIP extraction is now handled in FileContext: direct uploads are always extracted, except ZIPs containing HTML
const handleFileUpload = useCallback(async (uploadedFiles: File[]) => {
_setError(null);
try {
const allExtractedFiles: File[] = [];
const errors: string[] = [];
for (const file of uploadedFiles) {
if (file.type === 'application/pdf') {
// Handle PDF files normally
allExtractedFiles.push(file);
} else if (file.type === 'application/zip' || file.type === 'application/x-zip-compressed' || file.name.toLowerCase().endsWith('.zip')) {
// Handle ZIP files - only expand if they contain PDFs
try {
// Validate ZIP file first
const validation = await zipFileService.validateZipFile(file);
if (validation.isValid && validation.containsPDFs) {
// ZIP contains PDFs - extract them
setZipExtractionProgress({
isExtracting: true,
currentFile: file.name,
progress: 0,
extractedCount: 0,
totalFiles: validation.fileCount
});
const extractionResult = await zipFileService.extractPdfFiles(file, (progress) => {
setZipExtractionProgress({
isExtracting: true,
currentFile: progress.currentFile,
progress: progress.progress,
extractedCount: progress.extractedCount,
totalFiles: progress.totalFiles
});
});
// Reset extraction progress
setZipExtractionProgress({
isExtracting: false,
currentFile: '',
progress: 0,
extractedCount: 0,
totalFiles: 0
});
if (extractionResult.success) {
allExtractedFiles.push(...extractionResult.extractedFiles);
if (extractionResult.errors.length > 0) {
errors.push(...extractionResult.errors);
}
} else {
errors.push(`Failed to extract ZIP file "${file.name}": ${extractionResult.errors.join(', ')}`);
}
} else {
// ZIP doesn't contain PDFs or is invalid - treat as regular file
allExtractedFiles.push(file);
}
} catch (zipError) {
errors.push(`Failed to process ZIP file "${file.name}": ${zipError instanceof Error ? zipError.message : 'Unknown error'}`);
setZipExtractionProgress({
isExtracting: false,
currentFile: '',
progress: 0,
extractedCount: 0,
totalFiles: 0
});
}
} else {
allExtractedFiles.push(file);
}
}
// Show any errors
if (errors.length > 0) {
showError(errors.join('\n'));
}
// Process all extracted files
if (allExtractedFiles.length > 0) {
// Add files to context and select them automatically
await addFiles(allExtractedFiles, { selectFiles: true });
showStatus(`Added ${allExtractedFiles.length} files`, 'success');
if (uploadedFiles.length > 0) {
// FileContext handles ZIP extraction for direct uploads:
// - ZIPs are always extracted (skipAutoUnzip is forced on for uploads, so the
//   autoUnzip / autoUnzipFileLimit preferences are not consulted)
// - HTML ZIPs stay intact
// - Non-ZIP files pass through unchanged
await addFiles(uploadedFiles, { selectFiles: true });
showStatus(`Added ${uploadedFiles.length} file(s)`, 'success');
}
} catch (err) {
const errorMessage = err instanceof Error ? err.message : 'Failed to process files';
showError(errorMessage);
console.error('File processing error:', err);
// Reset extraction progress on error
setZipExtractionProgress({
isExtracting: false,
currentFile: '',
progress: 0,
extractedCount: 0,
totalFiles: 0
});
}
}, [addFiles]);
}, [addFiles, showStatus, showError]);
const toggleFile = useCallback((fileId: FileId) => {
const currentSelectedIds = contextSelectedIdsRef.current;
@ -394,7 +300,7 @@ const FileEditor = ({
<Box p="md">
{activeStirlingFileStubs.length === 0 && !zipExtractionProgress.isExtracting ? (
{activeStirlingFileStubs.length === 0 ? (
<Center h="60vh">
<Stack align="center" gap="md">
<Text size="lg" c="dimmed">📁</Text>
@ -402,43 +308,6 @@ const FileEditor = ({
<Text size="sm" c="dimmed">Upload PDF files, ZIP archives, or load from storage to get started</Text>
</Stack>
</Center>
) : activeStirlingFileStubs.length === 0 && zipExtractionProgress.isExtracting ? (
<Box>
<SkeletonLoader type="controls" />
{/* ZIP Extraction Progress */}
{zipExtractionProgress.isExtracting && (
<Box mb="md" p="sm" style={{ backgroundColor: 'var(--mantine-color-orange-0)', borderRadius: 8 }}>
<Group justify="space-between" mb="xs">
<Text size="sm" fw={500}>Extracting ZIP archive...</Text>
<Text size="sm" c="dimmed">{Math.round(zipExtractionProgress.progress)}%</Text>
</Group>
<Text size="xs" c="dimmed" mb="xs">
{zipExtractionProgress.currentFile || 'Processing files...'}
</Text>
<Text size="xs" c="dimmed" mb="xs">
{zipExtractionProgress.extractedCount} of {zipExtractionProgress.totalFiles} files extracted
</Text>
<div style={{
width: '100%',
height: '4px',
backgroundColor: 'var(--mantine-color-gray-2)',
borderRadius: '2px',
overflow: 'hidden'
}}>
<div style={{
width: `${Math.round(zipExtractionProgress.progress)}%`,
height: '100%',
backgroundColor: 'var(--mantine-color-orange-6)',
transition: 'width 0.3s ease'
}} />
</div>
</Box>
)}
<SkeletonLoader type="fileGrid" count={6} />
</Box>
) : (
<div
style={{

View File

@ -79,8 +79,21 @@ function FileContextInner({
};
// File operations using unified addFiles helper with persistence
const addRawFiles = useCallback(async (files: File[], options?: { insertAfterPageId?: string; selectFiles?: boolean }): Promise<StirlingFile[]> => {
const stirlingFiles = await addFiles({ files, ...options }, stateRef, filesRef, dispatch, lifecycleManager, enablePersistence);
const addRawFiles = useCallback(async (files: File[], options?: { insertAfterPageId?: string; selectFiles?: boolean; skipAutoUnzip?: boolean }): Promise<StirlingFile[]> => {
const stirlingFiles = await addFiles(
{
files,
...options,
// For direct file uploads: ALWAYS unzip (except HTML ZIPs)
// skipAutoUnzip bypasses preference checks - HTML detection still applies
skipAutoUnzip: true
},
stateRef,
filesRef,
dispatch,
lifecycleManager,
enablePersistence
);
// Auto-select the newly added files if requested
if (options?.selectFiles && stirlingFiles.length > 0) {

View File

@ -18,6 +18,7 @@ import { FileLifecycleManager } from './lifecycle';
import { buildQuickKeySet } from './fileSelectors';
import { StirlingFile } from '../../types/fileContext';
import { fileStorage } from '../../services/fileStorage';
import { zipFileService } from '../../services/zipFileService';
const DEBUG = process.env.NODE_ENV === 'development';
/**
@ -172,6 +173,11 @@ interface AddFileOptions {
// Auto-selection after adding
selectFiles?: boolean;
// Auto-unzip control
autoUnzip?: boolean;
autoUnzipFileLimit?: number;
skipAutoUnzip?: boolean; // When true: always unzip (except HTML). Used for file uploads. When false: respect autoUnzip/autoUnzipFileLimit preferences. Used for tool outputs.
}
/**
@ -198,7 +204,58 @@ export async function addFiles(
const { files = [] } = options;
if (DEBUG) console.log(`📄 addFiles(raw): Adding ${files.length} files with immediate thumbnail generation`);
// ZIP pre-processing: Extract ZIP files with configurable behavior
// - File uploads: skipAutoUnzip=true → always extract (except HTML)
// - Tool outputs: skipAutoUnzip=false → respect user preferences
const filesToProcess: File[] = [];
const autoUnzip = options.autoUnzip ?? true; // Default to true
const autoUnzipFileLimit = options.autoUnzipFileLimit ?? 4; // Default limit
const skipAutoUnzip = options.skipAutoUnzip ?? false;
for (const file of files) {
// Check if file is a ZIP
if (zipFileService.isZipFile(file)) {
try {
if (DEBUG) console.log(`📄 addFiles: Detected ZIP file: ${file.name}`);
// Check if ZIP contains HTML files - if so, keep as ZIP
const containsHtml = await zipFileService.containsHtmlFiles(file);
if (containsHtml) {
if (DEBUG) console.log(`📄 addFiles: ZIP contains HTML, keeping as ZIP: ${file.name}`);
filesToProcess.push(file);
continue;
}
// Apply extraction with preferences
const extractedFiles = await zipFileService.extractWithPreferences(file, {
autoUnzip,
autoUnzipFileLimit,
skipAutoUnzip
});
if (extractedFiles.length === 1 && extractedFiles[0] === file) {
// ZIP was not extracted (over limit or autoUnzip disabled)
if (DEBUG) console.log(`📄 addFiles: ZIP not extracted (preferences): ${file.name}`);
} else {
// ZIP was extracted
if (DEBUG) console.log(`📄 addFiles: Extracted ${extractedFiles.length} files from ZIP: ${file.name}`);
}
filesToProcess.push(...extractedFiles);
} catch (error) {
console.error(`📄 addFiles: Failed to process ZIP file ${file.name}:`, error);
// On error, keep the ZIP file as-is
filesToProcess.push(file);
}
} else {
// Not a ZIP file, add as-is
filesToProcess.push(file);
}
}
if (DEBUG) console.log(`📄 addFiles: After ZIP processing, ${filesToProcess.length} files to add`);
for (const file of filesToProcess) {
const quickKey = createQuickKey(file);
// Soft deduplication: Check if file already exists by metadata

View File

@ -1,8 +1,9 @@
import { useCallback } from 'react';
import { useTranslation } from 'react-i18next';
import { useToolOperation, ToolType } from '../shared/useToolOperation';
import { createStandardErrorHandler } from '../../../utils/toolErrorHandler';
import { ExtractImagesParameters, defaultParameters } from './useExtractImagesParameters';
import JSZip from 'jszip';
import { useToolResources } from '../shared/useToolResources';
// Static configuration that can be used by both the hook and automation executor
export const buildExtractImagesFormData = (parameters: ExtractImagesParameters, file: File): FormData => {
@ -13,39 +14,28 @@ export const buildExtractImagesFormData = (parameters: ExtractImagesParameters,
return formData;
};
/**
 * Response handler for the extract-images endpoint, which answers with a ZIP archive.
 *
 * Every non-directory entry of the archive is read as a blob and wrapped in a `File`
 * named after the entry; directory entries are skipped.
 *
 * @param responseData - The ZIP payload returned by the endpoint
 * @param _originalFiles - The input files (unused)
 * @returns The files contained in the archive, in archive iteration order
 */
const extractImagesResponseHandler = async (responseData: Blob, _originalFiles: File[]): Promise<File[]> => {
  const archive = await new JSZip().loadAsync(responseData);
  const images: File[] = [];

  for (const [entryName, entry] of Object.entries(archive.files)) {
    if (entry.dir) {
      continue;
    }
    // Entries are read one at a time, matching the archive's iteration order.
    const content = await entry.async('blob');
    images.push(new File([content], entryName, { type: content.type }));
  }

  return images;
};
// Static configuration object
// Static configuration object (without response handler - will be added in hook)
export const extractImagesOperationConfig = {
toolType: ToolType.singleFile,
buildFormData: buildExtractImagesFormData,
operationType: 'extractImages',
endpoint: '/api/v1/misc/extract-images',
defaultParameters,
// Extract-images returns a ZIP file containing multiple image files
responseHandler: extractImagesResponseHandler,
} as const;
/**
 * Hook exposing the extract-images tool operation.
 *
 * The static `extractImagesOperationConfig` is combined with a response handler that
 * has to live inside the hook because it depends on `useToolResources`.
 */
export const useExtractImagesOperation = () => {
  const { t } = useTranslation();
  const { extractZipFiles } = useToolResources();

  // The endpoint returns a ZIP; delegate to the shared, preference-aware extractor.
  const responseHandler = useCallback(
    async (zipResponse: Blob, _originalFiles: File[]): Promise<File[]> => {
      const unpacked = await extractZipFiles(zipResponse);
      return unpacked;
    },
    [extractZipFiles]
  );

  const getErrorMessage = createStandardErrorHandler(
    t('extractImages.error.failed', 'An error occurred while extracting images from the PDF.')
  );

  return useToolOperation<ExtractImagesParameters>({
    ...extractImagesOperationConfig,
    responseHandler,
    getErrorMessage
  });
};

View File

@ -27,14 +27,14 @@ export const scannerImageSplitOperationConfig = {
export const useScannerImageSplitOperation = () => {
const { t } = useTranslation();
const { extractAllZipFiles } = useToolResources();
const { extractZipFiles } = useToolResources();
// Custom response handler that extracts ZIP files containing images
// Can't add to exported config because it requires access to the hook so must be part of the hook
const responseHandler = useCallback(async (blob: Blob, originalFiles: File[]): Promise<File[]> => {
try {
// Scanner image split returns ZIP files with multiple images
const extractedFiles = await extractAllZipFiles(blob);
const extractedFiles = await extractZipFiles(blob);
// If extraction succeeded and returned files, use them
if (extractedFiles.length > 0) {
@ -49,7 +49,7 @@ export const useScannerImageSplitOperation = () => {
const baseFileName = inputFileName.replace(/\.[^.]+$/, '');
const singleFile = new File([blob], `${baseFileName}.png`, { type: 'image/png' });
return [singleFile];
}, [extractAllZipFiles]);
}, [extractZipFiles]);
const config: ToolOperationConfig<ScannerImageSplitParameters> = {
...scannerImageSplitOperationConfig,

View File

@ -151,7 +151,7 @@ export const useToolOperation = <TParams>(
const { state, actions } = useToolState();
const { actions: fileActions } = useFileContext();
const { processFiles, cancelOperation: cancelApiCalls } = useToolApiCalls<TParams>();
const { generateThumbnails, createDownloadInfo, cleanupBlobUrls, extractZipFiles, extractAllZipFiles } = useToolResources();
const { generateThumbnails, createDownloadInfo, cleanupBlobUrls, extractZipFiles } = useToolResources();
// Track last operation for undo functionality
const lastOperationRef = useRef<{
@ -259,11 +259,6 @@ export const useToolOperation = <TParams>(
// Default: assume ZIP response for multi-file endpoints
// Note: extractZipFiles will check preferences.autoUnzip setting
processedFiles = await extractZipFiles(response.data);
if (processedFiles.length === 0) {
// Try the generic extraction as fallback
processedFiles = await extractAllZipFiles(response.data);
}
}
// Assume all inputs succeeded together unless server provided an error earlier
successSourceIds = validFiles.map(f => (f as any).fileId) as any;
@ -446,7 +441,7 @@ export const useToolOperation = <TParams>(
actions.setLoading(false);
actions.setProgress(null);
}
}, [t, config, actions, addFiles, consumeFiles, processFiles, generateThumbnails, createDownloadInfo, cleanupBlobUrls, extractZipFiles, extractAllZipFiles]);
}, [t, config, actions, addFiles, consumeFiles, processFiles, generateThumbnails, createDownloadInfo, cleanupBlobUrls, extractZipFiles]);
const cancelOperation = useCallback(() => {
cancelApiCalls();

View File

@ -27,11 +27,11 @@ export const useToolResources = () => {
// Cleanup on unmount - use ref to avoid dependency on blobUrls state
const blobUrlsRef = useRef<string[]>([]);
useEffect(() => {
blobUrlsRef.current = blobUrls;
}, [blobUrls]);
useEffect(() => {
return () => {
blobUrlsRef.current.forEach(url => {
@ -85,50 +85,17 @@ export const useToolResources = () => {
const extractZipFiles = useCallback(async (zipBlob: Blob, skipAutoUnzip = false): Promise<File[]> => {
try {
// Check if we should extract based on preferences
const shouldExtract = await zipFileService.shouldUnzip(
zipBlob,
preferences.autoUnzip,
preferences.autoUnzipFileLimit,
return await zipFileService.extractWithPreferences(zipBlob, {
autoUnzip: preferences.autoUnzip,
autoUnzipFileLimit: preferences.autoUnzipFileLimit,
skipAutoUnzip
);
if (!shouldExtract) {
return [new File([zipBlob], 'result.zip', { type: 'application/zip' })];
}
const zipFile = new File([zipBlob], 'temp.zip', { type: 'application/zip' });
const extractionResult = await zipFileService.extractPdfFiles(zipFile);
return extractionResult.success ? extractionResult.extractedFiles : [];
});
} catch (error) {
console.error('useToolResources.extractZipFiles - Error:', error);
return [];
}
}, [preferences.autoUnzip, preferences.autoUnzipFileLimit]);
/**
 * Extract every file from a ZIP blob, subject to the user's auto-unzip preferences.
 *
 * @param zipBlob - The ZIP payload to process
 * @param skipAutoUnzip - Forwarded to `zipFileService.shouldUnzip` (defaults to `false`)
 * @returns The extracted files; the blob wrapped as `result.zip` when extraction is
 *          declined; or an empty array when extraction fails or an error is thrown
 */
const extractAllZipFiles = useCallback(async (zipBlob: Blob, skipAutoUnzip = false): Promise<File[]> => {
  try {
    const shouldExtract = await zipFileService.shouldUnzip(
      zipBlob,
      preferences.autoUnzip,
      preferences.autoUnzipFileLimit,
      skipAutoUnzip
    );

    if (shouldExtract) {
      const archive = new File([zipBlob], 'temp.zip', { type: 'application/zip' });
      const outcome = await zipFileService.extractAllFiles(archive);
      if (outcome.success) {
        return outcome.extractedFiles;
      }
      return [];
    }

    // Extraction declined: hand the archive back untouched.
    return [new File([zipBlob], 'result.zip', { type: 'application/zip' })];
  } catch (error) {
    console.error('useToolResources.extractAllZipFiles - Error:', error);
    return [];
  }
}, [preferences.autoUnzip, preferences.autoUnzipFileLimit]);
const createDownloadInfo = useCallback(async (
files: File[],
operationType: string
@ -152,7 +119,6 @@ export const useToolResources = () => {
generateThumbnailsWithMetadata,
createDownloadInfo,
extractZipFiles,
extractAllZipFiles,
cleanupBlobUrls,
};
};

View File

@ -29,6 +29,7 @@ export interface ZipValidationResult {
fileCount: number;
totalSizeBytes: number;
containsPDFs: boolean;
containsFiles: boolean;
errors: string[];
}
@ -42,7 +43,6 @@ export interface ZipExtractionProgress {
export class ZipFileService {
private readonly maxFileSize = 100 * 1024 * 1024; // 100MB per file
private readonly maxTotalSize = 500 * 1024 * 1024; // 500MB total extraction limit
private readonly supportedExtensions = ['.pdf'];
// ZIP file validation constants
private static readonly VALID_ZIP_TYPES = [
@ -62,6 +62,7 @@ export class ZipFileService {
fileCount: 0,
totalSizeBytes: 0,
containsPDFs: false,
containsFiles: false,
errors: []
};
@ -115,10 +116,13 @@ export class ZipFileService {
result.fileCount = fileCount;
result.totalSizeBytes = totalSize;
result.containsPDFs = containsPDFs;
result.isValid = result.errors.length === 0 && containsPDFs;
result.containsFiles = fileCount > 0;
if (!containsPDFs) {
result.errors.push('ZIP file does not contain any PDF files');
// ZIP is valid if it has files and no size errors
result.isValid = result.errors.length === 0 && result.containsFiles;
if (!result.containsFiles) {
result.errors.push('ZIP file does not contain any files');
}
return result;
@ -278,6 +282,37 @@ export class ZipFileService {
return filename.toLowerCase().endsWith('.pdf');
}
/**
 * Report whether a filename carries an HTML extension.
 *
 * The comparison is case-insensitive and recognises `.html`, `.htm` and `.xhtml`.
 *
 * @param filename - Entry name or path to inspect
 * @returns `true` when the name ends with one of the HTML extensions
 */
private isHtmlFile(filename: string): boolean {
  const normalized = filename.toLowerCase();
  return ['.html', '.htm', '.xhtml'].some((extension) => normalized.endsWith(extension));
}
/**
 * Determine whether a ZIP archive holds at least one HTML file.
 *
 * Callers use the answer to choose between keeping the archive intact (HTML present)
 * and extracting its contents (no HTML).
 *
 * @param file - The ZIP archive to inspect
 * @returns `true` if any non-directory entry has an HTML extension; `false` otherwise,
 *          including when the archive cannot be read (the failure is logged)
 */
async containsHtmlFiles(file: Blob | File): Promise<boolean> {
  try {
    const archive = await new JSZip().loadAsync(file);

    // Directory entries never count, even if their name happens to look like HTML.
    return Object.entries(archive.files).some(
      ([entryName, entry]) => !entry.dir && this.isHtmlFile(entryName)
    );
  } catch (error) {
    console.error('Error checking for HTML files:', error);
    return false;
  }
}
/**
* Validate that a file is actually a PDF by checking its header
*/
@ -366,6 +401,62 @@ export class ZipFileService {
}
}
/**
 * Extract files from a ZIP with HTML detection and preference checking.
 *
 * This is the unified method that handles the common pattern of:
 * 1. Check for HTML files → keep zipped if present
 * 2. Check user preferences (via `shouldUnzip`) → respect autoUnzip / autoUnzipFileLimit,
 *    unless `skipAutoUnzip` is set
 * 3. Extract all files if appropriate
 *
 * The ZIP itself is returned whenever extraction is skipped, reports failure, yields
 * no files, or throws, so callers never receive an empty array. When `zipBlob` is
 * already a `File`, that same instance is returned, which lets callers detect the
 * "not extracted" case by identity.
 *
 * @param zipBlob - The ZIP blob to process
 * @param options - Extraction options (auto-unzip preference, file-count limit, and
 *                  an optional flag that bypasses the preference check)
 * @returns Array of files (either extracted or the ZIP itself)
 */
async extractWithPreferences(
  zipBlob: Blob,
  options: {
    autoUnzip: boolean;
    autoUnzipFileLimit: number;
    skipAutoUnzip?: boolean;
  }
): Promise<File[]> {
  // Built once, outside the try block, so every fallback path (including the catch)
  // returns the same File object instead of re-creating it.
  const zipFile = zipBlob instanceof File
    ? zipBlob
    : new File([zipBlob], 'result.zip', { type: 'application/zip' });

  try {
    // ZIPs containing HTML are kept intact
    if (await this.containsHtmlFiles(zipFile)) {
      return [zipFile];
    }

    // Check if we should extract based on preferences
    const shouldExtract = await this.shouldUnzip(
      zipBlob,
      options.autoUnzip,
      options.autoUnzipFileLimit,
      options.skipAutoUnzip ?? false
    );
    if (!shouldExtract) {
      return [zipFile];
    }

    // Extract all files. A "successful" extraction that produced nothing is treated
    // like a failure: keep the ZIP rather than handing the caller an empty list.
    const extractionResult = await this.extractAllFiles(zipFile);
    const hasExtractedFiles = extractionResult.success && extractionResult.extractedFiles.length > 0;
    return hasExtractedFiles ? extractionResult.extractedFiles : [zipFile];
  } catch (error) {
    console.error('Error in extractWithPreferences:', error);
    // On error, return ZIP as-is
    return [zipFile];
  }
}
/**
* Extract all files from a ZIP archive (not limited to PDFs)
*/
@ -486,9 +577,11 @@ export class ZipFileService {
}
/**
* Extract PDF files from ZIP and store them in IndexedDB with preserved history metadata
* Extract all files from ZIP and store them in IndexedDB with preserved history metadata
* Used by both FileManager and FileEditor to avoid code duplication
*
* Note: HTML files will NOT be extracted - the ZIP is kept intact when HTML is detected
*
* @param zipFile - The ZIP file to extract from
* @param zipStub - The StirlingFileStub for the ZIP (contains metadata to preserve)
* @returns Object with success status, extracted stubs, and any errors
@ -504,8 +597,15 @@ export class ZipFileService {
};
try {
// Extract PDF files from ZIP
const extractionResult = await this.extractPdfFiles(zipFile);
// Check if ZIP contains HTML files - if so, don't extract
const hasHtml = await this.containsHtmlFiles(zipFile);
if (hasHtml) {
result.errors.push('ZIP contains HTML files and will not be auto-extracted. Download the ZIP to access the files.');
return result;
}
// Extract all files from ZIP (not just PDFs)
const extractionResult = await this.extractAllFiles(zipFile);
if (!extractionResult.success || extractionResult.extractedFiles.length === 0) {
result.errors = extractionResult.errors;
@ -515,7 +615,7 @@ export class ZipFileService {
// Process each extracted file
for (const extractedFile of extractionResult.extractedFiles) {
try {
// Generate thumbnail
// Generate thumbnail (works for PDFs and images)
const thumbnail = await generateThumbnailForFile(extractedFile);
// Create StirlingFile

View File

@ -30,6 +30,7 @@ export class AutomationFileProcessor {
/**
* Extract files from a ZIP blob during automation execution, with fallback for non-ZIP files
* Extracts all file types (PDFs, images, etc.) except HTML files which stay zipped
*/
static async extractAutomationZipFiles(blob: Blob): Promise<AutomationProcessingResult> {
try {
@ -40,20 +41,26 @@ export class AutomationFileProcessor {
'application/zip'
);
const result = await zipFileService.extractPdfFiles(zipFile);
if (!result.success || result.extractedFiles.length === 0) {
// Fallback: treat as single PDF file
const fallbackFile = ResourceManager.createTimestampedFile(
blob,
AUTOMATION_CONSTANTS.RESULT_FILE_PREFIX,
'.pdf'
);
// Check if ZIP contains HTML files - if so, keep as ZIP
const containsHtml = await zipFileService.containsHtmlFiles(zipFile);
if (containsHtml) {
// HTML files should stay zipped - return ZIP as-is
return {
success: true,
files: [fallbackFile],
errors: [`ZIP extraction failed, treated as single file: ${result.errors?.join(', ') || 'Unknown error'}`]
files: [zipFile],
errors: []
};
}
// Extract all files (not just PDFs) - handles images from scanner-image-split, etc.
const result = await zipFileService.extractAllFiles(zipFile);
if (!result.success || result.extractedFiles.length === 0) {
// Fallback: keep as ZIP file (might be valid ZIP with extraction issues)
return {
success: true,
files: [zipFile],
errors: [`ZIP extraction failed, kept as ZIP: ${result.errors?.join(', ') || 'Unknown error'}`]
};
}
@ -63,18 +70,19 @@ export class AutomationFileProcessor {
errors: []
};
} catch (error) {
console.warn('Failed to extract automation ZIP files, falling back to single file:', error);
// Fallback: treat as single PDF file
console.warn('Failed to extract automation ZIP files, keeping as ZIP:', error);
// Fallback: keep as ZIP file for next automation step to handle
const fallbackFile = ResourceManager.createTimestampedFile(
blob,
AUTOMATION_CONSTANTS.RESULT_FILE_PREFIX,
'.pdf'
AUTOMATION_CONSTANTS.RESPONSE_ZIP_PREFIX,
'.zip',
'application/zip'
);
return {
success: true,
files: [fallbackFile],
errors: [`ZIP extraction failed, treated as single file: ${error}`]
errors: [`ZIP extraction failed, kept as ZIP: ${error}`]
};
}
}