mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-11-16 01:21:16 +01:00
Fix/V2/unzip_images (#4647)
## Method Usage by Context

| Context | Method Used | Respects Preferences | HTML Detection |
|------------------------------|-------------------------------------------------------|------------------------|----------------|
| Tools (via useToolResources) | extractZipFiles() → extractWithPreferences() | ✅ Yes | ✅ Yes |
| Automation | extractAutomationZipFiles() → extractAllFiles() | ❌ No (always extracts) | ✅ Yes |
| Manual Unzip | extractAndStoreFilesWithHistory() → extractAllFiles() | ❌ No (always extracts) | ✅ Yes |
| Auto-Upload | extractAllFiles() directly | ❌ No (always extracts) | ✅ Yes |

## Detailed Behavior Matrix

| Context | HTML Files | Auto-Unzip OFF | Within Limit | Exceeds Limit | Notes |
|--------------------------|-------------|----------------|--------------|---------------|----------------------------------------|
| Tools (useToolResources) | Keep zipped | Keep zipped | Extract all | Keep zipped | Respects user preferences |
| Automation | Keep zipped | Extract all | Extract all | Extract all | Ignores preferences (automation needs) |
| Manual Unzip | Keep zipped | Extract all | Extract all | Extract all | User explicitly unzipping |
| Auto-Upload | Keep zipped | Extract all | Extract all | Extract all | User dropped files |

## Simplified Decision Flow

```
ZIP File Received
│
├─ Contains HTML? → Keep as ZIP (all contexts)
│
└─ No HTML
   │
   ├─ Tools Context
   │  ├─ Auto-unzip OFF? → Keep as ZIP
   │  └─ Auto-unzip ON
   │     ├─ File count ≤ limit? → Extract all
   │     └─ File count > limit? → Keep as ZIP
   │
   └─ Automation/Manual/Auto-Upload
      └─ Extract all (ignore preferences)
```

## Key Changes from Previous Version

| Entry Point | Code Path | skipAutoUnzip | Respects Preferences? | HTML Detection? | Extraction Behavior |
|-----------------------------------------------|----------------------------------------------------------------------------------------|---------------|-----------------------|---------------------------|-------------------------------------------------------------------------|
| Direct File Upload (FileEditor, LandingPage) | FileContext.addRawFiles() → fileActions.addFiles() | true | ❌ No | ✅ Yes | Always extract (except HTML ZIPs) |
| Tool Outputs (Split, Merge, etc.) | useToolResources.extractZipFiles() → zipFileService.extractWithPreferences() | false | ✅ Yes | ✅ Yes | Conditional: only if autoUnzip=true AND file count ≤ autoUnzipFileLimit |
| Load from Storage (FileManager) | fileActions.addStirlingFileStubs() | N/A | N/A | N/A | No extraction - files already processed |
| Automation Outputs | AutomationFileProcessor.extractAutomationZipFiles() → zipFileService.extractAllFiles() | N/A | ❌ No | ✅ Yes | Always extract (except HTML ZIPs) |
| Manual Unzip Action (FileEditor context menu) | zipFileService.extractAndStoreFilesWithHistory() → extractAllFiles() | N/A | ❌ No | ✅ Yes (blocks extraction) | Always extract (except HTML ZIPs) - explicit user action |

---------

Co-authored-by: Connor Yoh <connor@stirlingpdf.com>
This commit is contained in:
parent
bcd7762594
commit
43887c8179
@ -1,6 +1,6 @@
|
||||
import React, { useState, useCallback, useRef, useMemo, useEffect } from 'react';
|
||||
import {
|
||||
Text, Center, Box, LoadingOverlay, Stack, Group
|
||||
Text, Center, Box, LoadingOverlay, Stack
|
||||
} from '@mantine/core';
|
||||
import { Dropzone } from '@mantine/dropzone';
|
||||
import { useFileSelection, useFileState, useFileManagement, useFileActions } from '../../contexts/FileContext';
|
||||
@ -10,7 +10,6 @@ import { detectFileExtension } from '../../utils/fileUtils';
|
||||
import FileEditorThumbnail from './FileEditorThumbnail';
|
||||
import AddFileCard from './AddFileCard';
|
||||
import FilePickerModal from '../shared/FilePickerModal';
|
||||
import SkeletonLoader from '../shared/SkeletonLoader';
|
||||
import { FileId, StirlingFile } from '../../types/fileContext';
|
||||
import { alert } from '../toast';
|
||||
import { downloadBlob } from '../../utils/downloadUtils';
|
||||
@ -68,19 +67,6 @@ const FileEditor = ({
|
||||
}
|
||||
}, [toolMode]);
|
||||
const [showFilePickerModal, setShowFilePickerModal] = useState(false);
|
||||
const [zipExtractionProgress, setZipExtractionProgress] = useState<{
|
||||
isExtracting: boolean;
|
||||
currentFile: string;
|
||||
progress: number;
|
||||
extractedCount: number;
|
||||
totalFiles: number;
|
||||
}>({
|
||||
isExtracting: false,
|
||||
currentFile: '',
|
||||
progress: 0,
|
||||
extractedCount: 0,
|
||||
totalFiles: 0
|
||||
});
|
||||
// Get selected file IDs from context (defensive programming)
|
||||
const contextSelectedIds = Array.isArray(selectedFileIds) ? selectedFileIds : [];
|
||||
|
||||
@ -92,106 +78,26 @@ const FileEditor = ({
|
||||
const localSelectedIds = contextSelectedIds;
|
||||
|
||||
// Process uploaded files using context
|
||||
// ZIP extraction is now handled automatically in FileContext based on user preferences
|
||||
const handleFileUpload = useCallback(async (uploadedFiles: File[]) => {
|
||||
_setError(null);
|
||||
|
||||
try {
|
||||
const allExtractedFiles: File[] = [];
|
||||
const errors: string[] = [];
|
||||
|
||||
for (const file of uploadedFiles) {
|
||||
if (file.type === 'application/pdf') {
|
||||
// Handle PDF files normally
|
||||
allExtractedFiles.push(file);
|
||||
} else if (file.type === 'application/zip' || file.type === 'application/x-zip-compressed' || file.name.toLowerCase().endsWith('.zip')) {
|
||||
// Handle ZIP files - only expand if they contain PDFs
|
||||
try {
|
||||
// Validate ZIP file first
|
||||
const validation = await zipFileService.validateZipFile(file);
|
||||
|
||||
if (validation.isValid && validation.containsPDFs) {
|
||||
// ZIP contains PDFs - extract them
|
||||
setZipExtractionProgress({
|
||||
isExtracting: true,
|
||||
currentFile: file.name,
|
||||
progress: 0,
|
||||
extractedCount: 0,
|
||||
totalFiles: validation.fileCount
|
||||
});
|
||||
|
||||
const extractionResult = await zipFileService.extractPdfFiles(file, (progress) => {
|
||||
setZipExtractionProgress({
|
||||
isExtracting: true,
|
||||
currentFile: progress.currentFile,
|
||||
progress: progress.progress,
|
||||
extractedCount: progress.extractedCount,
|
||||
totalFiles: progress.totalFiles
|
||||
});
|
||||
});
|
||||
|
||||
// Reset extraction progress
|
||||
setZipExtractionProgress({
|
||||
isExtracting: false,
|
||||
currentFile: '',
|
||||
progress: 0,
|
||||
extractedCount: 0,
|
||||
totalFiles: 0
|
||||
});
|
||||
|
||||
if (extractionResult.success) {
|
||||
allExtractedFiles.push(...extractionResult.extractedFiles);
|
||||
|
||||
if (extractionResult.errors.length > 0) {
|
||||
errors.push(...extractionResult.errors);
|
||||
}
|
||||
} else {
|
||||
errors.push(`Failed to extract ZIP file "${file.name}": ${extractionResult.errors.join(', ')}`);
|
||||
}
|
||||
} else {
|
||||
// ZIP doesn't contain PDFs or is invalid - treat as regular file
|
||||
allExtractedFiles.push(file);
|
||||
}
|
||||
} catch (zipError) {
|
||||
errors.push(`Failed to process ZIP file "${file.name}": ${zipError instanceof Error ? zipError.message : 'Unknown error'}`);
|
||||
setZipExtractionProgress({
|
||||
isExtracting: false,
|
||||
currentFile: '',
|
||||
progress: 0,
|
||||
extractedCount: 0,
|
||||
totalFiles: 0
|
||||
});
|
||||
}
|
||||
} else {
|
||||
allExtractedFiles.push(file);
|
||||
}
|
||||
}
|
||||
|
||||
// Show any errors
|
||||
if (errors.length > 0) {
|
||||
showError(errors.join('\n'));
|
||||
}
|
||||
|
||||
// Process all extracted files
|
||||
if (allExtractedFiles.length > 0) {
|
||||
// Add files to context and select them automatically
|
||||
await addFiles(allExtractedFiles, { selectFiles: true });
|
||||
showStatus(`Added ${allExtractedFiles.length} files`, 'success');
|
||||
if (uploadedFiles.length > 0) {
|
||||
// FileContext will automatically handle ZIP extraction based on user preferences
|
||||
// - Respects autoUnzip setting
|
||||
// - Respects autoUnzipFileLimit
|
||||
// - HTML ZIPs stay intact
|
||||
// - Non-ZIP files pass through unchanged
|
||||
await addFiles(uploadedFiles, { selectFiles: true });
|
||||
showStatus(`Added ${uploadedFiles.length} file(s)`, 'success');
|
||||
}
|
||||
} catch (err) {
|
||||
const errorMessage = err instanceof Error ? err.message : 'Failed to process files';
|
||||
showError(errorMessage);
|
||||
console.error('File processing error:', err);
|
||||
|
||||
// Reset extraction progress on error
|
||||
setZipExtractionProgress({
|
||||
isExtracting: false,
|
||||
currentFile: '',
|
||||
progress: 0,
|
||||
extractedCount: 0,
|
||||
totalFiles: 0
|
||||
});
|
||||
}
|
||||
}, [addFiles]);
|
||||
}, [addFiles, showStatus, showError]);
|
||||
|
||||
const toggleFile = useCallback((fileId: FileId) => {
|
||||
const currentSelectedIds = contextSelectedIdsRef.current;
|
||||
@ -394,7 +300,7 @@ const FileEditor = ({
|
||||
<Box p="md">
|
||||
|
||||
|
||||
{activeStirlingFileStubs.length === 0 && !zipExtractionProgress.isExtracting ? (
|
||||
{activeStirlingFileStubs.length === 0 ? (
|
||||
<Center h="60vh">
|
||||
<Stack align="center" gap="md">
|
||||
<Text size="lg" c="dimmed">📁</Text>
|
||||
@ -402,43 +308,6 @@ const FileEditor = ({
|
||||
<Text size="sm" c="dimmed">Upload PDF files, ZIP archives, or load from storage to get started</Text>
|
||||
</Stack>
|
||||
</Center>
|
||||
) : activeStirlingFileStubs.length === 0 && zipExtractionProgress.isExtracting ? (
|
||||
<Box>
|
||||
<SkeletonLoader type="controls" />
|
||||
|
||||
{/* ZIP Extraction Progress */}
|
||||
{zipExtractionProgress.isExtracting && (
|
||||
<Box mb="md" p="sm" style={{ backgroundColor: 'var(--mantine-color-orange-0)', borderRadius: 8 }}>
|
||||
<Group justify="space-between" mb="xs">
|
||||
<Text size="sm" fw={500}>Extracting ZIP archive...</Text>
|
||||
<Text size="sm" c="dimmed">{Math.round(zipExtractionProgress.progress)}%</Text>
|
||||
</Group>
|
||||
<Text size="xs" c="dimmed" mb="xs">
|
||||
{zipExtractionProgress.currentFile || 'Processing files...'}
|
||||
</Text>
|
||||
<Text size="xs" c="dimmed" mb="xs">
|
||||
{zipExtractionProgress.extractedCount} of {zipExtractionProgress.totalFiles} files extracted
|
||||
</Text>
|
||||
<div style={{
|
||||
width: '100%',
|
||||
height: '4px',
|
||||
backgroundColor: 'var(--mantine-color-gray-2)',
|
||||
borderRadius: '2px',
|
||||
overflow: 'hidden'
|
||||
}}>
|
||||
<div style={{
|
||||
width: `${Math.round(zipExtractionProgress.progress)}%`,
|
||||
height: '100%',
|
||||
backgroundColor: 'var(--mantine-color-orange-6)',
|
||||
transition: 'width 0.3s ease'
|
||||
}} />
|
||||
</div>
|
||||
</Box>
|
||||
)}
|
||||
|
||||
|
||||
<SkeletonLoader type="fileGrid" count={6} />
|
||||
</Box>
|
||||
) : (
|
||||
<div
|
||||
style={{
|
||||
|
||||
@ -79,8 +79,21 @@ function FileContextInner({
|
||||
};
|
||||
|
||||
// File operations using unified addFiles helper with persistence
|
||||
const addRawFiles = useCallback(async (files: File[], options?: { insertAfterPageId?: string; selectFiles?: boolean }): Promise<StirlingFile[]> => {
|
||||
const stirlingFiles = await addFiles({ files, ...options }, stateRef, filesRef, dispatch, lifecycleManager, enablePersistence);
|
||||
const addRawFiles = useCallback(async (files: File[], options?: { insertAfterPageId?: string; selectFiles?: boolean; skipAutoUnzip?: boolean }): Promise<StirlingFile[]> => {
|
||||
const stirlingFiles = await addFiles(
|
||||
{
|
||||
files,
|
||||
...options,
|
||||
// For direct file uploads: ALWAYS unzip (except HTML ZIPs)
|
||||
// skipAutoUnzip bypasses preference checks - HTML detection still applies
|
||||
skipAutoUnzip: true
|
||||
},
|
||||
stateRef,
|
||||
filesRef,
|
||||
dispatch,
|
||||
lifecycleManager,
|
||||
enablePersistence
|
||||
);
|
||||
|
||||
// Auto-select the newly added files if requested
|
||||
if (options?.selectFiles && stirlingFiles.length > 0) {
|
||||
|
||||
@ -18,6 +18,7 @@ import { FileLifecycleManager } from './lifecycle';
|
||||
import { buildQuickKeySet } from './fileSelectors';
|
||||
import { StirlingFile } from '../../types/fileContext';
|
||||
import { fileStorage } from '../../services/fileStorage';
|
||||
import { zipFileService } from '../../services/zipFileService';
|
||||
const DEBUG = process.env.NODE_ENV === 'development';
|
||||
|
||||
/**
|
||||
@ -172,6 +173,11 @@ interface AddFileOptions {
|
||||
|
||||
// Auto-selection after adding
|
||||
selectFiles?: boolean;
|
||||
|
||||
// Auto-unzip control
|
||||
autoUnzip?: boolean;
|
||||
autoUnzipFileLimit?: number;
|
||||
skipAutoUnzip?: boolean; // When true: always unzip (except HTML). Used for file uploads. When false: respect autoUnzip/autoUnzipFileLimit preferences. Used for tool outputs.
|
||||
}
|
||||
|
||||
/**
|
||||
@ -198,7 +204,58 @@ export async function addFiles(
|
||||
const { files = [] } = options;
|
||||
if (DEBUG) console.log(`📄 addFiles(raw): Adding ${files.length} files with immediate thumbnail generation`);
|
||||
|
||||
// ZIP pre-processing: Extract ZIP files with configurable behavior
|
||||
// - File uploads: skipAutoUnzip=true → always extract (except HTML)
|
||||
// - Tool outputs: skipAutoUnzip=false → respect user preferences
|
||||
const filesToProcess: File[] = [];
|
||||
const autoUnzip = options.autoUnzip ?? true; // Default to true
|
||||
const autoUnzipFileLimit = options.autoUnzipFileLimit ?? 4; // Default limit
|
||||
const skipAutoUnzip = options.skipAutoUnzip ?? false;
|
||||
|
||||
for (const file of files) {
|
||||
// Check if file is a ZIP
|
||||
if (zipFileService.isZipFile(file)) {
|
||||
try {
|
||||
if (DEBUG) console.log(`📄 addFiles: Detected ZIP file: ${file.name}`);
|
||||
|
||||
// Check if ZIP contains HTML files - if so, keep as ZIP
|
||||
const containsHtml = await zipFileService.containsHtmlFiles(file);
|
||||
if (containsHtml) {
|
||||
if (DEBUG) console.log(`📄 addFiles: ZIP contains HTML, keeping as ZIP: ${file.name}`);
|
||||
filesToProcess.push(file);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Apply extraction with preferences
|
||||
const extractedFiles = await zipFileService.extractWithPreferences(file, {
|
||||
autoUnzip,
|
||||
autoUnzipFileLimit,
|
||||
skipAutoUnzip
|
||||
});
|
||||
|
||||
if (extractedFiles.length === 1 && extractedFiles[0] === file) {
|
||||
// ZIP was not extracted (over limit or autoUnzip disabled)
|
||||
if (DEBUG) console.log(`📄 addFiles: ZIP not extracted (preferences): ${file.name}`);
|
||||
} else {
|
||||
// ZIP was extracted
|
||||
if (DEBUG) console.log(`📄 addFiles: Extracted ${extractedFiles.length} files from ZIP: ${file.name}`);
|
||||
}
|
||||
|
||||
filesToProcess.push(...extractedFiles);
|
||||
} catch (error) {
|
||||
console.error(`📄 addFiles: Failed to process ZIP file ${file.name}:`, error);
|
||||
// On error, keep the ZIP file as-is
|
||||
filesToProcess.push(file);
|
||||
}
|
||||
} else {
|
||||
// Not a ZIP file, add as-is
|
||||
filesToProcess.push(file);
|
||||
}
|
||||
}
|
||||
|
||||
if (DEBUG) console.log(`📄 addFiles: After ZIP processing, ${filesToProcess.length} files to add`);
|
||||
|
||||
for (const file of filesToProcess) {
|
||||
const quickKey = createQuickKey(file);
|
||||
|
||||
// Soft deduplication: Check if file already exists by metadata
|
||||
|
||||
@ -1,8 +1,9 @@
|
||||
import { useCallback } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useToolOperation, ToolType } from '../shared/useToolOperation';
|
||||
import { createStandardErrorHandler } from '../../../utils/toolErrorHandler';
|
||||
import { ExtractImagesParameters, defaultParameters } from './useExtractImagesParameters';
|
||||
import JSZip from 'jszip';
|
||||
import { useToolResources } from '../shared/useToolResources';
|
||||
|
||||
// Static configuration that can be used by both the hook and automation executor
|
||||
export const buildExtractImagesFormData = (parameters: ExtractImagesParameters, file: File): FormData => {
|
||||
@ -13,39 +14,28 @@ export const buildExtractImagesFormData = (parameters: ExtractImagesParameters,
|
||||
return formData;
|
||||
};
|
||||
|
||||
// Response handler for extract-images which returns a ZIP file
|
||||
const extractImagesResponseHandler = async (responseData: Blob, _originalFiles: File[]): Promise<File[]> => {
|
||||
const zip = new JSZip();
|
||||
const zipContent = await zip.loadAsync(responseData);
|
||||
const extractedFiles: File[] = [];
|
||||
|
||||
for (const [filename, file] of Object.entries(zipContent.files)) {
|
||||
if (!file.dir) {
|
||||
const blob = await file.async('blob');
|
||||
const extractedFile = new File([blob], filename, { type: blob.type });
|
||||
extractedFiles.push(extractedFile);
|
||||
}
|
||||
}
|
||||
|
||||
return extractedFiles;
|
||||
};
|
||||
|
||||
// Static configuration object
|
||||
// Static configuration object (without response handler - will be added in hook)
|
||||
export const extractImagesOperationConfig = {
|
||||
toolType: ToolType.singleFile,
|
||||
buildFormData: buildExtractImagesFormData,
|
||||
operationType: 'extractImages',
|
||||
endpoint: '/api/v1/misc/extract-images',
|
||||
defaultParameters,
|
||||
// Extract-images returns a ZIP file containing multiple image files
|
||||
responseHandler: extractImagesResponseHandler,
|
||||
} as const;
|
||||
|
||||
export const useExtractImagesOperation = () => {
  const { t } = useTranslation();
  const { extractZipFiles } = useToolResources();

  // The extract-images endpoint answers with a ZIP of images. Routing it through
  // extractZipFiles (from useToolResources) instead of unpacking it here lets the
  // shared helper decide whether the archive is expanded or handed back as-is.
  // This must live in the hook because extractZipFiles is only available from a hook.
  const responseHandler = useCallback(
    async (blob: Blob, _originalFiles: File[]): Promise<File[]> => extractZipFiles(blob),
    [extractZipFiles]
  );

  // Translated fallback message shown when the operation fails.
  const getErrorMessage = createStandardErrorHandler(
    t('extractImages.error.failed', 'An error occurred while extracting images from the PDF.')
  );

  return useToolOperation<ExtractImagesParameters>({
    ...extractImagesOperationConfig,
    responseHandler,
    getErrorMessage
  });
};
|
||||
@ -27,14 +27,14 @@ export const scannerImageSplitOperationConfig = {
|
||||
|
||||
export const useScannerImageSplitOperation = () => {
|
||||
const { t } = useTranslation();
|
||||
const { extractAllZipFiles } = useToolResources();
|
||||
const { extractZipFiles } = useToolResources();
|
||||
|
||||
// Custom response handler that extracts ZIP files containing images
|
||||
// Can't add to exported config because it requires access to the hook so must be part of the hook
|
||||
const responseHandler = useCallback(async (blob: Blob, originalFiles: File[]): Promise<File[]> => {
|
||||
try {
|
||||
// Scanner image split returns ZIP files with multiple images
|
||||
const extractedFiles = await extractAllZipFiles(blob);
|
||||
const extractedFiles = await extractZipFiles(blob);
|
||||
|
||||
// If extraction succeeded and returned files, use them
|
||||
if (extractedFiles.length > 0) {
|
||||
@ -49,7 +49,7 @@ export const useScannerImageSplitOperation = () => {
|
||||
const baseFileName = inputFileName.replace(/\.[^.]+$/, '');
|
||||
const singleFile = new File([blob], `${baseFileName}.png`, { type: 'image/png' });
|
||||
return [singleFile];
|
||||
}, [extractAllZipFiles]);
|
||||
}, [extractZipFiles]);
|
||||
|
||||
const config: ToolOperationConfig<ScannerImageSplitParameters> = {
|
||||
...scannerImageSplitOperationConfig,
|
||||
|
||||
@ -151,7 +151,7 @@ export const useToolOperation = <TParams>(
|
||||
const { state, actions } = useToolState();
|
||||
const { actions: fileActions } = useFileContext();
|
||||
const { processFiles, cancelOperation: cancelApiCalls } = useToolApiCalls<TParams>();
|
||||
const { generateThumbnails, createDownloadInfo, cleanupBlobUrls, extractZipFiles, extractAllZipFiles } = useToolResources();
|
||||
const { generateThumbnails, createDownloadInfo, cleanupBlobUrls, extractZipFiles } = useToolResources();
|
||||
|
||||
// Track last operation for undo functionality
|
||||
const lastOperationRef = useRef<{
|
||||
@ -259,11 +259,6 @@ export const useToolOperation = <TParams>(
|
||||
// Default: assume ZIP response for multi-file endpoints
|
||||
// Note: extractZipFiles will check preferences.autoUnzip setting
|
||||
processedFiles = await extractZipFiles(response.data);
|
||||
|
||||
if (processedFiles.length === 0) {
|
||||
// Try the generic extraction as fallback
|
||||
processedFiles = await extractAllZipFiles(response.data);
|
||||
}
|
||||
}
|
||||
// Assume all inputs succeeded together unless server provided an error earlier
|
||||
successSourceIds = validFiles.map(f => (f as any).fileId) as any;
|
||||
@ -446,7 +441,7 @@ export const useToolOperation = <TParams>(
|
||||
actions.setLoading(false);
|
||||
actions.setProgress(null);
|
||||
}
|
||||
}, [t, config, actions, addFiles, consumeFiles, processFiles, generateThumbnails, createDownloadInfo, cleanupBlobUrls, extractZipFiles, extractAllZipFiles]);
|
||||
}, [t, config, actions, addFiles, consumeFiles, processFiles, generateThumbnails, createDownloadInfo, cleanupBlobUrls, extractZipFiles]);
|
||||
|
||||
const cancelOperation = useCallback(() => {
|
||||
cancelApiCalls();
|
||||
|
||||
@ -27,11 +27,11 @@ export const useToolResources = () => {
|
||||
|
||||
// Cleanup on unmount - use ref to avoid dependency on blobUrls state
|
||||
const blobUrlsRef = useRef<string[]>([]);
|
||||
|
||||
|
||||
useEffect(() => {
|
||||
blobUrlsRef.current = blobUrls;
|
||||
}, [blobUrls]);
|
||||
|
||||
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
blobUrlsRef.current.forEach(url => {
|
||||
@ -85,50 +85,17 @@ export const useToolResources = () => {
|
||||
|
||||
const extractZipFiles = useCallback(async (zipBlob: Blob, skipAutoUnzip = false): Promise<File[]> => {
|
||||
try {
|
||||
// Check if we should extract based on preferences
|
||||
const shouldExtract = await zipFileService.shouldUnzip(
|
||||
zipBlob,
|
||||
preferences.autoUnzip,
|
||||
preferences.autoUnzipFileLimit,
|
||||
return await zipFileService.extractWithPreferences(zipBlob, {
|
||||
autoUnzip: preferences.autoUnzip,
|
||||
autoUnzipFileLimit: preferences.autoUnzipFileLimit,
|
||||
skipAutoUnzip
|
||||
);
|
||||
|
||||
if (!shouldExtract) {
|
||||
return [new File([zipBlob], 'result.zip', { type: 'application/zip' })];
|
||||
}
|
||||
|
||||
const zipFile = new File([zipBlob], 'temp.zip', { type: 'application/zip' });
|
||||
const extractionResult = await zipFileService.extractPdfFiles(zipFile);
|
||||
return extractionResult.success ? extractionResult.extractedFiles : [];
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('useToolResources.extractZipFiles - Error:', error);
|
||||
return [];
|
||||
}
|
||||
}, [preferences.autoUnzip, preferences.autoUnzipFileLimit]);
|
||||
|
||||
const extractAllZipFiles = useCallback(async (zipBlob: Blob, skipAutoUnzip = false): Promise<File[]> => {
|
||||
try {
|
||||
// Check if we should extract based on preferences
|
||||
const shouldExtract = await zipFileService.shouldUnzip(
|
||||
zipBlob,
|
||||
preferences.autoUnzip,
|
||||
preferences.autoUnzipFileLimit,
|
||||
skipAutoUnzip
|
||||
);
|
||||
|
||||
if (!shouldExtract) {
|
||||
return [new File([zipBlob], 'result.zip', { type: 'application/zip' })];
|
||||
}
|
||||
|
||||
const zipFile = new File([zipBlob], 'temp.zip', { type: 'application/zip' });
|
||||
const extractionResult = await zipFileService.extractAllFiles(zipFile);
|
||||
return extractionResult.success ? extractionResult.extractedFiles : [];
|
||||
} catch (error) {
|
||||
console.error('useToolResources.extractAllZipFiles - Error:', error);
|
||||
return [];
|
||||
}
|
||||
}, [preferences.autoUnzip, preferences.autoUnzipFileLimit]);
|
||||
|
||||
const createDownloadInfo = useCallback(async (
|
||||
files: File[],
|
||||
operationType: string
|
||||
@ -152,7 +119,6 @@ export const useToolResources = () => {
|
||||
generateThumbnailsWithMetadata,
|
||||
createDownloadInfo,
|
||||
extractZipFiles,
|
||||
extractAllZipFiles,
|
||||
cleanupBlobUrls,
|
||||
};
|
||||
};
|
||||
|
||||
@ -29,6 +29,7 @@ export interface ZipValidationResult {
|
||||
fileCount: number;
|
||||
totalSizeBytes: number;
|
||||
containsPDFs: boolean;
|
||||
containsFiles: boolean;
|
||||
errors: string[];
|
||||
}
|
||||
|
||||
@ -42,7 +43,6 @@ export interface ZipExtractionProgress {
|
||||
export class ZipFileService {
|
||||
private readonly maxFileSize = 100 * 1024 * 1024; // 100MB per file
|
||||
private readonly maxTotalSize = 500 * 1024 * 1024; // 500MB total extraction limit
|
||||
private readonly supportedExtensions = ['.pdf'];
|
||||
|
||||
// ZIP file validation constants
|
||||
private static readonly VALID_ZIP_TYPES = [
|
||||
@ -62,6 +62,7 @@ export class ZipFileService {
|
||||
fileCount: 0,
|
||||
totalSizeBytes: 0,
|
||||
containsPDFs: false,
|
||||
containsFiles: false,
|
||||
errors: []
|
||||
};
|
||||
|
||||
@ -115,10 +116,13 @@ export class ZipFileService {
|
||||
result.fileCount = fileCount;
|
||||
result.totalSizeBytes = totalSize;
|
||||
result.containsPDFs = containsPDFs;
|
||||
result.isValid = result.errors.length === 0 && containsPDFs;
|
||||
result.containsFiles = fileCount > 0;
|
||||
|
||||
if (!containsPDFs) {
|
||||
result.errors.push('ZIP file does not contain any PDF files');
|
||||
// ZIP is valid if it has files and no size errors
|
||||
result.isValid = result.errors.length === 0 && result.containsFiles;
|
||||
|
||||
if (!result.containsFiles) {
|
||||
result.errors.push('ZIP file does not contain any files');
|
||||
}
|
||||
|
||||
return result;
|
||||
@ -278,6 +282,37 @@ export class ZipFileService {
|
||||
return filename.toLowerCase().endsWith('.pdf');
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a filename indicates an HTML file
|
||||
*/
|
||||
private isHtmlFile(filename: string): boolean {
  // Compare case-insensitively so names such as "INDEX.HTML" are recognised too.
  const normalizedName = filename.toLowerCase();
  const htmlSuffixes = ['.html', '.htm', '.xhtml'];
  return htmlSuffixes.some((suffix) => normalizedName.endsWith(suffix));
}
|
||||
|
||||
/**
|
||||
* Check if a ZIP file contains HTML files
|
||||
* Used to determine if the ZIP should be kept intact (HTML) or extracted (other files)
|
||||
*/
|
||||
async containsHtmlFiles(file: Blob | File): Promise<boolean> {
  try {
    const archive = await new JSZip().loadAsync(file);

    // Directory entries are ignored; the first real entry with an HTML
    // extension is enough to answer "yes".
    return Object.entries(archive.files).some(
      ([entryName, entry]) => !entry.dir && this.isHtmlFile(entryName)
    );
  } catch (error) {
    // An archive that cannot be read is logged and reported as containing no HTML,
    // so callers continue down their normal path instead of failing here.
    console.error('Error checking for HTML files:', error);
    return false;
  }
}
|
||||
|
||||
/**
|
||||
* Validate that a file is actually a PDF by checking its header
|
||||
*/
|
||||
@ -366,6 +401,62 @@ export class ZipFileService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract files from ZIP with HTML detection and preference checking
|
||||
* This is the unified method that handles the common pattern of:
|
||||
* 1. Check for HTML files → keep zipped if present
|
||||
* 2. Check user preferences → respect autoUnzipFileLimit
|
||||
* 3. Extract files if appropriate
|
||||
*
|
||||
* @param zipBlob - The ZIP blob to process
|
||||
* @param options - Extraction options
|
||||
* @returns Array of files (either extracted or the ZIP itself)
|
||||
*/
|
||||
async extractWithPreferences(
  zipBlob: Blob,
  options: {
    autoUnzip: boolean;
    autoUnzipFileLimit: number;
    skipAutoUnzip?: boolean;
  }
): Promise<File[]> {
  // Wrap the blob exactly once so every exit path (including the error path)
  // hands back the same File instance. Callers that passed a File can therefore
  // detect "not extracted" by identity (result[0] === original file).
  const zipFile = zipBlob instanceof File
    ? zipBlob
    : new File([zipBlob], 'result.zip', { type: 'application/zip' });

  try {
    // 1. Archives containing HTML are always kept intact.
    if (await this.containsHtmlFiles(zipFile)) {
      return [zipFile];
    }

    // 2. Let shouldUnzip decide based on the supplied preference values.
    //    skipAutoUnzip defaults to false when the caller omits it.
    const shouldExtract = await this.shouldUnzip(
      zipBlob,
      options.autoUnzip,
      options.autoUnzipFileLimit,
      options.skipAutoUnzip ?? false
    );
    if (!shouldExtract) {
      return [zipFile];
    }

    // 3. Extract every entry. A failed extraction OR one that produced no files
    //    falls back to the ZIP itself, so the archive is never silently dropped
    //    (the other extractAllFiles call sites apply the same zero-files guard).
    const extractionResult = await this.extractAllFiles(zipFile);
    if (!extractionResult.success || extractionResult.extractedFiles.length === 0) {
      return [zipFile];
    }
    return extractionResult.extractedFiles;
  } catch (error) {
    console.error('Error in extractWithPreferences:', error);
    // On error, return ZIP as-is
    return [zipFile];
  }
}
|
||||
|
||||
/**
|
||||
* Extract all files from a ZIP archive (not limited to PDFs)
|
||||
*/
|
||||
@ -486,9 +577,11 @@ export class ZipFileService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract PDF files from ZIP and store them in IndexedDB with preserved history metadata
|
||||
* Extract all files from ZIP and store them in IndexedDB with preserved history metadata
|
||||
* Used by both FileManager and FileEditor to avoid code duplication
|
||||
*
|
||||
* Note: HTML files will NOT be extracted - the ZIP is kept intact when HTML is detected
|
||||
*
|
||||
* @param zipFile - The ZIP file to extract from
|
||||
* @param zipStub - The StirlingFileStub for the ZIP (contains metadata to preserve)
|
||||
* @returns Object with success status, extracted stubs, and any errors
|
||||
@ -504,8 +597,15 @@ export class ZipFileService {
|
||||
};
|
||||
|
||||
try {
|
||||
// Extract PDF files from ZIP
|
||||
const extractionResult = await this.extractPdfFiles(zipFile);
|
||||
// Check if ZIP contains HTML files - if so, don't extract
|
||||
const hasHtml = await this.containsHtmlFiles(zipFile);
|
||||
if (hasHtml) {
|
||||
result.errors.push('ZIP contains HTML files and will not be auto-extracted. Download the ZIP to access the files.');
|
||||
return result;
|
||||
}
|
||||
|
||||
// Extract all files from ZIP (not just PDFs)
|
||||
const extractionResult = await this.extractAllFiles(zipFile);
|
||||
|
||||
if (!extractionResult.success || extractionResult.extractedFiles.length === 0) {
|
||||
result.errors = extractionResult.errors;
|
||||
@ -515,7 +615,7 @@ export class ZipFileService {
|
||||
// Process each extracted file
|
||||
for (const extractedFile of extractionResult.extractedFiles) {
|
||||
try {
|
||||
// Generate thumbnail
|
||||
// Generate thumbnail (works for PDFs and images)
|
||||
const thumbnail = await generateThumbnailForFile(extractedFile);
|
||||
|
||||
// Create StirlingFile
|
||||
|
||||
@ -30,6 +30,7 @@ export class AutomationFileProcessor {
|
||||
|
||||
/**
|
||||
* Extract files from a ZIP blob during automation execution, with fallback for non-ZIP files
|
||||
* Extracts all file types (PDFs, images, etc.) except HTML files which stay zipped
|
||||
*/
|
||||
static async extractAutomationZipFiles(blob: Blob): Promise<AutomationProcessingResult> {
|
||||
try {
|
||||
@ -40,20 +41,26 @@ export class AutomationFileProcessor {
|
||||
'application/zip'
|
||||
);
|
||||
|
||||
const result = await zipFileService.extractPdfFiles(zipFile);
|
||||
|
||||
if (!result.success || result.extractedFiles.length === 0) {
|
||||
// Fallback: treat as single PDF file
|
||||
const fallbackFile = ResourceManager.createTimestampedFile(
|
||||
blob,
|
||||
AUTOMATION_CONSTANTS.RESULT_FILE_PREFIX,
|
||||
'.pdf'
|
||||
);
|
||||
|
||||
// Check if ZIP contains HTML files - if so, keep as ZIP
|
||||
const containsHtml = await zipFileService.containsHtmlFiles(zipFile);
|
||||
if (containsHtml) {
|
||||
// HTML files should stay zipped - return ZIP as-is
|
||||
return {
|
||||
success: true,
|
||||
files: [fallbackFile],
|
||||
errors: [`ZIP extraction failed, treated as single file: ${result.errors?.join(', ') || 'Unknown error'}`]
|
||||
files: [zipFile],
|
||||
errors: []
|
||||
};
|
||||
}
|
||||
|
||||
// Extract all files (not just PDFs) - handles images from scanner-image-split, etc.
|
||||
const result = await zipFileService.extractAllFiles(zipFile);
|
||||
|
||||
if (!result.success || result.extractedFiles.length === 0) {
|
||||
// Fallback: keep as ZIP file (might be valid ZIP with extraction issues)
|
||||
return {
|
||||
success: true,
|
||||
files: [zipFile],
|
||||
errors: [`ZIP extraction failed, kept as ZIP: ${result.errors?.join(', ') || 'Unknown error'}`]
|
||||
};
|
||||
}
|
||||
|
||||
@ -63,18 +70,19 @@ export class AutomationFileProcessor {
|
||||
errors: []
|
||||
};
|
||||
} catch (error) {
|
||||
console.warn('Failed to extract automation ZIP files, falling back to single file:', error);
|
||||
// Fallback: treat as single PDF file
|
||||
console.warn('Failed to extract automation ZIP files, keeping as ZIP:', error);
|
||||
// Fallback: keep as ZIP file for next automation step to handle
|
||||
const fallbackFile = ResourceManager.createTimestampedFile(
|
||||
blob,
|
||||
AUTOMATION_CONSTANTS.RESULT_FILE_PREFIX,
|
||||
'.pdf'
|
||||
AUTOMATION_CONSTANTS.RESPONSE_ZIP_PREFIX,
|
||||
'.zip',
|
||||
'application/zip'
|
||||
);
|
||||
|
||||
return {
|
||||
success: true,
|
||||
files: [fallbackFile],
|
||||
errors: [`ZIP extraction failed, treated as single file: ${error}`]
|
||||
errors: [`ZIP extraction failed, kept as ZIP: ${error}`]
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user