Stirling-PDF/frontend/src/utils/automationFileProcessor.ts
ConnorYoh 43887c8179
Fix/V2/unzip_images (#4647)
Method Usage by Context

| Context | Method Used | Respects Preferences | HTML Detection |
|------------------------------|-------------------------------------------------------|------------------------|----------------|
| Tools (via useToolResources) | extractZipFiles() → extractWithPreferences() | Yes | Yes |
| Automation | extractAutomationZipFiles() → extractAllFiles() | No (always extracts) | Yes |
| Manual Unzip | extractAndStoreFilesWithHistory() → extractAllFiles() | No (always extracts) | Yes |
| Auto-Upload | extractAllFiles() directly | No (always extracts) | Yes |

  Detailed Behavior Matrix

| Context | HTML Files | Auto-Unzip OFF | Within Limit | Exceeds Limit | Notes |
|--------------------------|-------------|----------------|--------------|---------------|----------------------------------------|
| Tools (useToolResources) | Keep zipped | Keep zipped | Extract all | Keep zipped | Respects user preferences |
| Automation | Keep zipped | Extract all | Extract all | Extract all | Ignores preferences (automation needs) |
| Manual Unzip | Keep zipped | Extract all | Extract all | Extract all | User explicitly unzipping |
| Auto-Upload | Keep zipped | Extract all | Extract all | Extract all | User dropped files |

  Simplified Decision Flow

  ZIP File Received
      │
      ├─ Contains HTML? → Keep as ZIP (all contexts)
      │
      └─ No HTML
          │
          ├─ Tools Context
          │   ├─ Auto-unzip OFF? → Keep as ZIP
          │   └─ Auto-unzip ON
          │       ├─ File count ≤ limit? → Extract all
          │       └─ File count > limit? → Keep as ZIP
          │
          └─ Automation/Manual/Auto-Upload
              └─ Extract all (ignore preferences)

  Key Changes from Previous Version
  
| Entry Point | Code Path | skipAutoUnzip | Respects Preferences? | HTML Detection? | Extraction Behavior |
|-----------------------------------------------|----------------------------------------------------------------------------------------|---------------|-----------------------|---------------------------|-------------------------------------------------------------------------|
| Direct File Upload (FileEditor, LandingPage) | FileContext.addRawFiles() → fileActions.addFiles() | true | No | Yes | Always extract (except HTML ZIPs) |
| Tool Outputs (Split, Merge, etc.) | useToolResources.extractZipFiles() → zipFileService.extractWithPreferences() | false | Yes | Yes | Conditional: only if autoUnzip=true AND file count ≤ autoUnzipFileLimit |
| Load from Storage (FileManager) | fileActions.addStirlingFileStubs() | N/A | N/A | N/A | No extraction - files already processed |
| Automation Outputs | AutomationFileProcessor.extractAutomationZipFiles() → zipFileService.extractAllFiles() | N/A | No | Yes | Always extract (except HTML ZIPs) |
| Manual Unzip Action (FileEditor context menu) | zipFileService.extractAndStoreFilesWithHistory() → extractAllFiles() | N/A | No | Yes (blocks extraction) | Always extract (except HTML ZIPs) - explicit user action |

---------

Co-authored-by: Connor Yoh <connor@stirlingpdf.com>
2025-10-15 14:17:44 +00:00

195 lines
5.6 KiB
TypeScript

/**
* File processing utilities specifically for automation workflows
*/
import axios from 'axios';
import { zipFileService } from '../services/zipFileService';
import { ResourceManager } from './resourceManager';
import { AUTOMATION_CONSTANTS } from '../constants/automation';
/**
* Optional per-request settings accepted by the automation processing methods.
*/
export interface AutomationProcessingOptions {
/**
* Forwarded as the axios request `timeout`. When omitted,
* AUTOMATION_CONSTANTS.OPERATION_TIMEOUT is used instead.
*/
timeout?: number;
/**
* Forwarded as the axios request `responseType`. When omitted, 'blob' is used.
* NOTE(review): the processing methods hand the response body on as a Blob,
* so 'json' looks unsupported downstream — confirm before relying on it.
*/
responseType?: 'blob' | 'json';
}
/**
* Outcome of running one automation step.
*/
export interface AutomationProcessingResult {
/** False when the HTTP request failed or returned a non-200 status. */
success: boolean;
/** Files produced by the step; empty when `success` is false. */
files: File[];
/**
* Human-readable problems. May be non-empty even when `success` is true:
* a ZIP that could not be extracted is returned as-is with a warning here.
*/
errors: string[];
}
/**
 * Static helpers that run files through automation steps: building the
 * multipart request, posting it with axios, and turning the response into
 * `File` objects (unpacking ZIP responses where appropriate).
 */
export class AutomationFileProcessor {
  /**
   * Report whether a blob declares a ZIP MIME type.
   *
   * Only `blob.type` is inspected; the blob's bytes are not read, so a ZIP
   * delivered with a generic or missing content type is reported as `false`.
   *
   * @param blob - Blob whose declared type is checked.
   * @returns `true` for `application/zip` or `application/x-zip-compressed`.
   */
  static isZipFile(blob: Blob): boolean {
    return blob.type === 'application/zip' || blob.type === 'application/x-zip-compressed';
  }

  /**
   * Unpack a ZIP response produced by an automation step.
   *
   * The blob is wrapped in a timestamped `.zip` file. If the archive contains
   * HTML files it is returned unextracted; otherwise every entry is extracted
   * via `zipFileService.extractAllFiles`. Whenever extraction fails, yields no
   * files, or throws, the wrapped ZIP itself is returned with a warning in
   * `errors`, so this method always reports `success: true`.
   *
   * @param blob - Raw response body, expected to be a ZIP archive.
   * @returns The extracted files, or the single wrapped ZIP as a fallback.
   */
  static async extractAutomationZipFiles(blob: Blob): Promise<AutomationProcessingResult> {
    // Built once, outside the try, so every fallback path returns the same
    // file instead of constructing a second wrapper inside the catch.
    const zipFile = ResourceManager.createTimestampedFile(
      blob,
      AUTOMATION_CONSTANTS.RESPONSE_ZIP_PREFIX,
      '.zip',
      'application/zip'
    );

    try {
      // ZIPs containing HTML stay zipped.
      if (await zipFileService.containsHtmlFiles(zipFile)) {
        return { success: true, files: [zipFile], errors: [] };
      }

      // Extract every entry, not only PDFs (e.g. images from scanner-image-split).
      const result = await zipFileService.extractAllFiles(zipFile);
      if (!result.success || result.extractedFiles.length === 0) {
        // Possibly a valid ZIP we simply could not unpack: hand it on untouched.
        return {
          success: true,
          files: [zipFile],
          errors: [`ZIP extraction failed, kept as ZIP: ${result.errors?.join(', ') || 'Unknown error'}`]
        };
      }

      return { success: true, files: result.extractedFiles, errors: [] };
    } catch (error) {
      console.warn('Failed to extract automation ZIP files, keeping as ZIP:', error);
      // Keep the ZIP so the next automation step can still try to handle it.
      return {
        success: true,
        files: [zipFile],
        errors: [`ZIP extraction failed, kept as ZIP: ${error}`]
      };
    }
  }

  /**
   * Post one file to an automation endpoint and wrap the response as a file.
   *
   * @param endpoint - URL the form data is posted to.
   * @param formData - Multipart body, typically from `buildAutomationFormData`.
   * @param originalFileName - Name handed to `ResourceManager.createResultFile`.
   * @param options - Optional timeout / response type overrides.
   * @returns A result with one file on HTTP 200; otherwise `success: false`
   *          with a description of the failure. Never rejects on request errors.
   */
  static async processAutomationSingleFile(
    endpoint: string,
    formData: FormData,
    originalFileName: string,
    options: AutomationProcessingOptions = {}
  ): Promise<AutomationProcessingResult> {
    try {
      const response = await axios.post(endpoint, formData, {
        responseType: options.responseType ?? 'blob',
        // `??` rather than `||` so an explicit timeout of 0 is passed through.
        timeout: options.timeout ?? AUTOMATION_CONSTANTS.OPERATION_TIMEOUT
      });

      if (response.status !== 200) {
        return AutomationFileProcessor.failure(
          `Automation step failed - HTTP ${response.status}: ${response.statusText}`
        );
      }

      const resultFile = ResourceManager.createResultFile(
        response.data,
        originalFileName,
        AUTOMATION_CONSTANTS.FILE_PREFIX
      );
      return { success: true, files: [resultFile], errors: [] };
    } catch (error: unknown) {
      const reason = await AutomationFileProcessor.describeRequestError(error);
      return AutomationFileProcessor.failure(`Automation step failed: ${reason}`);
    }
  }

  /**
   * Post several files to an automation endpoint and unpack the response.
   *
   * The response body is passed to `extractAutomationZipFiles`, since
   * multi-file responses are typically ZIP archives.
   *
   * @param endpoint - URL the form data is posted to.
   * @param formData - Multipart body, typically from `buildAutomationFormData`.
   * @param options - Optional timeout / response type overrides.
   * @returns The extraction result on HTTP 200; otherwise `success: false`
   *          with a description of the failure. Never rejects on request errors.
   */
  static async processAutomationMultipleFiles(
    endpoint: string,
    formData: FormData,
    options: AutomationProcessingOptions = {}
  ): Promise<AutomationProcessingResult> {
    try {
      const response = await axios.post(endpoint, formData, {
        responseType: options.responseType ?? 'blob',
        // `??` rather than `||` so an explicit timeout of 0 is passed through.
        timeout: options.timeout ?? AUTOMATION_CONSTANTS.OPERATION_TIMEOUT
      });

      if (response.status !== 200) {
        return AutomationFileProcessor.failure(
          `Automation step failed - HTTP ${response.status}: ${response.statusText}`
        );
      }

      // Referenced by class name (not `this`) so the method still works when
      // it is passed around detached from the class.
      return await AutomationFileProcessor.extractAutomationZipFiles(response.data);
    } catch (error: unknown) {
      const reason = await AutomationFileProcessor.describeRequestError(error);
      return AutomationFileProcessor.failure(`Automation step failed: ${reason}`);
    }
  }

  /**
   * Build the multipart form body for an automation tool request.
   *
   * Files are appended under `fileFieldName`. Each parameter is appended under
   * its own key; array values are appended once per element. `null` and
   * `undefined` values are skipped, both as scalars and as array elements.
   * Other non-Blob values are converted with `String(...)`.
   *
   * @param parameters - Tool parameters keyed by form field name.
   * @param files - One file or a list of files to upload.
   * @param fileFieldName - Form field used for the files (default `fileInput`).
   * @returns The populated `FormData`.
   */
  static buildAutomationFormData(
    parameters: Record<string, any>,
    files: File | File[],
    fileFieldName: string = 'fileInput'
  ): FormData {
    const formData = new FormData();

    const fileList = Array.isArray(files) ? files : [files];
    for (const file of fileList) {
      formData.append(fileFieldName, file);
    }

    for (const [key, value] of Object.entries(parameters)) {
      const items: unknown[] = Array.isArray(value) ? value : [value];
      for (const item of items) {
        // Skip missing values instead of sending the strings "null"/"undefined".
        if (item === undefined || item === null) continue;
        formData.append(key, item instanceof Blob ? item : String(item));
      }
    }

    return formData;
  }

  /** Build a failed result carrying a single error message. */
  private static failure(message: string): AutomationProcessingResult {
    return { success: false, files: [], errors: [message] };
  }

  /**
   * Turn a caught request error into readable text.
   *
   * Prefers the response body when the error carries one. With
   * `responseType: 'blob'` that body is a Blob, which would otherwise
   * stringify as "[object Blob]", so its text content is read instead.
   * Falls back to the error's `message` (or its string form).
   */
  private static async describeRequestError(error: unknown): Promise<string> {
    const details = (typeof error === 'object' && error !== null ? error : {}) as {
      message?: unknown;
      response?: { data?: unknown } | null;
    };
    const fallback = typeof details.message === 'string' ? details.message : String(error);

    const data = details.response?.data;
    if (!data) return fallback;
    if (typeof data === 'string') return data;

    if (typeof Blob !== 'undefined' && data instanceof Blob) {
      try {
        const text = (await data.text()).trim();
        return text || fallback;
      } catch {
        return fallback;
      }
    }

    if (typeof data === 'object') {
      try {
        return JSON.stringify(data);
      } catch {
        return fallback;
      }
    }

    return String(data);
  }
}