mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-08-11 13:48:37 +02:00
Clean up zip use; updated OCR
This commit is contained in:
parent
4a38ff2731
commit
1770d7fa3b
@ -4,42 +4,7 @@ import { useTranslation } from 'react-i18next';
|
||||
import { OCRParameters } from '../../../components/tools/ocr/OCRSettings';
|
||||
import { useToolOperation, ToolOperationConfig } from '../shared/useToolOperation';
|
||||
import { createStandardErrorHandler } from '../../../utils/toolErrorHandler';
|
||||
|
||||
//Extract files from a ZIP blob
|
||||
/**
 * Unpacks a ZIP blob into one `File` per non-directory archive entry.
 *
 * JSZip is loaded lazily via dynamic import. Entries are read one after
 * another in the order JSZip lists them; each resulting `File` keeps the
 * entry's name as stored in the archive and gets its MIME type from
 * `getMimeType`.
 *
 * @param zipBlob - Blob holding the ZIP archive bytes.
 * @returns The extracted files (directory entries are skipped).
 */
async function extractZipFile(zipBlob: Blob): Promise<File[]> {
  const { default: JSZip } = await import('jszip');
  const archive = await new JSZip().loadAsync(await zipBlob.arrayBuffer());

  const results: File[] = [];
  for (const entryName of Object.keys(archive.files)) {
    const entry = archive.files[entryName];
    if (entry.dir) {
      // Directory entries carry no payload.
      continue;
    }
    const payload = await entry.async('blob');
    results.push(new File([payload], entryName, { type: getMimeType(entryName) }));
  }

  return results;
}
|
||||
|
||||
//Get MIME type based on file extension
|
||||
/**
 * Maps a file name to a MIME type based on its extension.
 *
 * The extension is the text after the last `.` in the name, compared
 * case-insensitively. Names that contain no `.` at all have no extension and
 * fall back to `application/octet-stream`, so a file literally called `pdf`
 * is not mistaken for a PDF.
 *
 * @param filename - File name (may include a path prefix from a ZIP entry).
 * @returns `application/pdf`, `text/plain`, `application/zip`, or
 *   `application/octet-stream` for anything else.
 */
function getMimeType(filename: string): string {
  const fallback = 'application/octet-stream';

  const dotIndex = filename.lastIndexOf('.');
  if (dotIndex === -1) {
    // No dot means no extension; do not treat the whole name as one.
    return fallback;
  }

  const ext = filename.slice(dotIndex + 1).toLowerCase();
  switch (ext) {
    case 'pdf':
      return 'application/pdf';
    case 'txt':
      return 'text/plain';
    case 'zip':
      return 'application/zip';
    default:
      return fallback;
  }
}
|
||||
import { useToolResources } from '../shared/useToolResources';
|
||||
|
||||
const buildFormData = (parameters: OCRParameters, file: File): FormData => {
|
||||
const formData = new FormData();
|
||||
@ -68,15 +33,12 @@ const buildFormData = (parameters: OCRParameters, file: File): FormData => {
|
||||
|
||||
export const useOCROperation = () => {
|
||||
const { t } = useTranslation();
|
||||
const { extractZipFiles } = useToolResources();
|
||||
|
||||
const customOCRProcessor = useCallback(async (
|
||||
parameters: OCRParameters,
|
||||
selectedFiles: File[]
|
||||
): Promise<File[]> => {
|
||||
if (parameters.languages.length === 0) {
|
||||
throw new Error(t('ocr.validation.languageRequired', 'Please select at least one language for OCR processing.'));
|
||||
}
|
||||
|
||||
const processedFiles: File[] = [];
|
||||
const failedFiles: string[] = [];
|
||||
|
||||
@ -85,7 +47,7 @@ export const useOCROperation = () => {
|
||||
const file = selectedFiles[i];
|
||||
|
||||
try {
|
||||
const formData = buildFormData(file, parameters);
|
||||
const formData = buildFormData(parameters, file);
|
||||
const response = await axios.post('/api/v1/misc/ocr-pdf', formData, {
|
||||
responseType: "blob"
|
||||
});
|
||||
@ -111,16 +73,22 @@ export const useOCROperation = () => {
|
||||
// Check if it's a ZIP file (OCR service returns ZIP when sidecar is enabled or for multi-file results)
|
||||
if (header.startsWith('PK')) {
|
||||
try {
|
||||
// Extract ZIP file contents
|
||||
const zipFiles = await extractZipFile(response.data);
|
||||
// Extract ZIP file contents using tool resources
|
||||
const zipBlob = new Blob([arrayBuffer]);
|
||||
const extractedFiles = await extractZipFiles(zipBlob);
|
||||
|
||||
// Add extracted files to processed files
|
||||
processedFiles.push(...zipFiles);
|
||||
if (extractedFiles.length > 0) {
|
||||
// Add extracted files to processed files
|
||||
processedFiles.push(...extractedFiles);
|
||||
} else {
|
||||
// Fallback to treating as single ZIP file if extraction failed
|
||||
const zipFile = new File([arrayBuffer], `ocr_${file.name}.zip`, { type: 'application/zip' });
|
||||
processedFiles.push(zipFile);
|
||||
}
|
||||
} catch (extractError) {
|
||||
// Fallback to treating as single ZIP file
|
||||
const blob = new Blob([response.data], { type: 'application/zip' });
|
||||
const processedFile = new File([blob], `ocr_${file.name}.zip`, { type: 'application/zip' });
|
||||
processedFiles.push(processedFile);
|
||||
const zipFile = new File([arrayBuffer], `ocr_${file.name}.zip`, { type: 'application/zip' });
|
||||
processedFiles.push(zipFile);
|
||||
}
|
||||
continue; // Skip the PDF validation for ZIP files
|
||||
}
|
||||
@ -150,7 +118,7 @@ export const useOCROperation = () => {
|
||||
throw new Error(`Response is not a valid PDF file. Header: "${header}"`);
|
||||
}
|
||||
|
||||
const blob = new Blob([response.data], { type: contentType });
|
||||
const blob = new Blob([arrayBuffer], { type: contentType });
|
||||
const processedFile = new File([blob], `ocr_${file.name}`, { type: contentType });
|
||||
|
||||
processedFiles.push(processedFile);
|
||||
|
@ -171,10 +171,8 @@ export const useToolOperation = <TParams = void>(
|
||||
|
||||
// Handle response based on responseHandler
|
||||
if (config.responseHandler?.type === 'zip' && config.responseHandler?.useZipExtractor) {
|
||||
const zipFile = new File([response.data], 'results.zip', { type: 'application/zip' });
|
||||
const { zipFileService } = await import('../../../services/zipFileService');
|
||||
const extractionResult = await zipFileService.extractPdfFiles(zipFile);
|
||||
processedFiles = extractionResult.success ? extractionResult.extractedFiles : [];
|
||||
// Use tool resources for ZIP extraction
|
||||
processedFiles = await toolResources.extractZipFiles(response.data);
|
||||
} else {
|
||||
// Single file response
|
||||
const filename = validFiles.length === 1
|
||||
|
@ -1,5 +1,6 @@
|
||||
import { useState, useCallback, useEffect } from 'react';
|
||||
import { generateThumbnailForFile } from '../../../utils/thumbnailUtils';
|
||||
import { zipFileService } from '../../../services/zipFileService';
|
||||
|
||||
export const useToolResources = () => {
|
||||
const [blobUrls, setBlobUrls] = useState<string[]>([]);
|
||||
@ -48,6 +49,12 @@ export const useToolResources = () => {
|
||||
return thumbnails;
|
||||
}, []);
|
||||
|
||||
// Wraps the blob in a File named 'temp.zip' and hands it to
// zipFileService.extractPdfFiles. Resolves to the service's extractedFiles
// when the result reports success, otherwise to an empty array.
const extractZipFiles = useCallback(async (zipBlob: Blob): Promise<File[]> => {
  const archive = new File([zipBlob], 'temp.zip', { type: 'application/zip' });
  const outcome = await zipFileService.extractPdfFiles(archive);

  if (!outcome.success) {
    return [];
  }
  return outcome.extractedFiles;
}, []);
|
||||
|
||||
const createDownloadInfo = useCallback(async (
|
||||
files: File[],
|
||||
operationType: string
|
||||
@ -58,24 +65,18 @@ export const useToolResources = () => {
|
||||
return { url, filename: files[0].name };
|
||||
}
|
||||
|
||||
// Multiple files - create zip
|
||||
const JSZip = (await import('jszip')).default;
|
||||
const zip = new JSZip();
|
||||
|
||||
files.forEach(file => {
|
||||
zip.file(file.name, file);
|
||||
});
|
||||
|
||||
const zipBlob = await zip.generateAsync({ type: 'blob' });
|
||||
const url = URL.createObjectURL(zipBlob);
|
||||
// Multiple files - create zip using shared service
|
||||
const { zipFile } = await zipFileService.createZipFromFiles(files, `${operationType}_results.zip`);
|
||||
const url = URL.createObjectURL(zipFile);
|
||||
addBlobUrl(url);
|
||||
|
||||
return { url, filename: `${operationType}_results.zip` };
|
||||
return { url, filename: zipFile.name };
|
||||
}, [addBlobUrl]);
|
||||
|
||||
return {
|
||||
generateThumbnails,
|
||||
createDownloadInfo,
|
||||
extractZipFiles,
|
||||
cleanupBlobUrls,
|
||||
};
|
||||
};
|
@ -1,4 +1,4 @@
|
||||
import { zipFileService } from '../services/zipFileService';
|
||||
// Note: This utility should be used with useToolResources for ZIP operations
|
||||
|
||||
export interface ResponseHandler {
|
||||
type: 'single' | 'zip' | 'custom';
|
||||
@ -12,6 +12,7 @@ const defaultResponseHandler: ResponseHandler = {
|
||||
|
||||
/**
|
||||
* Processes API response blob based on handler configuration
|
||||
* Note: For ZIP extraction, use useToolResources.extractZipFiles instead
|
||||
*/
|
||||
export const processResponse = async (
|
||||
blob: Blob,
|
||||
@ -24,9 +25,8 @@ export const processResponse = async (
|
||||
switch (handler.type) {
|
||||
case 'zip':
|
||||
if (handler.useZipExtractor) {
|
||||
const zipFile = new File([blob], 'result.zip', { type: 'application/zip' });
|
||||
const extractionResult = await zipFileService.extractPdfFiles(zipFile);
|
||||
return extractionResult.success ? extractionResult.extractedFiles : [];
|
||||
// This path should be avoided - use useToolResources.extractZipFiles instead
|
||||
throw new Error('ZIP extraction should use useToolResources.extractZipFiles');
|
||||
}
|
||||
// Fall through to custom if no zip extractor
|
||||
case 'custom':
|
||||
|
Loading…
Reference in New Issue
Block a user