mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-08-16 13:47:28 +02:00
Clean up zip use; updated OCR
This commit is contained in:
parent
4a38ff2731
commit
1770d7fa3b
@ -4,42 +4,7 @@ import { useTranslation } from 'react-i18next';
|
|||||||
import { OCRParameters } from '../../../components/tools/ocr/OCRSettings';
|
import { OCRParameters } from '../../../components/tools/ocr/OCRSettings';
|
||||||
import { useToolOperation, ToolOperationConfig } from '../shared/useToolOperation';
|
import { useToolOperation, ToolOperationConfig } from '../shared/useToolOperation';
|
||||||
import { createStandardErrorHandler } from '../../../utils/toolErrorHandler';
|
import { createStandardErrorHandler } from '../../../utils/toolErrorHandler';
|
||||||
|
import { useToolResources } from '../shared/useToolResources';
|
||||||
/**
 * Unpack a ZIP blob into individual File objects.
 *
 * Directory entries are skipped. Each remaining entry becomes a File whose
 * name is the entry's name inside the archive and whose MIME type is chosen
 * by getMimeType from that name.
 *
 * @param zipBlob - Blob holding the raw ZIP archive bytes.
 * @returns The extracted files, in the archive's entry iteration order.
 */
async function extractZipFile(zipBlob: Blob): Promise<File[]> {
  // jszip is loaded lazily so it is only fetched when a ZIP actually needs unpacking.
  const jszipModule = await import('jszip');
  const archive = new jszipModule.default();

  const rawBytes = await zipBlob.arrayBuffer();
  const loaded = await archive.loadAsync(rawBytes);

  const results: File[] = [];
  for (const [entryName, entry] of Object.entries(loaded.files)) {
    // Folder entries carry no payload.
    if (entry.dir) {
      continue;
    }
    const payload = await entry.async('blob');
    results.push(new File([payload], entryName, { type: getMimeType(entryName) }));
  }

  return results;
}
|
|
||||||
|
|
||||||
/**
 * Pick a MIME type for a file based on its extension.
 *
 * Only the final path segment is inspected, so dots in directory names of a
 * ZIP entry path are ignored. A name with no dot has no extension and maps to
 * the generic binary type; previously a bare name such as "pdf" was treated
 * as if it were its own extension.
 *
 * @param filename - File name or ZIP entry path (may contain "/" separators).
 * @returns 'application/pdf', 'text/plain' or 'application/zip' for the
 *          matching extension (case-insensitive), otherwise
 *          'application/octet-stream'.
 */
function getMimeType(filename: string): string {
  const fallback = 'application/octet-stream';

  // lastIndexOf returns -1 when there is no "/", so slice(0) keeps the whole name.
  const baseName = filename.slice(filename.lastIndexOf('/') + 1);
  const dotIndex = baseName.lastIndexOf('.');
  if (dotIndex === -1) {
    return fallback;
  }

  switch (baseName.slice(dotIndex + 1).toLowerCase()) {
    case 'pdf':
      return 'application/pdf';
    case 'txt':
      return 'text/plain';
    case 'zip':
      return 'application/zip';
    default:
      return fallback;
  }
}
|
|
||||||
|
|
||||||
const buildFormData = (parameters: OCRParameters, file: File): FormData => {
|
const buildFormData = (parameters: OCRParameters, file: File): FormData => {
|
||||||
const formData = new FormData();
|
const formData = new FormData();
|
||||||
@ -68,15 +33,12 @@ const buildFormData = (parameters: OCRParameters, file: File): FormData => {
|
|||||||
|
|
||||||
export const useOCROperation = () => {
|
export const useOCROperation = () => {
|
||||||
const { t } = useTranslation();
|
const { t } = useTranslation();
|
||||||
|
const { extractZipFiles } = useToolResources();
|
||||||
|
|
||||||
const customOCRProcessor = useCallback(async (
|
const customOCRProcessor = useCallback(async (
|
||||||
parameters: OCRParameters,
|
parameters: OCRParameters,
|
||||||
selectedFiles: File[]
|
selectedFiles: File[]
|
||||||
): Promise<File[]> => {
|
): Promise<File[]> => {
|
||||||
if (parameters.languages.length === 0) {
|
|
||||||
throw new Error(t('ocr.validation.languageRequired', 'Please select at least one language for OCR processing.'));
|
|
||||||
}
|
|
||||||
|
|
||||||
const processedFiles: File[] = [];
|
const processedFiles: File[] = [];
|
||||||
const failedFiles: string[] = [];
|
const failedFiles: string[] = [];
|
||||||
|
|
||||||
@ -85,7 +47,7 @@ export const useOCROperation = () => {
|
|||||||
const file = selectedFiles[i];
|
const file = selectedFiles[i];
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const formData = buildFormData(file, parameters);
|
const formData = buildFormData(parameters, file);
|
||||||
const response = await axios.post('/api/v1/misc/ocr-pdf', formData, {
|
const response = await axios.post('/api/v1/misc/ocr-pdf', formData, {
|
||||||
responseType: "blob"
|
responseType: "blob"
|
||||||
});
|
});
|
||||||
@ -111,16 +73,22 @@ export const useOCROperation = () => {
|
|||||||
// Check if it's a ZIP file (OCR service returns ZIP when sidecar is enabled or for multi-file results)
|
// Check if it's a ZIP file (OCR service returns ZIP when sidecar is enabled or for multi-file results)
|
||||||
if (header.startsWith('PK')) {
|
if (header.startsWith('PK')) {
|
||||||
try {
|
try {
|
||||||
// Extract ZIP file contents
|
// Extract ZIP file contents using tool resources
|
||||||
const zipFiles = await extractZipFile(response.data);
|
const zipBlob = new Blob([arrayBuffer]);
|
||||||
|
const extractedFiles = await extractZipFiles(zipBlob);
|
||||||
|
|
||||||
// Add extracted files to processed files
|
if (extractedFiles.length > 0) {
|
||||||
processedFiles.push(...zipFiles);
|
// Add extracted files to processed files
|
||||||
|
processedFiles.push(...extractedFiles);
|
||||||
|
} else {
|
||||||
|
// Fallback to treating as single ZIP file if extraction failed
|
||||||
|
const zipFile = new File([arrayBuffer], `ocr_${file.name}.zip`, { type: 'application/zip' });
|
||||||
|
processedFiles.push(zipFile);
|
||||||
|
}
|
||||||
} catch (extractError) {
|
} catch (extractError) {
|
||||||
// Fallback to treating as single ZIP file
|
// Fallback to treating as single ZIP file
|
||||||
const blob = new Blob([response.data], { type: 'application/zip' });
|
const zipFile = new File([arrayBuffer], `ocr_${file.name}.zip`, { type: 'application/zip' });
|
||||||
const processedFile = new File([blob], `ocr_${file.name}.zip`, { type: 'application/zip' });
|
processedFiles.push(zipFile);
|
||||||
processedFiles.push(processedFile);
|
|
||||||
}
|
}
|
||||||
continue; // Skip the PDF validation for ZIP files
|
continue; // Skip the PDF validation for ZIP files
|
||||||
}
|
}
|
||||||
@ -150,7 +118,7 @@ export const useOCROperation = () => {
|
|||||||
throw new Error(`Response is not a valid PDF file. Header: "${header}"`);
|
throw new Error(`Response is not a valid PDF file. Header: "${header}"`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const blob = new Blob([response.data], { type: contentType });
|
const blob = new Blob([arrayBuffer], { type: contentType });
|
||||||
const processedFile = new File([blob], `ocr_${file.name}`, { type: contentType });
|
const processedFile = new File([blob], `ocr_${file.name}`, { type: contentType });
|
||||||
|
|
||||||
processedFiles.push(processedFile);
|
processedFiles.push(processedFile);
|
||||||
|
@ -171,10 +171,8 @@ export const useToolOperation = <TParams = void>(
|
|||||||
|
|
||||||
// Handle response based on responseHandler
|
// Handle response based on responseHandler
|
||||||
if (config.responseHandler?.type === 'zip' && config.responseHandler?.useZipExtractor) {
|
if (config.responseHandler?.type === 'zip' && config.responseHandler?.useZipExtractor) {
|
||||||
const zipFile = new File([response.data], 'results.zip', { type: 'application/zip' });
|
// Use tool resources for ZIP extraction
|
||||||
const { zipFileService } = await import('../../../services/zipFileService');
|
processedFiles = await toolResources.extractZipFiles(response.data);
|
||||||
const extractionResult = await zipFileService.extractPdfFiles(zipFile);
|
|
||||||
processedFiles = extractionResult.success ? extractionResult.extractedFiles : [];
|
|
||||||
} else {
|
} else {
|
||||||
// Single file response
|
// Single file response
|
||||||
const filename = validFiles.length === 1
|
const filename = validFiles.length === 1
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import { useState, useCallback, useEffect } from 'react';
|
import { useState, useCallback, useEffect } from 'react';
|
||||||
import { generateThumbnailForFile } from '../../../utils/thumbnailUtils';
|
import { generateThumbnailForFile } from '../../../utils/thumbnailUtils';
|
||||||
|
import { zipFileService } from '../../../services/zipFileService';
|
||||||
|
|
||||||
export const useToolResources = () => {
|
export const useToolResources = () => {
|
||||||
const [blobUrls, setBlobUrls] = useState<string[]>([]);
|
const [blobUrls, setBlobUrls] = useState<string[]>([]);
|
||||||
@ -48,6 +49,12 @@ export const useToolResources = () => {
|
|||||||
return thumbnails;
|
return thumbnails;
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
|
const extractZipFiles = useCallback(async (zipBlob: Blob): Promise<File[]> => {
|
||||||
|
const zipFile = new File([zipBlob], 'temp.zip', { type: 'application/zip' });
|
||||||
|
const extractionResult = await zipFileService.extractPdfFiles(zipFile);
|
||||||
|
return extractionResult.success ? extractionResult.extractedFiles : [];
|
||||||
|
}, []);
|
||||||
|
|
||||||
const createDownloadInfo = useCallback(async (
|
const createDownloadInfo = useCallback(async (
|
||||||
files: File[],
|
files: File[],
|
||||||
operationType: string
|
operationType: string
|
||||||
@ -58,24 +65,18 @@ export const useToolResources = () => {
|
|||||||
return { url, filename: files[0].name };
|
return { url, filename: files[0].name };
|
||||||
}
|
}
|
||||||
|
|
||||||
// Multiple files - create zip
|
// Multiple files - create zip using shared service
|
||||||
const JSZip = (await import('jszip')).default;
|
const { zipFile } = await zipFileService.createZipFromFiles(files, `${operationType}_results.zip`);
|
||||||
const zip = new JSZip();
|
const url = URL.createObjectURL(zipFile);
|
||||||
|
|
||||||
files.forEach(file => {
|
|
||||||
zip.file(file.name, file);
|
|
||||||
});
|
|
||||||
|
|
||||||
const zipBlob = await zip.generateAsync({ type: 'blob' });
|
|
||||||
const url = URL.createObjectURL(zipBlob);
|
|
||||||
addBlobUrl(url);
|
addBlobUrl(url);
|
||||||
|
|
||||||
return { url, filename: `${operationType}_results.zip` };
|
return { url, filename: zipFile.name };
|
||||||
}, [addBlobUrl]);
|
}, [addBlobUrl]);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
generateThumbnails,
|
generateThumbnails,
|
||||||
createDownloadInfo,
|
createDownloadInfo,
|
||||||
|
extractZipFiles,
|
||||||
cleanupBlobUrls,
|
cleanupBlobUrls,
|
||||||
};
|
};
|
||||||
};
|
};
|
@ -1,4 +1,4 @@
|
|||||||
import { zipFileService } from '../services/zipFileService';
|
// Note: This utility should be used with useToolResources for ZIP operations
|
||||||
|
|
||||||
export interface ResponseHandler {
|
export interface ResponseHandler {
|
||||||
type: 'single' | 'zip' | 'custom';
|
type: 'single' | 'zip' | 'custom';
|
||||||
@ -12,6 +12,7 @@ const defaultResponseHandler: ResponseHandler = {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Processes API response blob based on handler configuration
|
* Processes API response blob based on handler configuration
|
||||||
|
* Note: For ZIP extraction, use useToolResources.extractZipFiles instead
|
||||||
*/
|
*/
|
||||||
export const processResponse = async (
|
export const processResponse = async (
|
||||||
blob: Blob,
|
blob: Blob,
|
||||||
@ -24,9 +25,8 @@ export const processResponse = async (
|
|||||||
switch (handler.type) {
|
switch (handler.type) {
|
||||||
case 'zip':
|
case 'zip':
|
||||||
if (handler.useZipExtractor) {
|
if (handler.useZipExtractor) {
|
||||||
const zipFile = new File([blob], 'result.zip', { type: 'application/zip' });
|
// This path should be avoided - use useToolResources.extractZipFiles instead
|
||||||
const extractionResult = await zipFileService.extractPdfFiles(zipFile);
|
throw new Error('ZIP extraction should use useToolResources.extractZipFiles');
|
||||||
return extractionResult.success ? extractionResult.extractedFiles : [];
|
|
||||||
}
|
}
|
||||||
// Fall through to custom if no zip extractor
|
// Fall through to custom if no zip extractor
|
||||||
case 'custom':
|
case 'custom':
|
||||||
|
Loading…
Reference in New Issue
Block a user