Fix-convert-V2 (#5147)

Custom processors can now return a `consumedAllInputs` flag. This allows
many input files to be consumed by an operation that produces a single output.

Fixed multi-call conversion logic.
This commit is contained in:
ConnorYoh
2025-12-03 17:39:49 +00:00
committed by GitHub
parent 5d827df08c
commit f2bffe2dc6
13 changed files with 207 additions and 87 deletions

View File

@@ -1,5 +1,5 @@
import { useTranslation } from 'react-i18next';
import { ToolType, useToolOperation } from '@app/hooks/tools/shared/useToolOperation';
import { ToolType, useToolOperation, CustomProcessorResult } from '@app/hooks/tools/shared/useToolOperation';
import { AdjustContrastParameters, defaultParameters } from '@app/hooks/tools/adjustContrast/useAdjustContrastParameters';
import { PDFDocument as PDFLibDocument } from 'pdf-lib';
import { applyAdjustmentsToCanvas } from '@app/components/tools/adjustContrast/utils';
@@ -46,7 +46,7 @@ async function buildAdjustedPdfForFile(file: File, params: AdjustContrastParamet
return out;
}
async function processPdfClientSide(params: AdjustContrastParameters, files: File[]): Promise<File[]> {
async function processPdfClientSide(params: AdjustContrastParameters, files: File[]): Promise<CustomProcessorResult> {
// Limit concurrency to avoid exhausting memory/CPU while still getting speedups
// Heuristic: use up to 4 workers on capable machines, otherwise 2-3
let CONCURRENCY_LIMIT = 2;
@@ -72,7 +72,12 @@ async function processPdfClientSide(params: AdjustContrastParameters, files: Fil
return results;
};
return mapWithConcurrency(files, CONCURRENCY_LIMIT, (file) => buildAdjustedPdfForFile(file, params));
const processedFiles = await mapWithConcurrency(files, CONCURRENCY_LIMIT, (file) => buildAdjustedPdfForFile(file, params));
return {
files: processedFiles,
consumedAllInputs: false,
};
}
export const adjustContrastOperationConfig = {

View File

@@ -36,7 +36,10 @@ export function useAutomateOperation() {
);
console.log(`✅ Automation completed, returning ${finalResults.length} files`);
return finalResults;
return {
files: finalResults,
consumedAllInputs: false,
};
}, [toolRegistry]);
return useToolOperation<AutomateParameters>({

View File

@@ -3,8 +3,8 @@ import apiClient from '@app/services/apiClient';
import { useTranslation } from 'react-i18next';
import { ConvertParameters, defaultParameters } from '@app/hooks/tools/convert/useConvertParameters';
import { createFileFromApiResponse } from '@app/utils/fileResponseUtils';
import { useToolOperation, ToolType } from '@app/hooks/tools/shared/useToolOperation';
import { getEndpointUrl, isImageFormat, isWebFormat } from '@app/utils/convertUtils';
import { useToolOperation, ToolType, CustomProcessorResult } from '@app/hooks/tools/shared/useToolOperation';
import { getEndpointUrl, isImageFormat, isWebFormat, isOfficeFormat } from '@app/utils/convertUtils';
// Static function that can be used by both the hook and automation executor
export const shouldProcessFilesSeparately = (
@@ -21,6 +21,10 @@ export const shouldProcessFilesSeparately = (
(parameters.fromExtension === 'pdf' && parameters.toExtension === 'pdfa') ||
// PDF to text-like formats should be one output per input
(parameters.fromExtension === 'pdf' && ['txt', 'rtf', 'csv'].includes(parameters.toExtension)) ||
// PDF to office format conversions (each PDF should generate its own office file)
(parameters.fromExtension === 'pdf' && isOfficeFormat(parameters.toExtension)) ||
// Office files to PDF conversions (each file should be processed separately via LibreOffice)
(isOfficeFormat(parameters.fromExtension) && parameters.toExtension === 'pdf') ||
// Web files to PDF conversions (each web file should generate its own PDF)
((isWebFormat(parameters.fromExtension) || parameters.fromExtension === 'web') &&
parameters.toExtension === 'pdf') ||
@@ -98,7 +102,7 @@ export const createFileFromResponse = (
export const convertProcessor = async (
parameters: ConvertParameters,
selectedFiles: File[]
): Promise<File[]> => {
): Promise<CustomProcessorResult> => {
const processedFiles: File[] = [];
const endpoint = getEndpointUrl(parameters.fromExtension, parameters.toExtension);
@@ -107,7 +111,9 @@ export const convertProcessor = async (
}
// Convert-specific routing logic: decide batch vs individual processing
if (shouldProcessFilesSeparately(selectedFiles, parameters)) {
const isSeparateProcessing = shouldProcessFilesSeparately(selectedFiles, parameters);
if (isSeparateProcessing) {
// Individual processing for complex cases (PDF→image, smart detection, etc.)
for (const file of selectedFiles) {
try {
@@ -134,7 +140,14 @@ export const convertProcessor = async (
processedFiles.push(convertedFile);
}
return processedFiles;
// When batch processing multiple files into one output (e.g., 3 images → 1 PDF),
// mark all inputs as consumed even though there's only 1 output file
const isCombiningMultiple = !isSeparateProcessing && selectedFiles.length > 1;
return {
files: processedFiles,
consumedAllInputs: isCombiningMultiple,
};
};
// Static configuration object
@@ -151,7 +164,7 @@ export const useConvertOperation = () => {
const customConvertProcessor = useCallback(async (
parameters: ConvertParameters,
selectedFiles: File[]
): Promise<File[]> => {
): Promise<CustomProcessorResult> => {
return convertProcessor(parameters, selectedFiles);
}, []);

View File

@@ -1,6 +1,6 @@
import apiClient from '@app/services/apiClient';
import { useTranslation } from 'react-i18next';
import { ToolType, useToolOperation } from '@app/hooks/tools/shared/useToolOperation';
import { ToolType, useToolOperation, CustomProcessorResult } from '@app/hooks/tools/shared/useToolOperation';
import { createStandardErrorHandler } from '@app/utils/toolErrorHandler';
import { ExtractPagesParameters, defaultParameters } from '@app/hooks/tools/extractPages/useExtractPagesParameters';
import { pdfWorkerManager } from '@app/services/pdfWorkerManager';
@@ -23,7 +23,7 @@ async function resolveSelectionToCsv(expression: string, file: File): Promise<st
export const extractPagesOperationConfig = {
toolType: ToolType.custom,
operationType: 'extractPages',
customProcessor: async (parameters: ExtractPagesParameters, files: File[]): Promise<File[]> => {
customProcessor: async (parameters: ExtractPagesParameters, files: File[]): Promise<CustomProcessorResult> => {
const outputs: File[] = [];
for (const file of files) {
@@ -43,7 +43,10 @@ export const extractPagesOperationConfig = {
outputs.push(outFile);
}
return outputs;
return {
files: outputs,
consumedAllInputs: false,
};
},
defaultParameters,
} as const;

View File

@@ -1,10 +1,10 @@
import { useTranslation } from 'react-i18next';
import { useToolOperation, ToolType } from '@app/hooks/tools/shared/useToolOperation';
import { useToolOperation, ToolType, CustomProcessorResult } from '@app/hooks/tools/shared/useToolOperation';
import { createStandardErrorHandler } from '@app/utils/toolErrorHandler';
import { RemoveAnnotationsParameters, defaultParameters } from '@app/hooks/tools/removeAnnotations/useRemoveAnnotationsParameters';
import { PDFDocument, PDFName, PDFRef, PDFDict } from 'pdf-lib';
// Client-side PDF processing using PDF-lib
const removeAnnotationsProcessor = async (_parameters: RemoveAnnotationsParameters, files: File[]): Promise<File[]> => {
const removeAnnotationsProcessor = async (_parameters: RemoveAnnotationsParameters, files: File[]): Promise<CustomProcessorResult> => {
const processedFiles: File[] = [];
for (const file of files) {
@@ -75,7 +75,10 @@ const removeAnnotationsProcessor = async (_parameters: RemoveAnnotationsParamete
}
}
return processedFiles;
return {
files: processedFiles,
consumedAllInputs: false,
};
};
// Static configuration object

View File

@@ -4,6 +4,7 @@ import apiClient from '@app/services/apiClient'; // Our configured instance
import { processResponse, ResponseHandler } from '@app/utils/toolResponseProcessor';
import { isEmptyOutput } from '@app/services/errorUtils';
import type { ProcessingProgress } from '@app/hooks/tools/shared/useToolState';
import type { StirlingFile, FileId } from '@app/types/fileContext';
export interface ApiCallsConfig<TParams = void> {
endpoint: string | ((params: TParams) => string);
@@ -18,14 +19,14 @@ export const useToolApiCalls = <TParams = void>() => {
const processFiles = useCallback(async (
params: TParams,
validFiles: File[],
validFiles: StirlingFile[],
config: ApiCallsConfig<TParams>,
onProgress: (progress: ProcessingProgress) => void,
onStatus: (status: string) => void,
markFileError?: (fileId: string) => void,
): Promise<{ outputFiles: File[]; successSourceIds: string[] }> => {
markFileError?: (fileId: FileId) => void,
): Promise<{ outputFiles: File[]; successSourceIds: FileId[] }> => {
const processedFiles: File[] = [];
const successSourceIds: string[] = [];
const successSourceIds: FileId[] = [];
const failedFiles: string[] = [];
const total = validFiles.length;
@@ -35,7 +36,7 @@ export const useToolApiCalls = <TParams = void>() => {
for (let i = 0; i < validFiles.length; i++) {
const file = validFiles[i];
console.debug('[processFiles] Start', { index: i, total, name: file.name, fileId: (file as any).fileId });
console.debug('[processFiles] Start', { index: i, total, name: file.name, fileId: file.fileId });
onProgress({ current: i + 1, total, currentFileName: file.name });
onStatus(`Processing ${file.name} (${i + 1}/${total})`);
@@ -47,7 +48,7 @@ export const useToolApiCalls = <TParams = void>() => {
responseType: 'blob',
cancelToken: cancelTokenRef.current?.token,
});
console.debug('[processFiles] Response OK', { name: file.name, status: (response as any)?.status });
console.debug('[processFiles] Response OK', { name: file.name, status: response.status });
// Forward to shared response processor (uses tool-specific responseHandler if provided)
const responseFiles = await processResponse(
@@ -63,7 +64,7 @@ export const useToolApiCalls = <TParams = void>() => {
console.warn('[processFiles] Empty output treated as failure', { name: file.name });
failedFiles.push(file.name);
try {
(markFileError as any)?.((file as any).fileId);
markFileError?.(file.fileId);
} catch (e) {
console.debug('markFileError', e);
}
@@ -71,7 +72,7 @@ export const useToolApiCalls = <TParams = void>() => {
}
processedFiles.push(...responseFiles);
// record source id as successful
successSourceIds.push((file as any).fileId);
successSourceIds.push(file.fileId);
console.debug('[processFiles] Success', { name: file.name, produced: responseFiles.length });
} catch (error) {
@@ -82,7 +83,7 @@ export const useToolApiCalls = <TParams = void>() => {
failedFiles.push(file.name);
// mark errored file so UI can highlight
try {
(markFileError as any)?.((file as any).fileId);
markFileError?.(file.fileId);
} catch (e) {
console.debug('markFileError', e);
}

View File

@@ -8,6 +8,7 @@ import { useToolResources } from '@app/hooks/tools/shared/useToolResources';
import { extractErrorMessage } from '@app/utils/toolErrorHandler';
import { StirlingFile, extractFiles, FileId, StirlingFileStub, createStirlingFile } from '@app/types/fileContext';
import { FILE_EVENTS } from '@app/services/errorUtils';
import { getFilenameWithoutExtension } from '@app/utils/fileUtils';
import { ResponseHandler } from '@app/utils/toolResponseProcessor';
import { createChildStub, generateProcessedFileMetadata } from '@app/contexts/file/fileActions';
import { ToolOperation } from '@app/types/file';
@@ -23,6 +24,20 @@ export enum ToolType {
custom,
}
/**
 * Value a custom processor resolves with: the produced files plus an optional
 * hint describing how the inputs were consumed.
 */
export interface CustomProcessorResult {
  /** Output files produced by the processor. */
  files: Array<File>;
  /**
   * Set to true to treat every input file as successfully consumed, no matter
   * how many outputs were produced — intended for operations that merge
   * N inputs into fewer outputs (e.g., 3 images → 1 PDF).
   * When false or omitted, successful inputs are determined by matching
   * output filenames back to input filenames.
   */
  consumedAllInputs?: boolean;
}
/**
* Configuration for tool operations defining processing behavior and API integration.
*
@@ -98,8 +113,12 @@ export interface CustomToolOperationConfig<TParams> extends BaseToolOperationCon
* Custom processing logic that completely bypasses standard file processing.
* This tool handles all API calls, response processing, and file creation.
* Use for tools with complex routing logic or non-standard processing requirements.
*
* Returns CustomProcessorResult with:
* - files: Processed output files
* - consumedAllInputs: true if operation combines N inputs → fewer outputs
*/
customProcessor: (params: TParams, files: File[]) => Promise<File[]>;
customProcessor: (params: TParams, files: File[]) => Promise<CustomProcessorResult>;
}
export type ToolOperationConfig<TParams = void> = SingleFileToolOperationConfig<TParams> | MultiFileToolOperationConfig<TParams> | CustomToolOperationConfig<TParams>;
@@ -172,17 +191,17 @@ export const useToolOperation = <TParams>(
}
// Handle zero-byte inputs explicitly: mark as error and continue with others
const zeroByteFiles = selectedFiles.filter(file => (file as any)?.size === 0);
const zeroByteFiles = selectedFiles.filter(file => file.size === 0);
if (zeroByteFiles.length > 0) {
try {
for (const f of zeroByteFiles) {
(fileActions.markFileError as any)((f as any).fileId);
fileActions.markFileError(f.fileId);
}
} catch (e) {
console.log('markFileError', e);
}
}
const validFiles = selectedFiles.filter(file => (file as any)?.size > 0);
const validFiles: StirlingFile[] = selectedFiles.filter(file => file.size > 0);
if (validFiles.length === 0) {
actions.setError(t('noValidFiles', 'No valid files to process'));
return;
@@ -215,7 +234,7 @@ export const useToolOperation = <TParams>(
try {
let processedFiles: File[];
let successSourceIds: string[] = [];
let successSourceIds: FileId[] = [];
// Use original files directly (no PDF metadata injection - history stored in IndexedDB)
const filesForAPI = extractFiles(validFiles);
@@ -233,14 +252,14 @@ export const useToolOperation = <TParams>(
console.debug('[useToolOperation] Multi-file start', { count: filesForAPI.length });
const result = await processFiles(
params,
filesForAPI,
validFiles,
apiCallsConfig,
actions.setProgress,
actions.setStatus,
fileActions.markFileError as any
fileActions.markFileError
);
processedFiles = result.outputFiles;
successSourceIds = result.successSourceIds as any;
successSourceIds = result.successSourceIds;
console.debug('[useToolOperation] Multi-file results', { outputFiles: processedFiles.length, successSources: result.successSourceIds.length });
break;
}
@@ -268,30 +287,40 @@ export const useToolOperation = <TParams>(
processedFiles = await extractZipFiles(response.data);
}
// Assume all inputs succeeded together unless server provided an error earlier
successSourceIds = validFiles.map(f => (f as any).fileId) as any;
successSourceIds = validFiles.map(f => f.fileId);
break;
}
case ToolType.custom: {
actions.setStatus('Processing files...');
processedFiles = await config.customProcessor(params, filesForAPI);
// Try to map outputs back to inputs by filename (before extension)
const inputBaseNames = new Map<string, string>();
for (const f of validFiles) {
const base = (f.name || '').replace(/\.[^.]+$/, '').toLowerCase();
inputBaseNames.set(base, (f as any).fileId);
}
const mappedSuccess: string[] = [];
for (const out of processedFiles) {
const base = (out.name || '').replace(/\.[^.]+$/, '').toLowerCase();
const id = inputBaseNames.get(base);
if (id) mappedSuccess.push(id);
}
// Fallback to naive alignment if names don't match
if (mappedSuccess.length === 0) {
successSourceIds = validFiles.slice(0, processedFiles.length).map(f => (f as any).fileId) as any;
const result = await config.customProcessor(params, filesForAPI);
processedFiles = result.files;
const consumedAllInputs = result.consumedAllInputs || false;
// If consumedAllInputs flag is set, mark all inputs as successful
// (used for operations that combine N inputs into fewer outputs)
if (consumedAllInputs) {
successSourceIds = validFiles.map(f => f.fileId);
} else {
successSourceIds = mappedSuccess as any;
// Try to map outputs back to inputs by filename (before extension)
const inputBaseNames = new Map<string, FileId>();
for (const f of validFiles) {
const base = getFilenameWithoutExtension(f.name || '');
inputBaseNames.set(base, f.fileId);
}
const mappedSuccess: FileId[] = [];
for (const out of processedFiles) {
const base = getFilenameWithoutExtension(out.name || '');
const id = inputBaseNames.get(base);
if (id) mappedSuccess.push(id);
}
// Fallback to naive alignment if names don't match
if (mappedSuccess.length === 0) {
successSourceIds = validFiles.slice(0, processedFiles.length).map(f => f.fileId);
} else {
successSourceIds = mappedSuccess;
}
}
break;
}
@@ -299,16 +328,16 @@ export const useToolOperation = <TParams>(
// Normalize error flags across tool types: mark failures, clear successes
try {
const allInputIds = validFiles.map(f => (f as any).fileId) as unknown as string[];
const okSet = new Set((successSourceIds as unknown as string[]) || []);
const allInputIds = validFiles.map(f => f.fileId);
const okSet = new Set(successSourceIds);
// Clear errors on successes
for (const okId of okSet) {
try { (fileActions.clearFileError as any)(okId); } catch (_e) { void _e; }
try { fileActions.clearFileError(okId); } catch (_e) { void _e; }
}
// Mark errors on inputs that didn't succeed
for (const id of allInputIds) {
if (!okSet.has(id)) {
try { (fileActions.markFileError as any)(id); } catch (_e) { void _e; }
try { fileActions.markFileError(id); } catch (_e) { void _e; }
}
}
} catch (_e) { void _e; }
@@ -316,12 +345,12 @@ export const useToolOperation = <TParams>(
if (externalErrorFileIds.length > 0) {
// If backend told us which sources failed, prefer that mapping
successSourceIds = validFiles
.map(f => (f as any).fileId)
.filter(id => !externalErrorFileIds.includes(id)) as any;
.map(f => f.fileId)
.filter(id => !externalErrorFileIds.includes(id));
// Also mark failed IDs immediately
try {
for (const badId of externalErrorFileIds) {
(fileActions.markFileError as any)(badId);
fileActions.markFileError(badId as FileId);
}
} catch (_e) { void _e; }
}
@@ -370,7 +399,7 @@ export const useToolOperation = <TParams>(
);
// Always create child stubs linking back to the successful source inputs
const successInputStubs = successSourceIds
.map((id) => selectors.getStirlingFileStub(id as any))
.map((id) => selectors.getStirlingFileStub(id))
.filter(Boolean) as StirlingFileStub[];
if (successInputStubs.length !== processedFiles.length) {
@@ -396,7 +425,7 @@ export const useToolOperation = <TParams>(
return createStirlingFile(file, childStub.id);
});
// Build consumption arrays aligned to the successful source IDs
const toConsumeInputIds = successSourceIds.filter((id: string) => inputFileIds.includes(id as any)) as unknown as FileId[];
const toConsumeInputIds = successSourceIds.filter((id) => inputFileIds.includes(id));
// Outputs and stubs are already ordered by success sequence
console.debug('[useToolOperation] Consuming files', { inputCount: inputFileIds.length, toConsume: toConsumeInputIds.length });
const outputFileIds = await consumeFiles(toConsumeInputIds, outputStirlingFiles, outputStirlingFileStubs);
@@ -413,25 +442,27 @@ export const useToolOperation = <TParams>(
} catch (error: any) {
// Centralized 422 handler: mark provided IDs in errorFileIds
try {
const status = (error?.response?.status as number | undefined);
if (status === 422) {
const status = error?.response?.status;
if (typeof status === 'number' && status === 422) {
const payload = error?.response?.data;
let parsed: any = payload;
let parsed: unknown = payload;
if (typeof payload === 'string') {
try { parsed = JSON.parse(payload); } catch { parsed = payload; }
} else if (payload && typeof (payload as any).text === 'function') {
} else if (payload && typeof (payload as Blob).text === 'function') {
// Blob or Response-like object from axios when responseType='blob'
const text = await (payload as Blob).text();
try { parsed = JSON.parse(text); } catch { parsed = text; }
}
let ids: string[] | undefined = Array.isArray(parsed?.errorFileIds) ? parsed.errorFileIds : undefined;
let ids: string[] | undefined = Array.isArray((parsed as { errorFileIds?: unknown })?.errorFileIds)
? (parsed as { errorFileIds: string[] }).errorFileIds
: undefined;
if (!ids && typeof parsed === 'string') {
const match = parsed.match(/[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}/g);
if (match && match.length > 0) ids = Array.from(new Set(match));
}
if (ids && ids.length > 0) {
for (const badId of ids) {
try { (fileActions.markFileError as any)(badId); } catch (_e) { void _e; }
try { fileActions.markFileError(badId as FileId); } catch (_e) { void _e; }
}
actions.setStatus('Process failed due to invalid/corrupted file(s)');
// Avoid duplicating toast messaging here

View File

@@ -158,8 +158,8 @@ export const executeToolOperationWithPrefix = async (
try {
// Check if tool uses custom processor (like Convert tool)
if (config.customProcessor) {
const resultFiles = await config.customProcessor(parameters, files);
return resultFiles;
const result = await config.customProcessor(parameters, files);
return result.files;
}
// Execute based on tool type

View File

@@ -60,6 +60,18 @@ export const isWebFormat = (extension: string): boolean => {
return ['html', 'zip'].includes(extension.toLowerCase());
};
/**
 * Tells whether an extension belongs to the office document family
 * (word processor, spreadsheet, or presentation files).
 * The comparison ignores letter case.
 * Per the conversion routing rules, these formats are converted through
 * LibreOffice and must be processed one file at a time.
 *
 * @param extension - File extension without the leading dot
 * @returns true when the extension is one of the recognised office formats
 */
export const isOfficeFormat = (extension: string): boolean => {
  const normalized = extension.toLowerCase();
  switch (normalized) {
    // Word processors
    case 'docx':
    case 'doc':
    case 'odt':
    // Spreadsheets
    case 'xlsx':
    case 'xls':
    case 'ods':
    // Presentations
    case 'pptx':
    case 'ppt':
    case 'odp':
      return true;
    default:
      return false;
  }
};
/**
* Gets available target extensions for a given source extension
* Extracted from useConvertParameters to be reusable in automation settings

View File

@@ -52,6 +52,29 @@ export function detectFileExtension(filename: string): string {
return extension;
}
/**
 * Removes the file extension from a filename.
 *
 * Only the final dot-separated segment is treated as an extension, and only
 * when at least one character precedes that dot. Dotfiles such as
 * `.gitignore` and names ending in a dot are therefore returned unchanged
 * (apart from case folding) instead of collapsing to an empty string.
 *
 * @param filename - The filename to process
 * @param options - Options for processing
 * @param options.preserveCase - If true, preserves original case. If false (default), converts to lowercase
 * @returns Filename without extension, or '' when `filename` is empty or not a string
 * @example
 * getFilenameWithoutExtension('document.pdf') // 'document'
 * getFilenameWithoutExtension('my.file.name.txt') // 'my.file.name'
 * getFilenameWithoutExtension('REPORT.PDF', { preserveCase: true }) // 'REPORT'
 * getFilenameWithoutExtension('.gitignore') // '.gitignore'
 */
export function getFilenameWithoutExtension(
  filename: string,
  options: { preserveCase?: boolean } = {}
): string {
  if (!filename || typeof filename !== 'string') return '';

  const { preserveCase = false } = options;

  // A dot at index 0 marks a hidden file, not an extension separator, and a
  // trailing dot has no extension after it; in both cases keep the full name.
  const lastDot = filename.lastIndexOf('.');
  const hasExtension = lastDot > 0 && lastDot < filename.length - 1;
  const withoutExtension = hasExtension ? filename.slice(0, lastDot) : filename;

  return preserveCase ? withoutExtension : withoutExtension.toLowerCase();
}
/**
* Checks if a file is a PDF based on extension and MIME type
* @param file - File or file-like object with name and type properties