Add Sanitize UI (#4123)

# Description of Changes

Implementation of Sanitize UI for V2.

Also removes parameter validation from the standard tool hooks. Keeping it
there would mean duplicating the logic between parameter handling and the
operation hooks. The nicer workflow is for each tool to block the Go button
when validation fails, rather than having the operation hook check it,
since that can't appear in the UI.

Co-authored-by: James <james@crosscourtanalytics.com>
Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
Co-authored-by: ConnorYoh <40631091+ConnorYoh@users.noreply.github.com>
This commit is contained in:
James Brunton
2025-08-12 16:05:59 +01:00
committed by GitHub
parent adf6feea27
commit 8eeb4c148c
17 changed files with 688 additions and 56 deletions

View File

@@ -38,12 +38,6 @@ export const useCompressOperation = () => {
buildFormData,
filePrefix: 'compressed_',
multiFileEndpoint: false, // Individual API calls per file
validateParams: (params) => {
if (params.compressionMethod === 'filesize' && !params.fileSizeValue) {
return { valid: false, errors: [t('compress.validation.fileSizeRequired', 'File size value is required when using filesize method')] };
}
return { valid: true };
},
getErrorMessage: createStandardErrorHandler(t('compress.error.failed', 'An error occurred while compressing the PDF.'))
});
};

View File

@@ -134,9 +134,6 @@ export const useConvertOperation = () => {
buildFormData, // Not used with customProcessor but required
filePrefix: 'converted_',
customProcessor: customConvertProcessor, // Convert handles its own routing
validateParams: (params) => {
return { valid: true };
},
getErrorMessage: (error) => {
if (error.response?.data && typeof error.response.data === 'string') {
return error.response.data;

View File

@@ -103,10 +103,6 @@ export const useOCROperation = () => {
filePrefix: 'ocr_',
multiFileEndpoint: false, // Process files individually
responseHandler, // use shared flow
validateParams: (params) =>
params.languages.length === 0
? { valid: false, errors: [t('ocr.validation.languageRequired', 'Please select at least one language for OCR processing.')] }
: { valid: true },
getErrorMessage: (error) =>
error.message?.includes('OCR tools') && error.message?.includes('not installed')
? 'OCR tools (OCRmyPDF or Tesseract) are not installed on the server. Use the standard or fat Docker image instead of ultra-lite, or install OCR tools manually.'

View File

@@ -0,0 +1,32 @@
import { useTranslation } from 'react-i18next';
import { useToolOperation } from '../shared/useToolOperation';
import { createStandardErrorHandler } from '../../../utils/toolErrorHandler';
import { SanitizeParameters } from './useSanitizeParameters';
/**
 * Builds the multipart body for sanitising a single file.
 *
 * The file is attached under `fileInput`, followed by every sanitize flag
 * serialised via `toString()` (i.e. `'true'` or `'false'`).
 *
 * @param parameters - The sanitize flags to send.
 * @param file - The file to attach to the request.
 * @returns A `FormData` holding the file and one entry per flag.
 */
const buildFormData = (parameters: SanitizeParameters, file: File): FormData => {
  const body = new FormData();
  body.append('fileInput', file);

  // Flags are appended in a fixed order so the request layout stays stable.
  const flagNames = [
    'removeJavaScript',
    'removeEmbeddedFiles',
    'removeXMPMetadata',
    'removeMetadata',
    'removeLinks',
    'removeFonts',
  ] as const;

  for (const flag of flagNames) {
    body.append(flag, parameters[flag].toString());
  }

  return body;
};
/**
 * Hook that configures the shared tool-operation flow for the sanitize tool.
 *
 * Each file is sent in its own request (`multiFileEndpoint: false`) to the
 * sanitize endpoint; the output file prefix and the fallback error message
 * are both obtained from the translation function.
 *
 * @returns Whatever `useToolOperation` returns for this configuration.
 */
export const useSanitizeOperation = () => {
  const { t } = useTranslation();

  // Resolve the translated strings up front, in the same order as before.
  const translatedPrefix = t('sanitize.filenamePrefix', 'sanitized');
  const failureMessage = t('sanitize.error.failed', 'An error occurred while sanitising the PDF.');

  return useToolOperation<SanitizeParameters>({
    operationType: 'sanitize',
    endpoint: '/api/v1/security/sanitize-pdf',
    buildFormData,
    filePrefix: `${translatedPrefix}_`,
    multiFileEndpoint: false, // Individual API calls per file
    getErrorMessage: createStandardErrorHandler(failureMessage),
  });
};

View File

@@ -0,0 +1,90 @@
import { describe, expect, test } from 'vitest';
import { renderHook, act } from '@testing-library/react';
import { defaultParameters, useSanitizeParameters } from './useSanitizeParameters';
describe('useSanitizeParameters', () => {
  // Mounts a fresh instance of the hook for each test.
  const mountHook = () => renderHook(() => useSanitizeParameters());

  test('should initialize with default parameters', () => {
    const hook = mountHook();

    expect(hook.result.current.parameters).toStrictEqual(defaultParameters);
  });

  test('should update individual parameters', () => {
    const hook = mountHook();

    act(() => {
      hook.result.current.updateParameter('removeXMPMetadata', true);
    });

    // Only the updated flag differs from the defaults.
    const expected = { ...defaultParameters, removeXMPMetadata: true };
    expect(hook.result.current.parameters).toStrictEqual(expected);
  });

  test('should reset parameters to defaults', () => {
    const hook = mountHook();

    // Move away from the defaults first.
    act(() => {
      hook.result.current.updateParameter('removeXMPMetadata', true);
      hook.result.current.updateParameter('removeJavaScript', false);
    });
    expect(hook.result.current.parameters.removeXMPMetadata).toBe(true);
    expect(hook.result.current.parameters.removeJavaScript).toBe(false);

    // Resetting must restore every default.
    act(() => {
      hook.result.current.resetParameters();
    });
    expect(hook.result.current.parameters).toStrictEqual(defaultParameters);
  });

  test('should return correct endpoint name', () => {
    const hook = mountHook();

    expect(hook.result.current.getEndpointName()).toBe('sanitize-pdf');
  });

  test('should validate parameters correctly', () => {
    const hook = mountHook();

    // Defaults enable removeJavaScript and removeEmbeddedFiles, so they are valid.
    expect(hook.result.current.validateParameters()).toBe(true);

    // With every flag off, validation must fail.
    act(() => {
      hook.result.current.updateParameter('removeJavaScript', false);
      hook.result.current.updateParameter('removeEmbeddedFiles', false);
    });
    expect(hook.result.current.validateParameters()).toBe(false);

    // A single enabled flag is enough to be valid again.
    act(() => {
      hook.result.current.updateParameter('removeLinks', true);
    });
    expect(hook.result.current.validateParameters()).toBe(true);
  });

  test('should handle all parameter types correctly', () => {
    const hook = mountHook();
    const parameterNames = Object.keys(defaultParameters) as (keyof typeof defaultParameters)[];

    // Toggle each flag on and then off, checking the state after each change.
    for (const name of parameterNames) {
      act(() => {
        hook.result.current.updateParameter(name, true);
      });
      expect(hook.result.current.parameters[name]).toBe(true);

      act(() => {
        hook.result.current.updateParameter(name, false);
      });
      expect(hook.result.current.parameters[name]).toBe(false);
    }
  });
});

View File

@@ -0,0 +1,53 @@
import { useState, useCallback } from 'react';
/**
 * Boolean option flags for the sanitize tool.
 *
 * NOTE(review): each flag presumably asks the backend to strip the named
 * element from the PDF — confirm against the sanitize API documentation.
 */
export interface SanitizeParameters {
removeJavaScript: boolean;
removeEmbeddedFiles: boolean;
removeXMPMetadata: boolean;
removeMetadata: boolean;
removeLinks: boolean;
removeFonts: boolean;
}
/**
 * Initial sanitize flags: JavaScript and embedded-file removal are enabled,
 * every other flag is disabled.
 */
export const defaultParameters: SanitizeParameters = {
removeJavaScript: true,
removeEmbeddedFiles: true,
removeXMPMetadata: false,
removeMetadata: false,
removeLinks: false,
removeFonts: false,
};
/**
 * State hook holding the sanitize tool's option flags.
 *
 * @returns An object with:
 *  - `parameters`: the current flag values, initialised from `defaultParameters`;
 *  - `updateParameter(key, value)`: sets one flag, leaving the others untouched;
 *  - `resetParameters()`: restores `defaultParameters`;
 *  - `validateParameters()`: `true` when at least one flag is strictly `true`;
 *  - `getEndpointName()`: always returns `'sanitize-pdf'`.
 */
export const useSanitizeParameters = () => {
  const [parameters, setParameters] = useState<SanitizeParameters>(defaultParameters);

  const updateParameter = useCallback(<K extends keyof SanitizeParameters>(
    key: K,
    value: SanitizeParameters[K]
  ) => {
    // Functional update so consecutive calls in one batch build on each other.
    setParameters(prev => ({
      ...prev,
      [key]: value
    }));
  }, []);

  const resetParameters = useCallback(() => {
    setParameters(defaultParameters);
  }, []);

  const validateParameters = useCallback(() => {
    // Sanitising with every option disabled would be a no-op, so require at least one.
    return Object.values(parameters).some(value => value === true);
  }, [parameters]);

  // Memoized like the other callbacks so the returned function identity is
  // stable across renders (safe to use in consumers' dependency arrays).
  const getEndpointName = useCallback(() => {
    return 'sanitize-pdf';
  }, []);

  return {
    parameters,
    updateParameter,
    resetParameters,
    validateParameters,
    getEndpointName,
  };
};

View File

@@ -9,11 +9,6 @@ import { extractErrorMessage } from '../../../utils/toolErrorHandler';
import { createOperation } from '../../../utils/toolOperationTracker';
import { ResponseHandler } from '../../../utils/toolResponseProcessor';
export interface ValidationResult {
valid: boolean;
errors?: string[];
}
// Re-export for backwards compatibility
export type { ProcessingProgress, ResponseHandler };
@@ -64,9 +59,6 @@ export interface ToolOperationConfig<TParams = void> {
*/
customProcessor?: (params: TParams, files: File[]) => Promise<File[]>;
/** Validate parameters before execution. Return validation errors if invalid. */
validateParams?: (params: TParams) => ValidationResult;
/** Extract user-friendly error messages from API errors */
getErrorMessage?: (error: any) => string;
}
@@ -129,14 +121,6 @@ export const useToolOperation = <TParams = void>(
return;
}
if (config.validateParams) {
const validation = config.validateParams(params);
if (!validation.valid) {
actions.setError(validation.errors?.join(', ') || 'Invalid parameters');
return;
}
}
const validFiles = selectedFiles.filter(file => file.size > 0);
if (validFiles.length === 0) {
actions.setError(t('noValidFiles', 'No valid files to process'));
@@ -186,7 +170,7 @@ export const useToolOperation = <TParams = void>(
// Individual file processing - separate API call per file
const apiCallsConfig: ApiCallsConfig<TParams> = {
endpoint: config.endpoint,
buildFormData: (file: File, params: TParams) => (config.buildFormData as any /* FIX ME */)(file, params),
buildFormData: (file: File, params: TParams) => (config.buildFormData as (params: TParams, file: File) => FormData /* FIX ME */)(params, file),
filePrefix: config.filePrefix,
responseHandler: config.responseHandler
};

View File

@@ -1,7 +1,5 @@
import { useCallback } from 'react';
import axios from 'axios';
import { useTranslation } from 'react-i18next';
import { useToolOperation, ToolOperationConfig } from '../shared/useToolOperation';
import { useToolOperation } from '../shared/useToolOperation';
import { createStandardErrorHandler } from '../../../utils/toolErrorHandler';
import { SplitParameters } from '../../../components/tools/split/SplitSettings';
import { SPLIT_MODES } from '../../../constants/splitConstants';
@@ -66,17 +64,6 @@ export const useSplitOperation = () => {
buildFormData: buildFormData, // Multi-file signature: (params, selectedFiles) => FormData
filePrefix: 'split_',
multiFileEndpoint: true, // Single API call with all files
validateParams: (params) => {
if (!params.mode) {
return { valid: false, errors: [t('split.validation.modeRequired', 'Split mode is required')] };
}
if (params.mode === SPLIT_MODES.BY_PAGES && !params.pages) {
return { valid: false, errors: [t('split.validation.pagesRequired', 'Page numbers are required for split by pages')] };
}
return { valid: true };
},
getErrorMessage: createStandardErrorHandler(t('split.error.failed', 'An error occurred while splitting the PDF.'))
});
};

View File

@@ -1,5 +1,5 @@
import { useState } from 'react';
import { SPLIT_MODES, SPLIT_TYPES, ENDPOINTS, type SplitMode, type SplitType } from '../../../constants/splitConstants';
import { SPLIT_MODES, SPLIT_TYPES, ENDPOINTS, type SplitMode } from '../../../constants/splitConstants';
import { SplitParameters } from '../../../components/tools/split/SplitSettings';
export interface SplitParametersHook {
@@ -63,4 +63,4 @@ export const useSplitParameters = (): SplitParametersHook => {
validateParameters,
getEndpointName,
};
};
};

View File

@@ -4,6 +4,7 @@ import ContentCutIcon from "@mui/icons-material/ContentCut";
import ZoomInMapIcon from "@mui/icons-material/ZoomInMap";
import SwapHorizIcon from "@mui/icons-material/SwapHoriz";
import ApiIcon from "@mui/icons-material/Api";
import CleaningServicesIcon from "@mui/icons-material/CleaningServices";
import { useMultipleEndpointsEnabled } from "./useEndpointConfig";
import { Tool, ToolDefinition, BaseToolProps, ToolRegistry } from "../types/tool";
@@ -75,6 +76,15 @@ const toolDefinitions: Record<string, ToolDefinition> = {
description: "Extract text from images using OCR",
endpoints: ["ocr-pdf"]
},
sanitize: {
id: "sanitize",
icon: <CleaningServicesIcon />,
component: React.lazy(() => import("../tools/Sanitize")),
maxFiles: -1,
category: "security",
description: "Remove potentially harmful elements from PDF files",
endpoints: ["sanitize-pdf"]
},
};