From 68f806bb61275f76272e53f907735ee8139e7eec Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Thu, 18 Sep 2025 15:12:09 -0600 Subject: [PATCH] Cleanup onnx detector (#20128) * Cleanup onnx detector * Fix * Fix classification cropping * Deprioritize openvino * Send model type * Use model type to decide if model can use full optimization * Cleanup * Cleanup --- .../real_time/custom_classification.py | 10 +-- frigate/detectors/detection_runners.py | 47 +++++++++++--- frigate/detectors/plugins/onnx.py | 65 +++---------------- frigate/detectors/plugins/openvino.py | 2 +- frigate/embeddings/onnx/base_embedding.py | 6 -- frigate/embeddings/onnx/face_embedding.py | 3 +- frigate/embeddings/onnx/jina_v1_embedding.py | 10 ++- frigate/embeddings/onnx/jina_v2_embedding.py | 2 + frigate/embeddings/onnx/lpr_embedding.py | 6 +- frigate/embeddings/types.py | 14 ++++ 10 files changed, 82 insertions(+), 83 deletions(-) create mode 100644 frigate/embeddings/types.py diff --git a/frigate/data_processing/real_time/custom_classification.py b/frigate/data_processing/real_time/custom_classification.py index daa9fee96..841267a60 100644 --- a/frigate/data_processing/real_time/custom_classification.py +++ b/frigate/data_processing/real_time/custom_classification.py @@ -133,9 +133,9 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi): x:x2, ] - if input.shape != (224, 224): + if frame.shape != (224, 224): try: - input = cv2.resize(input, (224, 224)) + frame = cv2.resize(frame, (224, 224)) except Exception: logger.warning("Failed to resize image for state classification") return @@ -258,11 +258,11 @@ class CustomObjectClassificationProcessor(RealTimeProcessorApi): x:x2, ] - if input.shape != (224, 224): + if crop.shape != (224, 224): try: - input = cv2.resize(input, (224, 224)) + crop = cv2.resize(crop, (224, 224)) except Exception: - logger.warning("Failed to resize image for object classification") + logger.warning("Failed to resize image for state classification") return 
input = np.expand_dims(crop, axis=0) diff --git a/frigate/detectors/detection_runners.py b/frigate/detectors/detection_runners.py index a034d2379..3e304d63c 100644 --- a/frigate/detectors/detection_runners.py +++ b/frigate/detectors/detection_runners.py @@ -101,6 +101,19 @@ class CudaGraphRunner(BaseModelRunner): for more complex models like CLIP or PaddleOCR. """ + @staticmethod + def is_complex_model(model_type: str) -> bool: + # Import here to avoid circular imports + from frigate.detectors.detector_config import ModelTypeEnum + from frigate.embeddings.types import EnrichmentModelTypeEnum + + return model_type in [ + ModelTypeEnum.yolonas.value, + EnrichmentModelTypeEnum.paddleocr.value, + EnrichmentModelTypeEnum.jina_v1.value, + EnrichmentModelTypeEnum.jina_v2.value, + ] + def __init__(self, session: ort.InferenceSession, cuda_device_id: int): self._session = session self._cuda_device_id = cuda_device_id @@ -156,10 +169,17 @@ class CudaGraphRunner(BaseModelRunner): class OpenVINOModelRunner(BaseModelRunner): """OpenVINO model runner that handles inference efficiently.""" - def __init__(self, model_path: str, device: str, complex_model: bool, **kwargs): + @staticmethod + def is_complex_model(model_type: str) -> bool: + # Import here to avoid circular imports + from frigate.embeddings.types import EnrichmentModelTypeEnum + + return model_type in [EnrichmentModelTypeEnum.paddleocr.value] + + def __init__(self, model_path: str, device: str, model_type: str, **kwargs): self.model_path = model_path self.device = device - self.complex_model = complex_model + self.complex_model = OpenVINOModelRunner.is_complex_model(model_type) if not os.path.isfile(model_path): raise FileNotFoundError(f"OpenVINO model file {model_path} not found.") @@ -183,7 +203,7 @@ class OpenVINOModelRunner(BaseModelRunner): self.infer_request = self.compiled_model.create_infer_request() self.input_tensor: ov.Tensor | None = None - if not complex_model: + if not self.complex_model: try: input_shape 
= self.compiled_model.inputs[0].get_shape() input_element_type = self.compiled_model.inputs[0].get_element_type() @@ -381,28 +401,35 @@ class RKNNModelRunner(BaseModelRunner): def get_optimized_runner( - model_path: str, device: str | None, complex_model: bool = True, **kwargs + model_path: str, device: str | None, model_type: str, **kwargs ) -> BaseModelRunner: """Get an optimized runner for the hardware.""" + device = device or "AUTO" if is_rknn_compatible(model_path): rknn_path = auto_convert_model(model_path) if rknn_path: return RKNNModelRunner(rknn_path) - if device != "CPU" and is_openvino_gpu_npu_available(): - return OpenVINOModelRunner( - model_path, device or "AUTO", complex_model, **kwargs - ) - providers, options = get_ort_providers(device == "CPU", device, **kwargs) + + if providers[0] == "CPUExecutionProvider": + # In the default image, ONNXRuntime is used so we will only get CPUExecutionProvider + # In other images we will get CUDA / ROCm which are preferred over OpenVINO + # There is currently no way to prioritize OpenVINO over CUDA / ROCm in these images + if device != "CPU" and is_openvino_gpu_npu_available(): + return OpenVINOModelRunner(model_path, device, model_type, **kwargs) + ortSession = ort.InferenceSession( model_path, providers=providers, provider_options=options, ) - if not complex_model and providers[0] == "CUDAExecutionProvider": + if ( + not CudaGraphRunner.is_complex_model(model_type) + and providers[0] == "CUDAExecutionProvider" + ): return CudaGraphRunner(ortSession, options[0]["device_id"]) return ONNXModelRunner(ortSession) diff --git a/frigate/detectors/plugins/onnx.py b/frigate/detectors/plugins/onnx.py index 7c7670733..6c9e510ce 100644 --- a/frigate/detectors/plugins/onnx.py +++ b/frigate/detectors/plugins/onnx.py @@ -1,18 +1,16 @@ import logging import numpy as np -import onnxruntime as ort from pydantic import Field from typing_extensions import Literal from frigate.detectors.detection_api import DetectionApi -from 
frigate.detectors.detection_runners import CudaGraphRunner +from frigate.detectors.detection_runners import get_optimized_runner from frigate.detectors.detector_config import ( BaseDetectorConfig, ModelTypeEnum, ) from frigate.util.model import ( - get_ort_providers, post_process_dfine, post_process_rfdetr, post_process_yolo, @@ -38,80 +36,35 @@ class ONNXDetector(DetectionApi): path = detector_config.model.path logger.info(f"ONNX: loading {detector_config.model.path}") - providers, options = get_ort_providers( - detector_config.device == "CPU", detector_config.device - ) - - # Enable CUDA Graphs only for supported models when using CUDA EP - if "CUDAExecutionProvider" in providers: - cuda_idx = providers.index("CUDAExecutionProvider") - # mutate only this call's provider options - options[cuda_idx] = { - **options[cuda_idx], - "enable_cuda_graph": True, - } - - sess_options = None - - if providers[0] == "ROCMExecutionProvider": - # avoid AMD GPU kernel crashes - sess_options = ort.SessionOptions() - sess_options.graph_optimization_level = ( - ort.GraphOptimizationLevel.ORT_DISABLE_ALL - ) - - self.model = ort.InferenceSession( - path, providers=providers, provider_options=options + self.runner = get_optimized_runner( + path, + detector_config.device, + model_type=detector_config.model.model_type, ) self.onnx_model_type = detector_config.model.model_type self.onnx_model_px = detector_config.model.input_pixel_format self.onnx_model_shape = detector_config.model.input_tensor - path = detector_config.model.path if self.onnx_model_type == ModelTypeEnum.yolox: self.calculate_grids_strides() - self._cuda_device_id = 0 - self._cg_runner: CudaGraphRunner | None = None - - try: - if "CUDAExecutionProvider" in providers: - self._cuda_device_id = options[cuda_idx].get("device_id", 0) - - if options[cuda_idx].get("enable_cuda_graph"): - self._cg_runner = CudaGraphRunner(self.model, self._cuda_device_id) - except Exception: - pass - logger.info(f"ONNX: {path} loaded") def 
detect_raw(self, tensor_input: np.ndarray): if self.onnx_model_type == ModelTypeEnum.dfine: - tensor_output = self.model.run( - None, + tensor_output = self.runner.run( { "images": tensor_input, "orig_target_sizes": np.array( [[self.height, self.width]], dtype=np.int64 ), - }, + } ) return post_process_dfine(tensor_output, self.width, self.height) - model_input_name = self.model.get_inputs()[0].name - - if self._cg_runner is not None: - try: - # Run using CUDA graphs if available - tensor_output = self._cg_runner.run({model_input_name: tensor_input}) - except Exception as e: - logger.warning(f"CUDA Graphs failed, falling back to regular run: {e}") - self._cg_runner = None - tensor_output = self.model.run(None, {model_input_name: tensor_input}) - else: - # Use regular run if CUDA graphs are not available - tensor_output = self.model.run(None, {model_input_name: tensor_input}) + model_input_name = self.runner.get_input_names()[0] + tensor_output = self.runner.run({model_input_name: tensor_input}) if self.onnx_model_type == ModelTypeEnum.rfdetr: return post_process_rfdetr(tensor_output) diff --git a/frigate/detectors/plugins/openvino.py b/frigate/detectors/plugins/openvino.py index 789912b4a..bda5c8871 100644 --- a/frigate/detectors/plugins/openvino.py +++ b/frigate/detectors/plugins/openvino.py @@ -45,7 +45,7 @@ class OvDetector(DetectionApi): self.runner = OpenVINOModelRunner( model_path=detector_config.model.path, device=detector_config.device, - complex_model=False, + model_type=detector_config.model.model_type, ) # For dfine models, also pre-allocate target sizes tensor diff --git a/frigate/embeddings/onnx/base_embedding.py b/frigate/embeddings/onnx/base_embedding.py index fcadd2852..c0bd58475 100644 --- a/frigate/embeddings/onnx/base_embedding.py +++ b/frigate/embeddings/onnx/base_embedding.py @@ -3,7 +3,6 @@ import logging import os from abc import ABC, abstractmethod -from enum import Enum from io import BytesIO from typing import Any @@ -18,11 +17,6 @@ from 
frigate.util.downloader import ModelDownloader logger = logging.getLogger(__name__) -class EmbeddingTypeEnum(str, Enum): - thumbnail = "thumbnail" - description = "description" - - class BaseEmbedding(ABC): """Base embedding class.""" diff --git a/frigate/embeddings/onnx/face_embedding.py b/frigate/embeddings/onnx/face_embedding.py index 4e7e142fc..77f2dbdca 100644 --- a/frigate/embeddings/onnx/face_embedding.py +++ b/frigate/embeddings/onnx/face_embedding.py @@ -7,6 +7,7 @@ import numpy as np from frigate.const import MODEL_CACHE_DIR from frigate.detectors.detection_runners import get_optimized_runner +from frigate.embeddings.types import EnrichmentModelTypeEnum from frigate.log import redirect_output_to_logger from frigate.util.downloader import ModelDownloader @@ -151,7 +152,7 @@ class ArcfaceEmbedding(BaseEmbedding): self.runner = get_optimized_runner( os.path.join(self.download_path, self.model_file), device=self.config.device or "GPU", - complex_model=False, + model_type=EnrichmentModelTypeEnum.arcface.value, ) def _preprocess_inputs(self, raw_inputs): diff --git a/frigate/embeddings/onnx/jina_v1_embedding.py b/frigate/embeddings/onnx/jina_v1_embedding.py index 169ee453d..e64d8da39 100644 --- a/frigate/embeddings/onnx/jina_v1_embedding.py +++ b/frigate/embeddings/onnx/jina_v1_embedding.py @@ -4,15 +4,17 @@ import logging import os import warnings -# importing this without pytorch or others causes a warning -# https://github.com/huggingface/transformers/issues/27214 -# suppressed by setting env TRANSFORMERS_NO_ADVISORY_WARNINGS=1 from transformers import AutoFeatureExtractor, AutoTokenizer from transformers.utils.logging import disable_progress_bar from frigate.comms.inter_process import InterProcessRequestor from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE from frigate.detectors.detection_runners import BaseModelRunner, get_optimized_runner + +# importing this without pytorch or others causes a warning +# 
https://github.com/huggingface/transformers/issues/27214 +# suppressed by setting env TRANSFORMERS_NO_ADVISORY_WARNINGS=1 +from frigate.embeddings.types import EnrichmentModelTypeEnum from frigate.types import ModelStatusTypesEnum from frigate.util.downloader import ModelDownloader @@ -128,6 +130,7 @@ class JinaV1TextEmbedding(BaseEmbedding): self.runner = get_optimized_runner( os.path.join(self.download_path, self.model_file), self.device, + model_type=EnrichmentModelTypeEnum.jina_v1.value, ) def _preprocess_inputs(self, raw_inputs): @@ -206,6 +209,7 @@ class JinaV1ImageEmbedding(BaseEmbedding): self.runner = get_optimized_runner( os.path.join(self.download_path, self.model_file), self.device, + model_type=EnrichmentModelTypeEnum.jina_v1.value, ) def _preprocess_inputs(self, raw_inputs): diff --git a/frigate/embeddings/onnx/jina_v2_embedding.py b/frigate/embeddings/onnx/jina_v2_embedding.py index 94e608512..44cc6c12b 100644 --- a/frigate/embeddings/onnx/jina_v2_embedding.py +++ b/frigate/embeddings/onnx/jina_v2_embedding.py @@ -12,6 +12,7 @@ from transformers.utils.logging import disable_progress_bar, set_verbosity_error from frigate.comms.inter_process import InterProcessRequestor from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE from frigate.detectors.detection_runners import get_optimized_runner +from frigate.embeddings.types import EnrichmentModelTypeEnum from frigate.types import ModelStatusTypesEnum from frigate.util.downloader import ModelDownloader @@ -128,6 +129,7 @@ class JinaV2Embedding(BaseEmbedding): self.runner = get_optimized_runner( os.path.join(self.download_path, self.model_file), self.device, + model_type=EnrichmentModelTypeEnum.jina_v2.value, ) def _preprocess_image(self, image_data: bytes | Image.Image) -> np.ndarray: diff --git a/frigate/embeddings/onnx/lpr_embedding.py b/frigate/embeddings/onnx/lpr_embedding.py index 30b8d372c..d41531d19 100644 --- a/frigate/embeddings/onnx/lpr_embedding.py +++ 
b/frigate/embeddings/onnx/lpr_embedding.py @@ -8,6 +8,7 @@ import numpy as np from frigate.comms.inter_process import InterProcessRequestor from frigate.const import MODEL_CACHE_DIR from frigate.detectors.detection_runners import BaseModelRunner, get_optimized_runner +from frigate.embeddings.types import EnrichmentModelTypeEnum from frigate.types import ModelStatusTypesEnum from frigate.util.downloader import ModelDownloader @@ -79,6 +80,7 @@ class PaddleOCRDetection(BaseEmbedding): self.runner = get_optimized_runner( os.path.join(self.download_path, self.model_file), self.device, + model_type=EnrichmentModelTypeEnum.paddleocr.value, ) def _preprocess_inputs(self, raw_inputs): @@ -138,6 +140,7 @@ class PaddleOCRClassification(BaseEmbedding): self.runner = get_optimized_runner( os.path.join(self.download_path, self.model_file), self.device, + model_type=EnrichmentModelTypeEnum.paddleocr.value, ) def _preprocess_inputs(self, raw_inputs): @@ -198,6 +201,7 @@ class PaddleOCRRecognition(BaseEmbedding): self.runner = get_optimized_runner( os.path.join(self.download_path, self.model_file), self.device, + model_type=EnrichmentModelTypeEnum.paddleocr.value, ) def _preprocess_inputs(self, raw_inputs): @@ -258,7 +262,7 @@ class LicensePlateDetector(BaseEmbedding): self.runner = get_optimized_runner( os.path.join(self.download_path, self.model_file), self.device, - complex_model=False, + model_type="yolov9", ) def _preprocess_inputs(self, raw_inputs): diff --git a/frigate/embeddings/types.py b/frigate/embeddings/types.py new file mode 100644 index 000000000..2ed404437 --- /dev/null +++ b/frigate/embeddings/types.py @@ -0,0 +1,14 @@ +from enum import Enum + + +class EmbeddingTypeEnum(str, Enum): + thumbnail = "thumbnail" + description = "description" + + +class EnrichmentModelTypeEnum(str, Enum): + arcface = "arcface" + facenet = "facenet" + jina_v1 = "jina_v1" + jina_v2 = "jina_v2" + paddleocr = "paddleocr"