From 68f806bb61275f76272e53f907735ee8139e7eec Mon Sep 17 00:00:00 2001
From: Nicolas Mowen <nickmowen213@gmail.com>
Date: Thu, 18 Sep 2025 15:12:09 -0600
Subject: [PATCH] Cleanup onnx detector (#20128)

* Cleanup onnx detector

* Fix

* Fix classification cropping

* Deprioritize openvino

* Send model type

* Use model type to decide if model can use full optimization

* Cleanup

* Cleanup
---
 .../real_time/custom_classification.py        | 10 +--
 frigate/detectors/detection_runners.py        | 47 +++++++++++---
 frigate/detectors/plugins/onnx.py             | 65 +++----------------
 frigate/detectors/plugins/openvino.py         |  2 +-
 frigate/embeddings/onnx/base_embedding.py     |  6 --
 frigate/embeddings/onnx/face_embedding.py     |  3 +-
 frigate/embeddings/onnx/jina_v1_embedding.py  | 10 ++-
 frigate/embeddings/onnx/jina_v2_embedding.py  |  2 +
 frigate/embeddings/onnx/lpr_embedding.py      |  6 +-
 frigate/embeddings/types.py                   | 14 ++++
 10 files changed, 82 insertions(+), 83 deletions(-)
 create mode 100644 frigate/embeddings/types.py

diff --git a/frigate/data_processing/real_time/custom_classification.py b/frigate/data_processing/real_time/custom_classification.py
index daa9fee96..841267a60 100644
--- a/frigate/data_processing/real_time/custom_classification.py
+++ b/frigate/data_processing/real_time/custom_classification.py
@@ -133,9 +133,9 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
             x:x2,
         ]
 
-        if input.shape != (224, 224):
+        if frame.shape != (224, 224):
             try:
-                input = cv2.resize(input, (224, 224))
+                frame = cv2.resize(frame, (224, 224))
             except Exception:
                 logger.warning("Failed to resize image for state classification")
                 return
@@ -258,11 +258,11 @@ class CustomObjectClassificationProcessor(RealTimeProcessorApi):
             x:x2,
         ]
 
-        if input.shape != (224, 224):
+        if crop.shape != (224, 224):
             try:
-                input = cv2.resize(input, (224, 224))
+                crop = cv2.resize(crop, (224, 224))
             except Exception:
-                logger.warning("Failed to resize image for object classification")
+                logger.warning("Failed to resize image for object classification")
                 return
 
         input = np.expand_dims(crop, axis=0)
diff --git a/frigate/detectors/detection_runners.py b/frigate/detectors/detection_runners.py
index a034d2379..3e304d63c 100644
--- a/frigate/detectors/detection_runners.py
+++ b/frigate/detectors/detection_runners.py
@@ -101,6 +101,19 @@ class CudaGraphRunner(BaseModelRunner):
     for more complex models like CLIP or PaddleOCR.
     """
 
+    @staticmethod
+    def is_complex_model(model_type: str) -> bool:
+        # Import here to avoid circular imports
+        from frigate.detectors.detector_config import ModelTypeEnum
+        from frigate.embeddings.types import EnrichmentModelTypeEnum
+
+        return model_type in [
+            ModelTypeEnum.yolonas.value,
+            EnrichmentModelTypeEnum.paddleocr.value,
+            EnrichmentModelTypeEnum.jina_v1.value,
+            EnrichmentModelTypeEnum.jina_v2.value,
+        ]
+
     def __init__(self, session: ort.InferenceSession, cuda_device_id: int):
         self._session = session
         self._cuda_device_id = cuda_device_id
@@ -156,10 +169,17 @@ class CudaGraphRunner(BaseModelRunner):
 class OpenVINOModelRunner(BaseModelRunner):
     """OpenVINO model runner that handles inference efficiently."""
 
-    def __init__(self, model_path: str, device: str, complex_model: bool, **kwargs):
+    @staticmethod
+    def is_complex_model(model_type: str) -> bool:
+        # Import here to avoid circular imports
+        from frigate.embeddings.types import EnrichmentModelTypeEnum
+
+        return model_type in [EnrichmentModelTypeEnum.paddleocr.value]
+
+    def __init__(self, model_path: str, device: str, model_type: str, **kwargs):
         self.model_path = model_path
         self.device = device
-        self.complex_model = complex_model
+        self.complex_model = OpenVINOModelRunner.is_complex_model(model_type)
 
         if not os.path.isfile(model_path):
             raise FileNotFoundError(f"OpenVINO model file {model_path} not found.")
@@ -183,7 +203,7 @@ class OpenVINOModelRunner(BaseModelRunner):
         self.infer_request = self.compiled_model.create_infer_request()
         self.input_tensor: ov.Tensor | None = None
 
-        if not complex_model:
+        if not self.complex_model:
             try:
                 input_shape = self.compiled_model.inputs[0].get_shape()
                 input_element_type = self.compiled_model.inputs[0].get_element_type()
@@ -381,28 +401,35 @@ class RKNNModelRunner(BaseModelRunner):
 
 
 def get_optimized_runner(
-    model_path: str, device: str | None, complex_model: bool = True, **kwargs
+    model_path: str, device: str | None, model_type: str, **kwargs
 ) -> BaseModelRunner:
     """Get an optimized runner for the hardware."""
+    device = device or "AUTO"
     if is_rknn_compatible(model_path):
         rknn_path = auto_convert_model(model_path)
 
         if rknn_path:
             return RKNNModelRunner(rknn_path)
 
-    if device != "CPU" and is_openvino_gpu_npu_available():
-        return OpenVINOModelRunner(
-            model_path, device or "AUTO", complex_model, **kwargs
-        )
-
     providers, options = get_ort_providers(device == "CPU", device, **kwargs)
+
+    if providers[0] == "CPUExecutionProvider":
+        # In the default image, ONNXRuntime is used so we will only get CPUExecutionProvider
+        # In other images we will get CUDA / ROCm which are preferred over OpenVINO
+        # There is currently no way to prioritize OpenVINO over CUDA / ROCm in these images
+        if device != "CPU" and is_openvino_gpu_npu_available():
+            return OpenVINOModelRunner(model_path, device, model_type, **kwargs)
+
     ortSession = ort.InferenceSession(
         model_path,
         providers=providers,
         provider_options=options,
     )
 
-    if not complex_model and providers[0] == "CUDAExecutionProvider":
+    if (
+        not CudaGraphRunner.is_complex_model(model_type)
+        and providers[0] == "CUDAExecutionProvider"
+    ):
         return CudaGraphRunner(ortSession, options[0]["device_id"])
 
     return ONNXModelRunner(ortSession)
diff --git a/frigate/detectors/plugins/onnx.py b/frigate/detectors/plugins/onnx.py
index 7c7670733..6c9e510ce 100644
--- a/frigate/detectors/plugins/onnx.py
+++ b/frigate/detectors/plugins/onnx.py
@@ -1,18 +1,16 @@
 import logging
 
 import numpy as np
-import onnxruntime as ort
 from pydantic import Field
 from typing_extensions import Literal
 
 from frigate.detectors.detection_api import DetectionApi
-from frigate.detectors.detection_runners import CudaGraphRunner
+from frigate.detectors.detection_runners import get_optimized_runner
 from frigate.detectors.detector_config import (
     BaseDetectorConfig,
     ModelTypeEnum,
 )
 from frigate.util.model import (
-    get_ort_providers,
     post_process_dfine,
     post_process_rfdetr,
     post_process_yolo,
@@ -38,80 +36,35 @@ class ONNXDetector(DetectionApi):
         path = detector_config.model.path
         logger.info(f"ONNX: loading {detector_config.model.path}")
 
-        providers, options = get_ort_providers(
-            detector_config.device == "CPU", detector_config.device
-        )
-
-        # Enable CUDA Graphs only for supported models when using CUDA EP
-        if "CUDAExecutionProvider" in providers:
-            cuda_idx = providers.index("CUDAExecutionProvider")
-            # mutate only this call's provider options
-            options[cuda_idx] = {
-                **options[cuda_idx],
-                "enable_cuda_graph": True,
-            }
-
-        sess_options = None
-
-        if providers[0] == "ROCMExecutionProvider":
-            # avoid AMD GPU kernel crashes
-            sess_options = ort.SessionOptions()
-            sess_options.graph_optimization_level = (
-                ort.GraphOptimizationLevel.ORT_DISABLE_ALL
-            )
-
-        self.model = ort.InferenceSession(
-            path, providers=providers, provider_options=options
+        self.runner = get_optimized_runner(
+            path,
+            detector_config.device,
+            model_type=detector_config.model.model_type,
         )
 
         self.onnx_model_type = detector_config.model.model_type
         self.onnx_model_px = detector_config.model.input_pixel_format
         self.onnx_model_shape = detector_config.model.input_tensor
-        path = detector_config.model.path
 
         if self.onnx_model_type == ModelTypeEnum.yolox:
             self.calculate_grids_strides()
 
-        self._cuda_device_id = 0
-        self._cg_runner: CudaGraphRunner | None = None
-
-        try:
-            if "CUDAExecutionProvider" in providers:
-                self._cuda_device_id = options[cuda_idx].get("device_id", 0)
-
-                if options[cuda_idx].get("enable_cuda_graph"):
-                    self._cg_runner = CudaGraphRunner(self.model, self._cuda_device_id)
-        except Exception:
-            pass
-
         logger.info(f"ONNX: {path} loaded")
 
     def detect_raw(self, tensor_input: np.ndarray):
         if self.onnx_model_type == ModelTypeEnum.dfine:
-            tensor_output = self.model.run(
-                None,
+            tensor_output = self.runner.run(
                 {
                     "images": tensor_input,
                     "orig_target_sizes": np.array(
                         [[self.height, self.width]], dtype=np.int64
                     ),
-                },
+                }
             )
             return post_process_dfine(tensor_output, self.width, self.height)
 
-        model_input_name = self.model.get_inputs()[0].name
-
-        if self._cg_runner is not None:
-            try:
-                # Run using CUDA graphs if available
-                tensor_output = self._cg_runner.run({model_input_name: tensor_input})
-            except Exception as e:
-                logger.warning(f"CUDA Graphs failed, falling back to regular run: {e}")
-                self._cg_runner = None
-                tensor_output = self.model.run(None, {model_input_name: tensor_input})
-        else:
-            # Use regular run if CUDA graphs are not available
-            tensor_output = self.model.run(None, {model_input_name: tensor_input})
+        model_input_name = self.runner.get_input_names()[0]
+        tensor_output = self.runner.run({model_input_name: tensor_input})
 
         if self.onnx_model_type == ModelTypeEnum.rfdetr:
             return post_process_rfdetr(tensor_output)
diff --git a/frigate/detectors/plugins/openvino.py b/frigate/detectors/plugins/openvino.py
index 789912b4a..bda5c8871 100644
--- a/frigate/detectors/plugins/openvino.py
+++ b/frigate/detectors/plugins/openvino.py
@@ -45,7 +45,7 @@ class OvDetector(DetectionApi):
         self.runner = OpenVINOModelRunner(
             model_path=detector_config.model.path,
             device=detector_config.device,
-            complex_model=False,
+            model_type=detector_config.model.model_type,
         )
 
         # For dfine models, also pre-allocate target sizes tensor
diff --git a/frigate/embeddings/onnx/base_embedding.py b/frigate/embeddings/onnx/base_embedding.py
index fcadd2852..c0bd58475 100644
--- a/frigate/embeddings/onnx/base_embedding.py
+++ b/frigate/embeddings/onnx/base_embedding.py
@@ -3,7 +3,6 @@
 import logging
 import os
 from abc import ABC, abstractmethod
-from enum import Enum
 from io import BytesIO
 from typing import Any
 
@@ -18,11 +17,6 @@ from frigate.util.downloader import ModelDownloader
 logger = logging.getLogger(__name__)
 
 
-class EmbeddingTypeEnum(str, Enum):
-    thumbnail = "thumbnail"
-    description = "description"
-
-
 class BaseEmbedding(ABC):
     """Base embedding class."""
 
diff --git a/frigate/embeddings/onnx/face_embedding.py b/frigate/embeddings/onnx/face_embedding.py
index 4e7e142fc..77f2dbdca 100644
--- a/frigate/embeddings/onnx/face_embedding.py
+++ b/frigate/embeddings/onnx/face_embedding.py
@@ -7,6 +7,7 @@ import numpy as np
 
 from frigate.const import MODEL_CACHE_DIR
 from frigate.detectors.detection_runners import get_optimized_runner
+from frigate.embeddings.types import EnrichmentModelTypeEnum
 from frigate.log import redirect_output_to_logger
 from frigate.util.downloader import ModelDownloader
 
@@ -151,7 +152,7 @@ class ArcfaceEmbedding(BaseEmbedding):
             self.runner = get_optimized_runner(
                 os.path.join(self.download_path, self.model_file),
                 device=self.config.device or "GPU",
-                complex_model=False,
+                model_type=EnrichmentModelTypeEnum.arcface.value,
             )
 
     def _preprocess_inputs(self, raw_inputs):
diff --git a/frigate/embeddings/onnx/jina_v1_embedding.py b/frigate/embeddings/onnx/jina_v1_embedding.py
index 169ee453d..e64d8da39 100644
--- a/frigate/embeddings/onnx/jina_v1_embedding.py
+++ b/frigate/embeddings/onnx/jina_v1_embedding.py
@@ -4,15 +4,17 @@ import logging
 import os
 import warnings
 
-# importing this without pytorch or others causes a warning
-# https://github.com/huggingface/transformers/issues/27214
-# suppressed by setting env TRANSFORMERS_NO_ADVISORY_WARNINGS=1
 from transformers import AutoFeatureExtractor, AutoTokenizer
 from transformers.utils.logging import disable_progress_bar
 
 from frigate.comms.inter_process import InterProcessRequestor
 from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE
 from frigate.detectors.detection_runners import BaseModelRunner, get_optimized_runner
+
+# importing this without pytorch or others causes a warning
+# https://github.com/huggingface/transformers/issues/27214
+# suppressed by setting env TRANSFORMERS_NO_ADVISORY_WARNINGS=1
+from frigate.embeddings.types import EnrichmentModelTypeEnum
 from frigate.types import ModelStatusTypesEnum
 from frigate.util.downloader import ModelDownloader
 
@@ -128,6 +130,7 @@ class JinaV1TextEmbedding(BaseEmbedding):
             self.runner = get_optimized_runner(
                 os.path.join(self.download_path, self.model_file),
                 self.device,
+                model_type=EnrichmentModelTypeEnum.jina_v1.value,
             )
 
     def _preprocess_inputs(self, raw_inputs):
@@ -206,6 +209,7 @@ class JinaV1ImageEmbedding(BaseEmbedding):
             self.runner = get_optimized_runner(
                 os.path.join(self.download_path, self.model_file),
                 self.device,
+                model_type=EnrichmentModelTypeEnum.jina_v1.value,
             )
 
     def _preprocess_inputs(self, raw_inputs):
diff --git a/frigate/embeddings/onnx/jina_v2_embedding.py b/frigate/embeddings/onnx/jina_v2_embedding.py
index 94e608512..44cc6c12b 100644
--- a/frigate/embeddings/onnx/jina_v2_embedding.py
+++ b/frigate/embeddings/onnx/jina_v2_embedding.py
@@ -12,6 +12,7 @@ from transformers.utils.logging import disable_progress_bar, set_verbosity_error
 from frigate.comms.inter_process import InterProcessRequestor
 from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE
 from frigate.detectors.detection_runners import get_optimized_runner
+from frigate.embeddings.types import EnrichmentModelTypeEnum
 from frigate.types import ModelStatusTypesEnum
 from frigate.util.downloader import ModelDownloader
 
@@ -128,6 +129,7 @@ class JinaV2Embedding(BaseEmbedding):
             self.runner = get_optimized_runner(
                 os.path.join(self.download_path, self.model_file),
                 self.device,
+                model_type=EnrichmentModelTypeEnum.jina_v2.value,
             )
 
     def _preprocess_image(self, image_data: bytes | Image.Image) -> np.ndarray:
diff --git a/frigate/embeddings/onnx/lpr_embedding.py b/frigate/embeddings/onnx/lpr_embedding.py
index 30b8d372c..d41531d19 100644
--- a/frigate/embeddings/onnx/lpr_embedding.py
+++ b/frigate/embeddings/onnx/lpr_embedding.py
@@ -8,6 +8,7 @@ import numpy as np
 from frigate.comms.inter_process import InterProcessRequestor
 from frigate.const import MODEL_CACHE_DIR
 from frigate.detectors.detection_runners import BaseModelRunner, get_optimized_runner
+from frigate.embeddings.types import EnrichmentModelTypeEnum
 from frigate.types import ModelStatusTypesEnum
 from frigate.util.downloader import ModelDownloader
 
@@ -79,6 +80,7 @@ class PaddleOCRDetection(BaseEmbedding):
             self.runner = get_optimized_runner(
                 os.path.join(self.download_path, self.model_file),
                 self.device,
+                model_type=EnrichmentModelTypeEnum.paddleocr.value,
             )
 
     def _preprocess_inputs(self, raw_inputs):
@@ -138,6 +140,7 @@ class PaddleOCRClassification(BaseEmbedding):
             self.runner = get_optimized_runner(
                 os.path.join(self.download_path, self.model_file),
                 self.device,
+                model_type=EnrichmentModelTypeEnum.paddleocr.value,
             )
 
     def _preprocess_inputs(self, raw_inputs):
@@ -198,6 +201,7 @@ class PaddleOCRRecognition(BaseEmbedding):
             self.runner = get_optimized_runner(
                 os.path.join(self.download_path, self.model_file),
                 self.device,
+                model_type=EnrichmentModelTypeEnum.paddleocr.value,
             )
 
     def _preprocess_inputs(self, raw_inputs):
@@ -258,7 +262,7 @@ class LicensePlateDetector(BaseEmbedding):
             self.runner = get_optimized_runner(
                 os.path.join(self.download_path, self.model_file),
                 self.device,
-                complex_model=False,
+                model_type="yolov9",
             )
 
     def _preprocess_inputs(self, raw_inputs):
diff --git a/frigate/embeddings/types.py b/frigate/embeddings/types.py
new file mode 100644
index 000000000..2ed404437
--- /dev/null
+++ b/frigate/embeddings/types.py
@@ -0,0 +1,14 @@
+from enum import Enum
+
+
+class EmbeddingTypeEnum(str, Enum):
+    thumbnail = "thumbnail"
+    description = "description"
+
+
+class EnrichmentModelTypeEnum(str, Enum):
+    arcface = "arcface"
+    facenet = "facenet"
+    jina_v1 = "jina_v1"
+    jina_v2 = "jina_v2"
+    paddleocr = "paddleocr"