Cleanup onnx detector (#20128)

* Cleanup onnx detector

* Fix

* Fix classification cropping

* Deprioritize openvino

* Send model type

* Use model type to decide if model can use full optimization

* Cleanup

* Cleanup
This commit is contained in:
Nicolas Mowen 2025-09-18 15:12:09 -06:00 committed by GitHub
parent c05e260ae9
commit 68f806bb61
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 82 additions and 83 deletions

View File

@ -133,9 +133,9 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
x:x2,
]
if input.shape != (224, 224):
if frame.shape != (224, 224):
try:
input = cv2.resize(input, (224, 224))
frame = cv2.resize(frame, (224, 224))
except Exception:
logger.warning("Failed to resize image for state classification")
return
@ -258,11 +258,11 @@ class CustomObjectClassificationProcessor(RealTimeProcessorApi):
x:x2,
]
if input.shape != (224, 224):
if crop.shape != (224, 224):
try:
input = cv2.resize(input, (224, 224))
crop = cv2.resize(crop, (224, 224))
except Exception:
logger.warning("Failed to resize image for object classification")
logger.warning("Failed to resize image for state classification")
return
input = np.expand_dims(crop, axis=0)

View File

@ -101,6 +101,19 @@ class CudaGraphRunner(BaseModelRunner):
for more complex models like CLIP or PaddleOCR.
"""
@staticmethod
def is_complex_model(model_type: str) -> bool:
    """Report whether ``model_type`` is on this runner's complex-model list.

    Args:
        model_type: Model type identifier, compared against the string
            values of the enum members listed below.

    Returns:
        True when ``model_type`` equals one of the listed values,
        otherwise False.
    """
    # Deferred to call time: importing these at module load would create
    # a circular import.
    from frigate.detectors.detector_config import ModelTypeEnum
    from frigate.embeddings.types import EnrichmentModelTypeEnum

    complex_types = (
        ModelTypeEnum.yolonas.value,
        EnrichmentModelTypeEnum.paddleocr.value,
        EnrichmentModelTypeEnum.jina_v1.value,
        EnrichmentModelTypeEnum.jina_v2.value,
    )
    return model_type in complex_types
def __init__(self, session: ort.InferenceSession, cuda_device_id: int):
self._session = session
self._cuda_device_id = cuda_device_id
@ -156,10 +169,17 @@ class CudaGraphRunner(BaseModelRunner):
class OpenVINOModelRunner(BaseModelRunner):
"""OpenVINO model runner that handles inference efficiently."""
def __init__(self, model_path: str, device: str, complex_model: bool, **kwargs):
@staticmethod
def is_complex_model(model_type: str) -> bool:
    """Report whether ``model_type`` is on this runner's complex-model list.

    Args:
        model_type: Model type identifier, compared against the string
            value of the enum member listed below.

    Returns:
        True when ``model_type`` equals the PaddleOCR enrichment model
        value, otherwise False.
    """
    # Deferred to call time: importing this at module load would create
    # a circular import.
    from frigate.embeddings.types import EnrichmentModelTypeEnum

    complex_types = (EnrichmentModelTypeEnum.paddleocr.value,)
    return model_type in complex_types
def __init__(self, model_path: str, device: str, model_type: str, **kwargs):
self.model_path = model_path
self.device = device
self.complex_model = complex_model
self.complex_model = OpenVINOModelRunner.is_complex_model(model_type)
if not os.path.isfile(model_path):
raise FileNotFoundError(f"OpenVINO model file {model_path} not found.")
@ -183,7 +203,7 @@ class OpenVINOModelRunner(BaseModelRunner):
self.infer_request = self.compiled_model.create_infer_request()
self.input_tensor: ov.Tensor | None = None
if not complex_model:
if not self.complex_model:
try:
input_shape = self.compiled_model.inputs[0].get_shape()
input_element_type = self.compiled_model.inputs[0].get_element_type()
@ -381,28 +401,35 @@ class RKNNModelRunner(BaseModelRunner):
def get_optimized_runner(
model_path: str, device: str | None, complex_model: bool = True, **kwargs
model_path: str, device: str | None, model_type: str, **kwargs
) -> BaseModelRunner:
"""Get an optimized runner for the hardware."""
device = device or "AUTO"
if is_rknn_compatible(model_path):
rknn_path = auto_convert_model(model_path)
if rknn_path:
return RKNNModelRunner(rknn_path)
if device != "CPU" and is_openvino_gpu_npu_available():
return OpenVINOModelRunner(
model_path, device or "AUTO", complex_model, **kwargs
)
providers, options = get_ort_providers(device == "CPU", device, **kwargs)
if providers[0] == "CPUExecutionProvider":
# In the default image, ONNXRuntime is used so we will only get CPUExecutionProvider
# In other images we will get CUDA / ROCm which are preferred over OpenVINO
# There is currently no way to prioritize OpenVINO over CUDA / ROCm in these images
if device != "CPU" and is_openvino_gpu_npu_available():
return OpenVINOModelRunner(model_path, device, model_type, **kwargs)
ortSession = ort.InferenceSession(
model_path,
providers=providers,
provider_options=options,
)
if not complex_model and providers[0] == "CUDAExecutionProvider":
if (
not CudaGraphRunner.is_complex_model(model_type)
and providers[0] == "CUDAExecutionProvider"
):
return CudaGraphRunner(ortSession, options[0]["device_id"])
return ONNXModelRunner(ortSession)

View File

@ -1,18 +1,16 @@
import logging
import numpy as np
import onnxruntime as ort
from pydantic import Field
from typing_extensions import Literal
from frigate.detectors.detection_api import DetectionApi
from frigate.detectors.detection_runners import CudaGraphRunner
from frigate.detectors.detection_runners import get_optimized_runner
from frigate.detectors.detector_config import (
BaseDetectorConfig,
ModelTypeEnum,
)
from frigate.util.model import (
get_ort_providers,
post_process_dfine,
post_process_rfdetr,
post_process_yolo,
@ -38,80 +36,35 @@ class ONNXDetector(DetectionApi):
path = detector_config.model.path
logger.info(f"ONNX: loading {detector_config.model.path}")
providers, options = get_ort_providers(
detector_config.device == "CPU", detector_config.device
)
# Enable CUDA Graphs only for supported models when using CUDA EP
if "CUDAExecutionProvider" in providers:
cuda_idx = providers.index("CUDAExecutionProvider")
# mutate only this call's provider options
options[cuda_idx] = {
**options[cuda_idx],
"enable_cuda_graph": True,
}
sess_options = None
if providers[0] == "ROCMExecutionProvider":
# avoid AMD GPU kernel crashes
sess_options = ort.SessionOptions()
sess_options.graph_optimization_level = (
ort.GraphOptimizationLevel.ORT_DISABLE_ALL
)
self.model = ort.InferenceSession(
path, providers=providers, provider_options=options
self.runner = get_optimized_runner(
path,
detector_config.device,
model_type=detector_config.model.model_type,
)
self.onnx_model_type = detector_config.model.model_type
self.onnx_model_px = detector_config.model.input_pixel_format
self.onnx_model_shape = detector_config.model.input_tensor
path = detector_config.model.path
if self.onnx_model_type == ModelTypeEnum.yolox:
self.calculate_grids_strides()
self._cuda_device_id = 0
self._cg_runner: CudaGraphRunner | None = None
try:
if "CUDAExecutionProvider" in providers:
self._cuda_device_id = options[cuda_idx].get("device_id", 0)
if options[cuda_idx].get("enable_cuda_graph"):
self._cg_runner = CudaGraphRunner(self.model, self._cuda_device_id)
except Exception:
pass
logger.info(f"ONNX: {path} loaded")
def detect_raw(self, tensor_input: np.ndarray):
if self.onnx_model_type == ModelTypeEnum.dfine:
tensor_output = self.model.run(
None,
tensor_output = self.runner.run(
{
"images": tensor_input,
"orig_target_sizes": np.array(
[[self.height, self.width]], dtype=np.int64
),
},
}
)
return post_process_dfine(tensor_output, self.width, self.height)
model_input_name = self.model.get_inputs()[0].name
if self._cg_runner is not None:
try:
# Run using CUDA graphs if available
tensor_output = self._cg_runner.run({model_input_name: tensor_input})
except Exception as e:
logger.warning(f"CUDA Graphs failed, falling back to regular run: {e}")
self._cg_runner = None
tensor_output = self.model.run(None, {model_input_name: tensor_input})
else:
# Use regular run if CUDA graphs are not available
tensor_output = self.model.run(None, {model_input_name: tensor_input})
model_input_name = self.runner.get_input_names()[0]
tensor_output = self.runner.run({model_input_name: tensor_input})
if self.onnx_model_type == ModelTypeEnum.rfdetr:
return post_process_rfdetr(tensor_output)

View File

@ -45,7 +45,7 @@ class OvDetector(DetectionApi):
self.runner = OpenVINOModelRunner(
model_path=detector_config.model.path,
device=detector_config.device,
complex_model=False,
model_type=detector_config.model.model_type,
)
# For dfine models, also pre-allocate target sizes tensor

View File

@ -3,7 +3,6 @@
import logging
import os
from abc import ABC, abstractmethod
from enum import Enum
from io import BytesIO
from typing import Any
@ -18,11 +17,6 @@ from frigate.util.downloader import ModelDownloader
logger = logging.getLogger(__name__)
class EmbeddingTypeEnum(str, Enum):
thumbnail = "thumbnail"
description = "description"
class BaseEmbedding(ABC):
"""Base embedding class."""

View File

@ -7,6 +7,7 @@ import numpy as np
from frigate.const import MODEL_CACHE_DIR
from frigate.detectors.detection_runners import get_optimized_runner
from frigate.embeddings.types import EnrichmentModelTypeEnum
from frigate.log import redirect_output_to_logger
from frigate.util.downloader import ModelDownloader
@ -151,7 +152,7 @@ class ArcfaceEmbedding(BaseEmbedding):
self.runner = get_optimized_runner(
os.path.join(self.download_path, self.model_file),
device=self.config.device or "GPU",
complex_model=False,
model_type=EnrichmentModelTypeEnum.arcface.value,
)
def _preprocess_inputs(self, raw_inputs):

View File

@ -4,15 +4,17 @@ import logging
import os
import warnings
# importing this without pytorch or others causes a warning
# https://github.com/huggingface/transformers/issues/27214
# suppressed by setting env TRANSFORMERS_NO_ADVISORY_WARNINGS=1
from transformers import AutoFeatureExtractor, AutoTokenizer
from transformers.utils.logging import disable_progress_bar
from frigate.comms.inter_process import InterProcessRequestor
from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE
from frigate.detectors.detection_runners import BaseModelRunner, get_optimized_runner
# importing this without pytorch or others causes a warning
# https://github.com/huggingface/transformers/issues/27214
# suppressed by setting env TRANSFORMERS_NO_ADVISORY_WARNINGS=1
from frigate.embeddings.types import EnrichmentModelTypeEnum
from frigate.types import ModelStatusTypesEnum
from frigate.util.downloader import ModelDownloader
@ -128,6 +130,7 @@ class JinaV1TextEmbedding(BaseEmbedding):
self.runner = get_optimized_runner(
os.path.join(self.download_path, self.model_file),
self.device,
model_type=EnrichmentModelTypeEnum.jina_v1.value,
)
def _preprocess_inputs(self, raw_inputs):
@ -206,6 +209,7 @@ class JinaV1ImageEmbedding(BaseEmbedding):
self.runner = get_optimized_runner(
os.path.join(self.download_path, self.model_file),
self.device,
model_type=EnrichmentModelTypeEnum.jina_v1.value,
)
def _preprocess_inputs(self, raw_inputs):

View File

@ -12,6 +12,7 @@ from transformers.utils.logging import disable_progress_bar, set_verbosity_error
from frigate.comms.inter_process import InterProcessRequestor
from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE
from frigate.detectors.detection_runners import get_optimized_runner
from frigate.embeddings.types import EnrichmentModelTypeEnum
from frigate.types import ModelStatusTypesEnum
from frigate.util.downloader import ModelDownloader
@ -128,6 +129,7 @@ class JinaV2Embedding(BaseEmbedding):
self.runner = get_optimized_runner(
os.path.join(self.download_path, self.model_file),
self.device,
model_type=EnrichmentModelTypeEnum.jina_v2.value,
)
def _preprocess_image(self, image_data: bytes | Image.Image) -> np.ndarray:

View File

@ -8,6 +8,7 @@ import numpy as np
from frigate.comms.inter_process import InterProcessRequestor
from frigate.const import MODEL_CACHE_DIR
from frigate.detectors.detection_runners import BaseModelRunner, get_optimized_runner
from frigate.embeddings.types import EnrichmentModelTypeEnum
from frigate.types import ModelStatusTypesEnum
from frigate.util.downloader import ModelDownloader
@ -79,6 +80,7 @@ class PaddleOCRDetection(BaseEmbedding):
self.runner = get_optimized_runner(
os.path.join(self.download_path, self.model_file),
self.device,
model_type=EnrichmentModelTypeEnum.paddleocr.value,
)
def _preprocess_inputs(self, raw_inputs):
@ -138,6 +140,7 @@ class PaddleOCRClassification(BaseEmbedding):
self.runner = get_optimized_runner(
os.path.join(self.download_path, self.model_file),
self.device,
model_type=EnrichmentModelTypeEnum.paddleocr.value,
)
def _preprocess_inputs(self, raw_inputs):
@ -198,6 +201,7 @@ class PaddleOCRRecognition(BaseEmbedding):
self.runner = get_optimized_runner(
os.path.join(self.download_path, self.model_file),
self.device,
model_type=EnrichmentModelTypeEnum.paddleocr.value,
)
def _preprocess_inputs(self, raw_inputs):
@ -258,7 +262,7 @@ class LicensePlateDetector(BaseEmbedding):
self.runner = get_optimized_runner(
os.path.join(self.download_path, self.model_file),
self.device,
complex_model=False,
model_type="yolov9",
)
def _preprocess_inputs(self, raw_inputs):

View File

@ -0,0 +1,14 @@
from enum import Enum
class EmbeddingTypeEnum(str, Enum):
    """String-valued enumeration of embedding kinds.

    Members are also ``str`` instances, so each compares equal to its
    plain string value.
    """

    thumbnail = "thumbnail"
    description = "description"
class EnrichmentModelTypeEnum(str, Enum):
    """String-valued enumeration of enrichment model identifiers.

    Members are also ``str`` instances, so each compares equal to its
    plain string value; the ``.value`` strings serve as model type
    identifiers.
    """

    arcface = "arcface"
    facenet = "facenet"
    jina_v1 = "jina_v1"
    jina_v2 = "jina_v2"
    paddleocr = "paddleocr"