Cleanup onnx detector (#20128)

* Cleanup onnx detector

* Fix

* Fix classification cropping

* Deprioritize openvino

* Send model type

* Use model type to decide if model can use full optimization

* Cleanup

* Cleanup
This commit is contained in:
Nicolas Mowen 2025-09-18 15:12:09 -06:00 committed by GitHub
parent c05e260ae9
commit 68f806bb61
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 82 additions and 83 deletions

View File

@ -133,9 +133,9 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
x:x2, x:x2,
] ]
if input.shape != (224, 224): if frame.shape != (224, 224):
try: try:
input = cv2.resize(input, (224, 224)) frame = cv2.resize(frame, (224, 224))
except Exception: except Exception:
logger.warning("Failed to resize image for state classification") logger.warning("Failed to resize image for state classification")
return return
@ -258,11 +258,11 @@ class CustomObjectClassificationProcessor(RealTimeProcessorApi):
x:x2, x:x2,
] ]
if input.shape != (224, 224): if crop.shape != (224, 224):
try: try:
input = cv2.resize(input, (224, 224)) crop = cv2.resize(crop, (224, 224))
except Exception: except Exception:
logger.warning("Failed to resize image for object classification") logger.warning("Failed to resize image for object classification")
return return
input = np.expand_dims(crop, axis=0) input = np.expand_dims(crop, axis=0)

View File

@ -101,6 +101,19 @@ class CudaGraphRunner(BaseModelRunner):
for more complex models like CLIP or PaddleOCR. for more complex models like CLIP or PaddleOCR.
""" """
@staticmethod
def is_complex_model(model_type: str) -> bool:
    """Return whether ``model_type`` is on this runner's complex-model list.

    ``get_optimized_runner`` only returns a ``CudaGraphRunner`` when this
    check is ``False`` and the first provider is ``CUDAExecutionProvider``.

    Args:
        model_type: Model type string, compared against the ``.value`` of
            the enum members listed below.

    Returns:
        ``True`` if ``model_type`` equals one of the listed values,
        otherwise ``False``.
    """
    # Import here to avoid circular imports
    from frigate.detectors.detector_config import ModelTypeEnum
    from frigate.embeddings.types import EnrichmentModelTypeEnum

    complex_types = (
        ModelTypeEnum.yolonas.value,
        EnrichmentModelTypeEnum.paddleocr.value,
        EnrichmentModelTypeEnum.jina_v1.value,
        EnrichmentModelTypeEnum.jina_v2.value,
    )
    return model_type in complex_types
def __init__(self, session: ort.InferenceSession, cuda_device_id: int): def __init__(self, session: ort.InferenceSession, cuda_device_id: int):
self._session = session self._session = session
self._cuda_device_id = cuda_device_id self._cuda_device_id = cuda_device_id
@ -156,10 +169,17 @@ class CudaGraphRunner(BaseModelRunner):
class OpenVINOModelRunner(BaseModelRunner): class OpenVINOModelRunner(BaseModelRunner):
"""OpenVINO model runner that handles inference efficiently.""" """OpenVINO model runner that handles inference efficiently."""
def __init__(self, model_path: str, device: str, complex_model: bool, **kwargs): @staticmethod
def is_complex_model(model_type: str) -> bool:
    """Return whether ``model_type`` is treated as complex by this runner.

    The result is stored as ``self.complex_model`` in ``__init__``; the
    input-tensor setup guarded by ``if not self.complex_model`` is skipped
    when it is ``True``.

    Args:
        model_type: Model type string, compared against the ``.value`` of
            the enum member listed below.

    Returns:
        ``True`` only when ``model_type`` equals the PaddleOCR value.
    """
    # Import here to avoid circular imports
    from frigate.embeddings.types import EnrichmentModelTypeEnum

    complex_types = (EnrichmentModelTypeEnum.paddleocr.value,)
    return model_type in complex_types
def __init__(self, model_path: str, device: str, model_type: str, **kwargs):
self.model_path = model_path self.model_path = model_path
self.device = device self.device = device
self.complex_model = complex_model self.complex_model = OpenVINOModelRunner.is_complex_model(model_type)
if not os.path.isfile(model_path): if not os.path.isfile(model_path):
raise FileNotFoundError(f"OpenVINO model file {model_path} not found.") raise FileNotFoundError(f"OpenVINO model file {model_path} not found.")
@ -183,7 +203,7 @@ class OpenVINOModelRunner(BaseModelRunner):
self.infer_request = self.compiled_model.create_infer_request() self.infer_request = self.compiled_model.create_infer_request()
self.input_tensor: ov.Tensor | None = None self.input_tensor: ov.Tensor | None = None
if not complex_model: if not self.complex_model:
try: try:
input_shape = self.compiled_model.inputs[0].get_shape() input_shape = self.compiled_model.inputs[0].get_shape()
input_element_type = self.compiled_model.inputs[0].get_element_type() input_element_type = self.compiled_model.inputs[0].get_element_type()
@ -381,28 +401,35 @@ class RKNNModelRunner(BaseModelRunner):
def get_optimized_runner( def get_optimized_runner(
model_path: str, device: str | None, complex_model: bool = True, **kwargs model_path: str, device: str | None, model_type: str, **kwargs
) -> BaseModelRunner: ) -> BaseModelRunner:
"""Get an optimized runner for the hardware.""" """Get an optimized runner for the hardware."""
device = device or "AUTO"
if is_rknn_compatible(model_path): if is_rknn_compatible(model_path):
rknn_path = auto_convert_model(model_path) rknn_path = auto_convert_model(model_path)
if rknn_path: if rknn_path:
return RKNNModelRunner(rknn_path) return RKNNModelRunner(rknn_path)
if device != "CPU" and is_openvino_gpu_npu_available():
return OpenVINOModelRunner(
model_path, device or "AUTO", complex_model, **kwargs
)
providers, options = get_ort_providers(device == "CPU", device, **kwargs) providers, options = get_ort_providers(device == "CPU", device, **kwargs)
if providers[0] == "CPUExecutionProvider":
# In the default image, ONNXRuntime is used so we will only get CPUExecutionProvider
# In other images we will get CUDA / ROCm which are preferred over OpenVINO
# There is currently no way to prioritize OpenVINO over CUDA / ROCm in these images
if device != "CPU" and is_openvino_gpu_npu_available():
return OpenVINOModelRunner(model_path, device, model_type, **kwargs)
ortSession = ort.InferenceSession( ortSession = ort.InferenceSession(
model_path, model_path,
providers=providers, providers=providers,
provider_options=options, provider_options=options,
) )
if not complex_model and providers[0] == "CUDAExecutionProvider": if (
not CudaGraphRunner.is_complex_model(model_type)
and providers[0] == "CUDAExecutionProvider"
):
return CudaGraphRunner(ortSession, options[0]["device_id"]) return CudaGraphRunner(ortSession, options[0]["device_id"])
return ONNXModelRunner(ortSession) return ONNXModelRunner(ortSession)

View File

@ -1,18 +1,16 @@
import logging import logging
import numpy as np import numpy as np
import onnxruntime as ort
from pydantic import Field from pydantic import Field
from typing_extensions import Literal from typing_extensions import Literal
from frigate.detectors.detection_api import DetectionApi from frigate.detectors.detection_api import DetectionApi
from frigate.detectors.detection_runners import CudaGraphRunner from frigate.detectors.detection_runners import get_optimized_runner
from frigate.detectors.detector_config import ( from frigate.detectors.detector_config import (
BaseDetectorConfig, BaseDetectorConfig,
ModelTypeEnum, ModelTypeEnum,
) )
from frigate.util.model import ( from frigate.util.model import (
get_ort_providers,
post_process_dfine, post_process_dfine,
post_process_rfdetr, post_process_rfdetr,
post_process_yolo, post_process_yolo,
@ -38,80 +36,35 @@ class ONNXDetector(DetectionApi):
path = detector_config.model.path path = detector_config.model.path
logger.info(f"ONNX: loading {detector_config.model.path}") logger.info(f"ONNX: loading {detector_config.model.path}")
providers, options = get_ort_providers( self.runner = get_optimized_runner(
detector_config.device == "CPU", detector_config.device path,
) detector_config.device,
model_type=detector_config.model.model_type,
# Enable CUDA Graphs only for supported models when using CUDA EP
if "CUDAExecutionProvider" in providers:
cuda_idx = providers.index("CUDAExecutionProvider")
# mutate only this call's provider options
options[cuda_idx] = {
**options[cuda_idx],
"enable_cuda_graph": True,
}
sess_options = None
if providers[0] == "ROCMExecutionProvider":
# avoid AMD GPU kernel crashes
sess_options = ort.SessionOptions()
sess_options.graph_optimization_level = (
ort.GraphOptimizationLevel.ORT_DISABLE_ALL
)
self.model = ort.InferenceSession(
path, providers=providers, provider_options=options
) )
self.onnx_model_type = detector_config.model.model_type self.onnx_model_type = detector_config.model.model_type
self.onnx_model_px = detector_config.model.input_pixel_format self.onnx_model_px = detector_config.model.input_pixel_format
self.onnx_model_shape = detector_config.model.input_tensor self.onnx_model_shape = detector_config.model.input_tensor
path = detector_config.model.path
if self.onnx_model_type == ModelTypeEnum.yolox: if self.onnx_model_type == ModelTypeEnum.yolox:
self.calculate_grids_strides() self.calculate_grids_strides()
self._cuda_device_id = 0
self._cg_runner: CudaGraphRunner | None = None
try:
if "CUDAExecutionProvider" in providers:
self._cuda_device_id = options[cuda_idx].get("device_id", 0)
if options[cuda_idx].get("enable_cuda_graph"):
self._cg_runner = CudaGraphRunner(self.model, self._cuda_device_id)
except Exception:
pass
logger.info(f"ONNX: {path} loaded") logger.info(f"ONNX: {path} loaded")
def detect_raw(self, tensor_input: np.ndarray): def detect_raw(self, tensor_input: np.ndarray):
if self.onnx_model_type == ModelTypeEnum.dfine: if self.onnx_model_type == ModelTypeEnum.dfine:
tensor_output = self.model.run( tensor_output = self.runner.run(
None,
{ {
"images": tensor_input, "images": tensor_input,
"orig_target_sizes": np.array( "orig_target_sizes": np.array(
[[self.height, self.width]], dtype=np.int64 [[self.height, self.width]], dtype=np.int64
), ),
}, }
) )
return post_process_dfine(tensor_output, self.width, self.height) return post_process_dfine(tensor_output, self.width, self.height)
model_input_name = self.model.get_inputs()[0].name model_input_name = self.runner.get_input_names()[0]
tensor_output = self.runner.run({model_input_name: tensor_input})
if self._cg_runner is not None:
try:
# Run using CUDA graphs if available
tensor_output = self._cg_runner.run({model_input_name: tensor_input})
except Exception as e:
logger.warning(f"CUDA Graphs failed, falling back to regular run: {e}")
self._cg_runner = None
tensor_output = self.model.run(None, {model_input_name: tensor_input})
else:
# Use regular run if CUDA graphs are not available
tensor_output = self.model.run(None, {model_input_name: tensor_input})
if self.onnx_model_type == ModelTypeEnum.rfdetr: if self.onnx_model_type == ModelTypeEnum.rfdetr:
return post_process_rfdetr(tensor_output) return post_process_rfdetr(tensor_output)

View File

@ -45,7 +45,7 @@ class OvDetector(DetectionApi):
self.runner = OpenVINOModelRunner( self.runner = OpenVINOModelRunner(
model_path=detector_config.model.path, model_path=detector_config.model.path,
device=detector_config.device, device=detector_config.device,
complex_model=False, model_type=detector_config.model.model_type,
) )
# For dfine models, also pre-allocate target sizes tensor # For dfine models, also pre-allocate target sizes tensor

View File

@ -3,7 +3,6 @@
import logging import logging
import os import os
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from enum import Enum
from io import BytesIO from io import BytesIO
from typing import Any from typing import Any
@ -18,11 +17,6 @@ from frigate.util.downloader import ModelDownloader
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class EmbeddingTypeEnum(str, Enum):
thumbnail = "thumbnail"
description = "description"
class BaseEmbedding(ABC): class BaseEmbedding(ABC):
"""Base embedding class.""" """Base embedding class."""

View File

@ -7,6 +7,7 @@ import numpy as np
from frigate.const import MODEL_CACHE_DIR from frigate.const import MODEL_CACHE_DIR
from frigate.detectors.detection_runners import get_optimized_runner from frigate.detectors.detection_runners import get_optimized_runner
from frigate.embeddings.types import EnrichmentModelTypeEnum
from frigate.log import redirect_output_to_logger from frigate.log import redirect_output_to_logger
from frigate.util.downloader import ModelDownloader from frigate.util.downloader import ModelDownloader
@ -151,7 +152,7 @@ class ArcfaceEmbedding(BaseEmbedding):
self.runner = get_optimized_runner( self.runner = get_optimized_runner(
os.path.join(self.download_path, self.model_file), os.path.join(self.download_path, self.model_file),
device=self.config.device or "GPU", device=self.config.device or "GPU",
complex_model=False, model_type=EnrichmentModelTypeEnum.arcface.value,
) )
def _preprocess_inputs(self, raw_inputs): def _preprocess_inputs(self, raw_inputs):

View File

@ -4,15 +4,17 @@ import logging
import os import os
import warnings import warnings
# importing this without pytorch or others causes a warning
# https://github.com/huggingface/transformers/issues/27214
# suppressed by setting env TRANSFORMERS_NO_ADVISORY_WARNINGS=1
from transformers import AutoFeatureExtractor, AutoTokenizer from transformers import AutoFeatureExtractor, AutoTokenizer
from transformers.utils.logging import disable_progress_bar from transformers.utils.logging import disable_progress_bar
from frigate.comms.inter_process import InterProcessRequestor from frigate.comms.inter_process import InterProcessRequestor
from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE
from frigate.detectors.detection_runners import BaseModelRunner, get_optimized_runner from frigate.detectors.detection_runners import BaseModelRunner, get_optimized_runner
# importing this without pytorch or others causes a warning
# https://github.com/huggingface/transformers/issues/27214
# suppressed by setting env TRANSFORMERS_NO_ADVISORY_WARNINGS=1
from frigate.embeddings.types import EnrichmentModelTypeEnum
from frigate.types import ModelStatusTypesEnum from frigate.types import ModelStatusTypesEnum
from frigate.util.downloader import ModelDownloader from frigate.util.downloader import ModelDownloader
@ -128,6 +130,7 @@ class JinaV1TextEmbedding(BaseEmbedding):
self.runner = get_optimized_runner( self.runner = get_optimized_runner(
os.path.join(self.download_path, self.model_file), os.path.join(self.download_path, self.model_file),
self.device, self.device,
model_type=EnrichmentModelTypeEnum.jina_v1.value,
) )
def _preprocess_inputs(self, raw_inputs): def _preprocess_inputs(self, raw_inputs):
@ -206,6 +209,7 @@ class JinaV1ImageEmbedding(BaseEmbedding):
self.runner = get_optimized_runner( self.runner = get_optimized_runner(
os.path.join(self.download_path, self.model_file), os.path.join(self.download_path, self.model_file),
self.device, self.device,
model_type=EnrichmentModelTypeEnum.jina_v1.value,
) )
def _preprocess_inputs(self, raw_inputs): def _preprocess_inputs(self, raw_inputs):

View File

@ -12,6 +12,7 @@ from transformers.utils.logging import disable_progress_bar, set_verbosity_error
from frigate.comms.inter_process import InterProcessRequestor from frigate.comms.inter_process import InterProcessRequestor
from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE
from frigate.detectors.detection_runners import get_optimized_runner from frigate.detectors.detection_runners import get_optimized_runner
from frigate.embeddings.types import EnrichmentModelTypeEnum
from frigate.types import ModelStatusTypesEnum from frigate.types import ModelStatusTypesEnum
from frigate.util.downloader import ModelDownloader from frigate.util.downloader import ModelDownloader
@ -128,6 +129,7 @@ class JinaV2Embedding(BaseEmbedding):
self.runner = get_optimized_runner( self.runner = get_optimized_runner(
os.path.join(self.download_path, self.model_file), os.path.join(self.download_path, self.model_file),
self.device, self.device,
model_type=EnrichmentModelTypeEnum.jina_v2.value,
) )
def _preprocess_image(self, image_data: bytes | Image.Image) -> np.ndarray: def _preprocess_image(self, image_data: bytes | Image.Image) -> np.ndarray:

View File

@ -8,6 +8,7 @@ import numpy as np
from frigate.comms.inter_process import InterProcessRequestor from frigate.comms.inter_process import InterProcessRequestor
from frigate.const import MODEL_CACHE_DIR from frigate.const import MODEL_CACHE_DIR
from frigate.detectors.detection_runners import BaseModelRunner, get_optimized_runner from frigate.detectors.detection_runners import BaseModelRunner, get_optimized_runner
from frigate.embeddings.types import EnrichmentModelTypeEnum
from frigate.types import ModelStatusTypesEnum from frigate.types import ModelStatusTypesEnum
from frigate.util.downloader import ModelDownloader from frigate.util.downloader import ModelDownloader
@ -79,6 +80,7 @@ class PaddleOCRDetection(BaseEmbedding):
self.runner = get_optimized_runner( self.runner = get_optimized_runner(
os.path.join(self.download_path, self.model_file), os.path.join(self.download_path, self.model_file),
self.device, self.device,
model_type=EnrichmentModelTypeEnum.paddleocr.value,
) )
def _preprocess_inputs(self, raw_inputs): def _preprocess_inputs(self, raw_inputs):
@ -138,6 +140,7 @@ class PaddleOCRClassification(BaseEmbedding):
self.runner = get_optimized_runner( self.runner = get_optimized_runner(
os.path.join(self.download_path, self.model_file), os.path.join(self.download_path, self.model_file),
self.device, self.device,
model_type=EnrichmentModelTypeEnum.paddleocr.value,
) )
def _preprocess_inputs(self, raw_inputs): def _preprocess_inputs(self, raw_inputs):
@ -198,6 +201,7 @@ class PaddleOCRRecognition(BaseEmbedding):
self.runner = get_optimized_runner( self.runner = get_optimized_runner(
os.path.join(self.download_path, self.model_file), os.path.join(self.download_path, self.model_file),
self.device, self.device,
model_type=EnrichmentModelTypeEnum.paddleocr.value,
) )
def _preprocess_inputs(self, raw_inputs): def _preprocess_inputs(self, raw_inputs):
@ -258,7 +262,7 @@ class LicensePlateDetector(BaseEmbedding):
self.runner = get_optimized_runner( self.runner = get_optimized_runner(
os.path.join(self.download_path, self.model_file), os.path.join(self.download_path, self.model_file),
self.device, self.device,
complex_model=False, model_type="yolov9",
) )
def _preprocess_inputs(self, raw_inputs): def _preprocess_inputs(self, raw_inputs):

View File

@ -0,0 +1,14 @@
from enum import Enum
class EmbeddingTypeEnum(str, Enum):
    """String-valued enum with ``thumbnail`` and ``description`` members.

    Because it also subclasses ``str``, each member compares equal to its
    plain string value.
    """

    thumbnail = "thumbnail"
    description = "description"
class EnrichmentModelTypeEnum(str, Enum):
    """Model type identifiers for enrichment models.

    The ``.value`` strings are what the embedding classes pass as
    ``model_type`` to ``get_optimized_runner``, and what the runners'
    ``is_complex_model`` helpers compare against.
    """

    arcface = "arcface"
    facenet = "facenet"
    jina_v1 = "jina_v1"
    jina_v2 = "jina_v2"
    paddleocr = "paddleocr"