mirror of
https://github.com/blakeblackshear/frigate.git
synced 2025-09-23 17:52:05 +02:00
Cleanup onnx detector (#20128)
* Cleanup onnx detector
* Fix
* Fix classification cropping
* Deprioritize openvino
* Send model type
* Use model type to decide if model can use full optimization
* Cleanup
* Cleanup
This commit is contained in:
parent
c05e260ae9
commit
68f806bb61
@ -133,9 +133,9 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
|
||||
x:x2,
|
||||
]
|
||||
|
||||
if input.shape != (224, 224):
|
||||
if frame.shape != (224, 224):
|
||||
try:
|
||||
input = cv2.resize(input, (224, 224))
|
||||
frame = cv2.resize(frame, (224, 224))
|
||||
except Exception:
|
||||
logger.warning("Failed to resize image for state classification")
|
||||
return
|
||||
@ -258,11 +258,11 @@ class CustomObjectClassificationProcessor(RealTimeProcessorApi):
|
||||
x:x2,
|
||||
]
|
||||
|
||||
if input.shape != (224, 224):
|
||||
if crop.shape != (224, 224):
|
||||
try:
|
||||
input = cv2.resize(input, (224, 224))
|
||||
crop = cv2.resize(crop, (224, 224))
|
||||
except Exception:
|
||||
logger.warning("Failed to resize image for object classification")
|
||||
logger.warning("Failed to resize image for state classification")
|
||||
return
|
||||
|
||||
input = np.expand_dims(crop, axis=0)
|
||||
|
@ -101,6 +101,19 @@ class CudaGraphRunner(BaseModelRunner):
|
||||
for more complex models like CLIP or PaddleOCR.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def is_complex_model(model_type: str) -> bool:
|
||||
# Import here to avoid circular imports
|
||||
from frigate.detectors.detector_config import ModelTypeEnum
|
||||
from frigate.embeddings.types import EnrichmentModelTypeEnum
|
||||
|
||||
return model_type in [
|
||||
ModelTypeEnum.yolonas.value,
|
||||
EnrichmentModelTypeEnum.paddleocr.value,
|
||||
EnrichmentModelTypeEnum.jina_v1.value,
|
||||
EnrichmentModelTypeEnum.jina_v2.value,
|
||||
]
|
||||
|
||||
def __init__(self, session: ort.InferenceSession, cuda_device_id: int):
|
||||
self._session = session
|
||||
self._cuda_device_id = cuda_device_id
|
||||
@ -156,10 +169,17 @@ class CudaGraphRunner(BaseModelRunner):
|
||||
class OpenVINOModelRunner(BaseModelRunner):
|
||||
"""OpenVINO model runner that handles inference efficiently."""
|
||||
|
||||
def __init__(self, model_path: str, device: str, complex_model: bool, **kwargs):
|
||||
@staticmethod
|
||||
def is_complex_model(model_type: str) -> bool:
|
||||
# Import here to avoid circular imports
|
||||
from frigate.embeddings.types import EnrichmentModelTypeEnum
|
||||
|
||||
return model_type in [EnrichmentModelTypeEnum.paddleocr.value]
|
||||
|
||||
def __init__(self, model_path: str, device: str, model_type: str, **kwargs):
|
||||
self.model_path = model_path
|
||||
self.device = device
|
||||
self.complex_model = complex_model
|
||||
self.complex_model = OpenVINOModelRunner.is_complex_model(model_type)
|
||||
|
||||
if not os.path.isfile(model_path):
|
||||
raise FileNotFoundError(f"OpenVINO model file {model_path} not found.")
|
||||
@ -183,7 +203,7 @@ class OpenVINOModelRunner(BaseModelRunner):
|
||||
self.infer_request = self.compiled_model.create_infer_request()
|
||||
self.input_tensor: ov.Tensor | None = None
|
||||
|
||||
if not complex_model:
|
||||
if not self.complex_model:
|
||||
try:
|
||||
input_shape = self.compiled_model.inputs[0].get_shape()
|
||||
input_element_type = self.compiled_model.inputs[0].get_element_type()
|
||||
@ -381,28 +401,35 @@ class RKNNModelRunner(BaseModelRunner):
|
||||
|
||||
|
||||
def get_optimized_runner(
|
||||
model_path: str, device: str | None, complex_model: bool = True, **kwargs
|
||||
model_path: str, device: str | None, model_type: str, **kwargs
|
||||
) -> BaseModelRunner:
|
||||
"""Get an optimized runner for the hardware."""
|
||||
device = device or "AUTO"
|
||||
if is_rknn_compatible(model_path):
|
||||
rknn_path = auto_convert_model(model_path)
|
||||
|
||||
if rknn_path:
|
||||
return RKNNModelRunner(rknn_path)
|
||||
|
||||
if device != "CPU" and is_openvino_gpu_npu_available():
|
||||
return OpenVINOModelRunner(
|
||||
model_path, device or "AUTO", complex_model, **kwargs
|
||||
)
|
||||
|
||||
providers, options = get_ort_providers(device == "CPU", device, **kwargs)
|
||||
|
||||
if providers[0] == "CPUExecutionProvider":
|
||||
# In the default image, ONNXRuntime is used so we will only get CPUExecutionProvider
|
||||
# In other images we will get CUDA / ROCm which are preferred over OpenVINO
|
||||
# There is currently no way to prioritize OpenVINO over CUDA / ROCm in these images
|
||||
if device != "CPU" and is_openvino_gpu_npu_available():
|
||||
return OpenVINOModelRunner(model_path, device, model_type, **kwargs)
|
||||
|
||||
ortSession = ort.InferenceSession(
|
||||
model_path,
|
||||
providers=providers,
|
||||
provider_options=options,
|
||||
)
|
||||
|
||||
if not complex_model and providers[0] == "CUDAExecutionProvider":
|
||||
if (
|
||||
not CudaGraphRunner.is_complex_model(model_type)
|
||||
and providers[0] == "CUDAExecutionProvider"
|
||||
):
|
||||
return CudaGraphRunner(ortSession, options[0]["device_id"])
|
||||
|
||||
return ONNXModelRunner(ortSession)
|
||||
|
@ -1,18 +1,16 @@
|
||||
import logging
|
||||
|
||||
import numpy as np
|
||||
import onnxruntime as ort
|
||||
from pydantic import Field
|
||||
from typing_extensions import Literal
|
||||
|
||||
from frigate.detectors.detection_api import DetectionApi
|
||||
from frigate.detectors.detection_runners import CudaGraphRunner
|
||||
from frigate.detectors.detection_runners import get_optimized_runner
|
||||
from frigate.detectors.detector_config import (
|
||||
BaseDetectorConfig,
|
||||
ModelTypeEnum,
|
||||
)
|
||||
from frigate.util.model import (
|
||||
get_ort_providers,
|
||||
post_process_dfine,
|
||||
post_process_rfdetr,
|
||||
post_process_yolo,
|
||||
@ -38,80 +36,35 @@ class ONNXDetector(DetectionApi):
|
||||
path = detector_config.model.path
|
||||
logger.info(f"ONNX: loading {detector_config.model.path}")
|
||||
|
||||
providers, options = get_ort_providers(
|
||||
detector_config.device == "CPU", detector_config.device
|
||||
)
|
||||
|
||||
# Enable CUDA Graphs only for supported models when using CUDA EP
|
||||
if "CUDAExecutionProvider" in providers:
|
||||
cuda_idx = providers.index("CUDAExecutionProvider")
|
||||
# mutate only this call's provider options
|
||||
options[cuda_idx] = {
|
||||
**options[cuda_idx],
|
||||
"enable_cuda_graph": True,
|
||||
}
|
||||
|
||||
sess_options = None
|
||||
|
||||
if providers[0] == "ROCMExecutionProvider":
|
||||
# avoid AMD GPU kernel crashes
|
||||
sess_options = ort.SessionOptions()
|
||||
sess_options.graph_optimization_level = (
|
||||
ort.GraphOptimizationLevel.ORT_DISABLE_ALL
|
||||
)
|
||||
|
||||
self.model = ort.InferenceSession(
|
||||
path, providers=providers, provider_options=options
|
||||
self.runner = get_optimized_runner(
|
||||
path,
|
||||
detector_config.device,
|
||||
model_type=detector_config.model.model_type,
|
||||
)
|
||||
|
||||
self.onnx_model_type = detector_config.model.model_type
|
||||
self.onnx_model_px = detector_config.model.input_pixel_format
|
||||
self.onnx_model_shape = detector_config.model.input_tensor
|
||||
path = detector_config.model.path
|
||||
|
||||
if self.onnx_model_type == ModelTypeEnum.yolox:
|
||||
self.calculate_grids_strides()
|
||||
|
||||
self._cuda_device_id = 0
|
||||
self._cg_runner: CudaGraphRunner | None = None
|
||||
|
||||
try:
|
||||
if "CUDAExecutionProvider" in providers:
|
||||
self._cuda_device_id = options[cuda_idx].get("device_id", 0)
|
||||
|
||||
if options[cuda_idx].get("enable_cuda_graph"):
|
||||
self._cg_runner = CudaGraphRunner(self.model, self._cuda_device_id)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
logger.info(f"ONNX: {path} loaded")
|
||||
|
||||
def detect_raw(self, tensor_input: np.ndarray):
|
||||
if self.onnx_model_type == ModelTypeEnum.dfine:
|
||||
tensor_output = self.model.run(
|
||||
None,
|
||||
tensor_output = self.runner.run(
|
||||
{
|
||||
"images": tensor_input,
|
||||
"orig_target_sizes": np.array(
|
||||
[[self.height, self.width]], dtype=np.int64
|
||||
),
|
||||
},
|
||||
}
|
||||
)
|
||||
return post_process_dfine(tensor_output, self.width, self.height)
|
||||
|
||||
model_input_name = self.model.get_inputs()[0].name
|
||||
|
||||
if self._cg_runner is not None:
|
||||
try:
|
||||
# Run using CUDA graphs if available
|
||||
tensor_output = self._cg_runner.run({model_input_name: tensor_input})
|
||||
except Exception as e:
|
||||
logger.warning(f"CUDA Graphs failed, falling back to regular run: {e}")
|
||||
self._cg_runner = None
|
||||
tensor_output = self.model.run(None, {model_input_name: tensor_input})
|
||||
else:
|
||||
# Use regular run if CUDA graphs are not available
|
||||
tensor_output = self.model.run(None, {model_input_name: tensor_input})
|
||||
model_input_name = self.runner.get_input_names()[0]
|
||||
tensor_output = self.runner.run({model_input_name: tensor_input})
|
||||
|
||||
if self.onnx_model_type == ModelTypeEnum.rfdetr:
|
||||
return post_process_rfdetr(tensor_output)
|
||||
|
@ -45,7 +45,7 @@ class OvDetector(DetectionApi):
|
||||
self.runner = OpenVINOModelRunner(
|
||||
model_path=detector_config.model.path,
|
||||
device=detector_config.device,
|
||||
complex_model=False,
|
||||
model_type=detector_config.model.model_type,
|
||||
)
|
||||
|
||||
# For dfine models, also pre-allocate target sizes tensor
|
||||
|
@ -3,7 +3,6 @@
|
||||
import logging
|
||||
import os
|
||||
from abc import ABC, abstractmethod
|
||||
from enum import Enum
|
||||
from io import BytesIO
|
||||
from typing import Any
|
||||
|
||||
@ -18,11 +17,6 @@ from frigate.util.downloader import ModelDownloader
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EmbeddingTypeEnum(str, Enum):
|
||||
thumbnail = "thumbnail"
|
||||
description = "description"
|
||||
|
||||
|
||||
class BaseEmbedding(ABC):
|
||||
"""Base embedding class."""
|
||||
|
||||
|
@ -7,6 +7,7 @@ import numpy as np
|
||||
|
||||
from frigate.const import MODEL_CACHE_DIR
|
||||
from frigate.detectors.detection_runners import get_optimized_runner
|
||||
from frigate.embeddings.types import EnrichmentModelTypeEnum
|
||||
from frigate.log import redirect_output_to_logger
|
||||
from frigate.util.downloader import ModelDownloader
|
||||
|
||||
@ -151,7 +152,7 @@ class ArcfaceEmbedding(BaseEmbedding):
|
||||
self.runner = get_optimized_runner(
|
||||
os.path.join(self.download_path, self.model_file),
|
||||
device=self.config.device or "GPU",
|
||||
complex_model=False,
|
||||
model_type=EnrichmentModelTypeEnum.arcface.value,
|
||||
)
|
||||
|
||||
def _preprocess_inputs(self, raw_inputs):
|
||||
|
@ -4,15 +4,17 @@ import logging
|
||||
import os
|
||||
import warnings
|
||||
|
||||
# importing this without pytorch or others causes a warning
|
||||
# https://github.com/huggingface/transformers/issues/27214
|
||||
# suppressed by setting env TRANSFORMERS_NO_ADVISORY_WARNINGS=1
|
||||
from transformers import AutoFeatureExtractor, AutoTokenizer
|
||||
from transformers.utils.logging import disable_progress_bar
|
||||
|
||||
from frigate.comms.inter_process import InterProcessRequestor
|
||||
from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE
|
||||
from frigate.detectors.detection_runners import BaseModelRunner, get_optimized_runner
|
||||
|
||||
# importing this without pytorch or others causes a warning
|
||||
# https://github.com/huggingface/transformers/issues/27214
|
||||
# suppressed by setting env TRANSFORMERS_NO_ADVISORY_WARNINGS=1
|
||||
from frigate.embeddings.types import EnrichmentModelTypeEnum
|
||||
from frigate.types import ModelStatusTypesEnum
|
||||
from frigate.util.downloader import ModelDownloader
|
||||
|
||||
@ -128,6 +130,7 @@ class JinaV1TextEmbedding(BaseEmbedding):
|
||||
self.runner = get_optimized_runner(
|
||||
os.path.join(self.download_path, self.model_file),
|
||||
self.device,
|
||||
model_type=EnrichmentModelTypeEnum.jina_v1.value,
|
||||
)
|
||||
|
||||
def _preprocess_inputs(self, raw_inputs):
|
||||
@ -206,6 +209,7 @@ class JinaV1ImageEmbedding(BaseEmbedding):
|
||||
self.runner = get_optimized_runner(
|
||||
os.path.join(self.download_path, self.model_file),
|
||||
self.device,
|
||||
model_type=EnrichmentModelTypeEnum.jina_v1.value,
|
||||
)
|
||||
|
||||
def _preprocess_inputs(self, raw_inputs):
|
||||
|
@ -12,6 +12,7 @@ from transformers.utils.logging import disable_progress_bar, set_verbosity_error
|
||||
from frigate.comms.inter_process import InterProcessRequestor
|
||||
from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE
|
||||
from frigate.detectors.detection_runners import get_optimized_runner
|
||||
from frigate.embeddings.types import EnrichmentModelTypeEnum
|
||||
from frigate.types import ModelStatusTypesEnum
|
||||
from frigate.util.downloader import ModelDownloader
|
||||
|
||||
@ -128,6 +129,7 @@ class JinaV2Embedding(BaseEmbedding):
|
||||
self.runner = get_optimized_runner(
|
||||
os.path.join(self.download_path, self.model_file),
|
||||
self.device,
|
||||
model_type=EnrichmentModelTypeEnum.jina_v2.value,
|
||||
)
|
||||
|
||||
def _preprocess_image(self, image_data: bytes | Image.Image) -> np.ndarray:
|
||||
|
@ -8,6 +8,7 @@ import numpy as np
|
||||
from frigate.comms.inter_process import InterProcessRequestor
|
||||
from frigate.const import MODEL_CACHE_DIR
|
||||
from frigate.detectors.detection_runners import BaseModelRunner, get_optimized_runner
|
||||
from frigate.embeddings.types import EnrichmentModelTypeEnum
|
||||
from frigate.types import ModelStatusTypesEnum
|
||||
from frigate.util.downloader import ModelDownloader
|
||||
|
||||
@ -79,6 +80,7 @@ class PaddleOCRDetection(BaseEmbedding):
|
||||
self.runner = get_optimized_runner(
|
||||
os.path.join(self.download_path, self.model_file),
|
||||
self.device,
|
||||
model_type=EnrichmentModelTypeEnum.paddleocr.value,
|
||||
)
|
||||
|
||||
def _preprocess_inputs(self, raw_inputs):
|
||||
@ -138,6 +140,7 @@ class PaddleOCRClassification(BaseEmbedding):
|
||||
self.runner = get_optimized_runner(
|
||||
os.path.join(self.download_path, self.model_file),
|
||||
self.device,
|
||||
model_type=EnrichmentModelTypeEnum.paddleocr.value,
|
||||
)
|
||||
|
||||
def _preprocess_inputs(self, raw_inputs):
|
||||
@ -198,6 +201,7 @@ class PaddleOCRRecognition(BaseEmbedding):
|
||||
self.runner = get_optimized_runner(
|
||||
os.path.join(self.download_path, self.model_file),
|
||||
self.device,
|
||||
model_type=EnrichmentModelTypeEnum.paddleocr.value,
|
||||
)
|
||||
|
||||
def _preprocess_inputs(self, raw_inputs):
|
||||
@ -258,7 +262,7 @@ class LicensePlateDetector(BaseEmbedding):
|
||||
self.runner = get_optimized_runner(
|
||||
os.path.join(self.download_path, self.model_file),
|
||||
self.device,
|
||||
complex_model=False,
|
||||
model_type="yolov9",
|
||||
)
|
||||
|
||||
def _preprocess_inputs(self, raw_inputs):
|
||||
|
14
frigate/embeddings/types.py
Normal file
14
frigate/embeddings/types.py
Normal file
@ -0,0 +1,14 @@
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class EmbeddingTypeEnum(str, Enum):
|
||||
thumbnail = "thumbnail"
|
||||
description = "description"
|
||||
|
||||
|
||||
class EnrichmentModelTypeEnum(str, Enum):
|
||||
arcface = "arcface"
|
||||
facenet = "facenet"
|
||||
jina_v1 = "jina_v1"
|
||||
jina_v2 = "jina_v2"
|
||||
paddleocr = "paddleocr"
|
Loading…
Reference in New Issue
Block a user