From 6df541e1fd989d74ef70b7ffb776ba7822b833e1 Mon Sep 17 00:00:00 2001
From: Nicolas Mowen
Date: Fri, 11 Oct 2024 10:47:23 -0600
Subject: [PATCH] Openvino models (#14283)

* Enable model conversion cache for openvino

* Use openvino directly for onnx embeddings if available

* Don't fail if zmq is busy
---
 frigate/comms/dispatcher.py           |  9 ++--
 frigate/comms/inter_process.py        |  7 +++-
 frigate/detectors/plugins/openvino.py |  3 ++
 frigate/embeddings/functions/onnx.py  | 35 ++++++----------
 frigate/util/model.py                 | 60 +++++++++++++++++++++++++++
 5 files changed, 85 insertions(+), 29 deletions(-)

diff --git a/frigate/comms/dispatcher.py b/frigate/comms/dispatcher.py
index 8b310a4bb..4a3862eaf 100644
--- a/frigate/comms/dispatcher.py
+++ b/frigate/comms/dispatcher.py
@@ -142,10 +142,11 @@ class Dispatcher:
             )
 
         def handle_update_model_state():
-            model = payload["model"]
-            state = payload["state"]
-            self.model_state[model] = ModelStatusTypesEnum[state]
-            self.publish("model_state", json.dumps(self.model_state))
+            if payload:
+                model = payload["model"]
+                state = payload["state"]
+                self.model_state[model] = ModelStatusTypesEnum[state]
+                self.publish("model_state", json.dumps(self.model_state))
 
         def handle_model_state():
             self.publish("model_state", json.dumps(self.model_state.copy()))
diff --git a/frigate/comms/inter_process.py b/frigate/comms/inter_process.py
index 32cec49e4..850e2435c 100644
--- a/frigate/comms/inter_process.py
+++ b/frigate/comms/inter_process.py
@@ -65,8 +65,11 @@ class InterProcessRequestor:
 
     def send_data(self, topic: str, data: any) -> any:
         """Sends data and then waits for reply."""
-        self.socket.send_json((topic, data))
-        return self.socket.recv_json()
+        try:
+            self.socket.send_json((topic, data))
+            return self.socket.recv_json()
+        except zmq.ZMQError:
+            return ""
 
     def stop(self) -> None:
         self.socket.close()
diff --git a/frigate/detectors/plugins/openvino.py b/frigate/detectors/plugins/openvino.py
index 5dc998487..51e48530b 100644
--- a/frigate/detectors/plugins/openvino.py
+++ b/frigate/detectors/plugins/openvino.py
@@ -3,6 +3,7 @@ import os
 
 import numpy as np
 import openvino as ov
+import openvino.properties as props
 from pydantic import Field
 from typing_extensions import Literal
 
@@ -34,6 +35,8 @@ class OvDetector(DetectionApi):
             logger.error(f"OpenVino model file {detector_config.model.path} not found.")
             raise FileNotFoundError
 
+        os.makedirs("/config/model_cache/openvino", exist_ok=True)
+        self.ov_core.set_property({props.cache_dir: "/config/model_cache/openvino"})
         self.interpreter = self.ov_core.compile_model(
             model=detector_config.model.path, device_name=detector_config.device
         )
diff --git a/frigate/embeddings/functions/onnx.py b/frigate/embeddings/functions/onnx.py
index ae9fe33bc..1e50e07b1 100644
--- a/frigate/embeddings/functions/onnx.py
+++ b/frigate/embeddings/functions/onnx.py
@@ -5,7 +5,6 @@ from io import BytesIO
 from typing import Callable, Dict, List, Optional, Union
 
 import numpy as np
-import onnxruntime as ort
 import requests
 from PIL import Image
 
@@ -19,7 +18,7 @@ from frigate.comms.inter_process import InterProcessRequestor
 from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE
 from frigate.types import ModelStatusTypesEnum
 from frigate.util.downloader import ModelDownloader
-from frigate.util.model import get_ort_providers
+from frigate.util.model import ONNXModelRunner
 
 warnings.filterwarnings(
     "ignore",
@@ -54,16 +53,12 @@ class GenericONNXEmbedding:
         self.download_urls = download_urls
         self.embedding_function = embedding_function
         self.model_type = model_type  # 'text' or 'vision'
-        self.providers, self.provider_options = get_ort_providers(
-            force_cpu=device == "CPU",
-            requires_fp16=model_size == "large" or self.model_type == "text",
-            openvino_device=device,
-        )
-
+        self.model_size = model_size
+        self.device = device
         self.download_path = os.path.join(MODEL_CACHE_DIR, self.model_name)
         self.tokenizer = None
         self.feature_extractor = None
-        self.session = None
+        self.runner = None
         files_names = list(self.download_urls.keys()) + (
             [self.tokenizer_file] if self.tokenizer_file else []
         )
@@ -124,15 +119,17 @@
             )
 
     def _load_model_and_tokenizer(self):
-        if self.session is None:
+        if self.runner is None:
             if self.downloader:
                 self.downloader.wait_for_download()
             if self.model_type == "text":
                 self.tokenizer = self._load_tokenizer()
             else:
                 self.feature_extractor = self._load_feature_extractor()
-            self.session = self._load_model(
-                os.path.join(self.download_path, self.model_file)
+            self.runner = ONNXModelRunner(
+                os.path.join(self.download_path, self.model_file),
+                self.device,
+                self.model_size,
             )
 
     def _load_tokenizer(self):
@@ -149,14 +146,6 @@
             f"{MODEL_CACHE_DIR}/{self.model_name}",
         )
 
-    def _load_model(self, path: str) -> Optional[ort.InferenceSession]:
-        if os.path.exists(path):
-            return ort.InferenceSession(
-                path, providers=self.providers, provider_options=self.provider_options
-            )
-        else:
-            return None
-
     def _process_image(self, image):
         if isinstance(image, str):
             if image.startswith("http"):
@@ -170,7 +159,7 @@
     ) -> List[np.ndarray]:
         self._load_model_and_tokenizer()
 
-        if self.session is None or (
+        if self.runner is None or (
             self.tokenizer is None and self.feature_extractor is None
         ):
             logger.error(
@@ -188,14 +177,14 @@
                 images=processed_images, return_tensors="np"
             )
 
-        input_names = [input.name for input in self.session.get_inputs()]
+        input_names = self.runner.get_input_names()
         onnx_inputs = {
             name: processed_inputs[name]
             for name in input_names
             if name in processed_inputs
         }
 
-        outputs = self.session.run(None, onnx_inputs)
+        outputs = self.runner.run(onnx_inputs)
         embeddings = self.embedding_function(outputs)
 
         return [embedding for embedding in embeddings]
diff --git a/frigate/util/model.py b/frigate/util/model.py
index fabade387..951e61370 100644
--- a/frigate/util/model.py
+++ b/frigate/util/model.py
@@ -1,9 +1,16 @@
 """Model Utils"""
 
 import os
+from typing import Any
 
 import onnxruntime as ort
 
+try:
+    import openvino as ov
+except ImportError:
+    # openvino is not included
+    pass
+
 
 def get_ort_providers(
     force_cpu: bool = False, openvino_device: str = "AUTO", requires_fp16: bool = False
@@ -42,3 +49,56 @@
             options.append({})
 
     return (providers, options)
+
+
+class ONNXModelRunner:
+    """Run onnx models optimally based on available hardware."""
+
+    def __init__(self, model_path: str, device: str, requires_fp16: bool = False):
+        self.model_path = model_path
+        self.ort: ort.InferenceSession = None
+        self.ov: ov.Core = None
+        providers, options = get_ort_providers(device == "CPU", device, requires_fp16)
+
+        if "OpenVINOExecutionProvider" in providers:
+            # use OpenVINO directly
+            self.type = "ov"
+            self.ov = ov.Core()
+            self.ov.set_property(
+                {ov.properties.cache_dir: "/config/model_cache/openvino"}
+            )
+            self.interpreter = self.ov.compile_model(
+                model=model_path, device_name=device
+            )
+        else:
+            # Use ONNXRuntime
+            self.type = "ort"
+            self.ort = ort.InferenceSession(
+                model_path, providers=providers, provider_options=options
+            )
+
+    def get_input_names(self) -> list[str]:
+        if self.type == "ov":
+            input_names = []
+
+            for input in self.interpreter.inputs:
+                input_names.extend(input.names)
+
+            return input_names
+        elif self.type == "ort":
+            return [input.name for input in self.ort.get_inputs()]
+
+    def run(self, input: dict[str, Any]) -> Any:
+        if self.type == "ov":
+            infer_request = self.interpreter.create_infer_request()
+            input_tensor = list(input.values())
+
+            if len(input_tensor) == 1:
+                input_tensor = ov.Tensor(array=input_tensor[0])
+            else:
+                input_tensor = ov.Tensor(array=input_tensor)
+
+            infer_request.infer(input_tensor)
+            return [infer_request.get_output_tensor().data]
+        elif self.type == "ort":
+            return self.ort.run(None, input)
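
For reference, a minimal sketch of how the new ONNXModelRunner added in frigate/util/model.py might be exercised outside the embeddings code. The model path, device, and input shape below are assumptions for illustration and are not part of this patch; the runner chooses OpenVINO directly when the OpenVINO execution provider would be selected by get_ort_providers(), and otherwise falls back to onnxruntime.

# Illustrative usage sketch only -- not part of the patch.
import numpy as np

from frigate.util.model import ONNXModelRunner

runner = ONNXModelRunner(
    "/config/model_cache/example/model.onnx",  # hypothetical model path
    device="AUTO",
)

input_name = runner.get_input_names()[0]
dummy = np.zeros((1, 3, 224, 224), dtype=np.float32)  # hypothetical input shape

# run() takes a dict of input name -> numpy array, mirroring how
# GenericONNXEmbedding builds onnx_inputs in the diff above.
outputs = runner.run({input_name: dummy})
print(type(outputs), len(outputs))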