Openvino models (#14283)
* Enable model conversion cache for openvino
* Use openvino directly for onnx embeddings if available
* Don't fail if zmq is busy
This commit is contained in:
parent 748087483c
commit 6df541e1fd
@@ -142,10 +142,11 @@ class Dispatcher:
             )
 
         def handle_update_model_state():
-            model = payload["model"]
-            state = payload["state"]
-            self.model_state[model] = ModelStatusTypesEnum[state]
-            self.publish("model_state", json.dumps(self.model_state))
+            if payload:
+                model = payload["model"]
+                state = payload["state"]
+                self.model_state[model] = ModelStatusTypesEnum[state]
+                self.publish("model_state", json.dumps(self.model_state))
 
         def handle_model_state():
             self.publish("model_state", json.dumps(self.model_state.copy()))
@@ -65,8 +65,11 @@ class InterProcessRequestor:
 
     def send_data(self, topic: str, data: any) -> any:
         """Sends data and then waits for reply."""
-        self.socket.send_json((topic, data))
-        return self.socket.recv_json()
+        try:
+            self.socket.send_json((topic, data))
+            return self.socket.recv_json()
+        except zmq.ZMQError:
+            return ""
 
     def stop(self) -> None:
         self.socket.close()
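For context on the new ZMQError guard: a pyzmq REQ socket enforces a strict send/receive cycle, so a send issued while the socket is still owed a reply raises zmq.ZMQError instead of blocking; the try/except turns that into an empty reply rather than a crash. A standalone sketch of that failure mode, not Frigate code (the endpoint is a placeholder):

import zmq

context = zmq.Context()
socket = context.socket(zmq.REQ)
socket.connect("ipc:///tmp/example_requestor")  # placeholder endpoint

socket.send_json(("topic", {"ok": True}))  # first send is queued fine
try:
    # Sending again before recv_json() violates the REQ state machine and
    # raises zmq.ZMQError ("Operation cannot be accomplished in current state").
    socket.send_json(("topic", {"ok": False}))
except zmq.ZMQError:
    reply = ""  # mirrors what send_data() now returns when the socket is busy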
@@ -3,6 +3,7 @@ import os
 
 import numpy as np
 import openvino as ov
+import openvino.properties as props
 from pydantic import Field
 from typing_extensions import Literal
 
@@ -34,6 +35,8 @@ class OvDetector(DetectionApi):
             logger.error(f"OpenVino model file {detector_config.model.path} not found.")
             raise FileNotFoundError
 
+        os.makedirs("/config/model_cache/openvino", exist_ok=True)
+        self.ov_core.set_property({props.cache_dir: "/config/model_cache/openvino"})
         self.interpreter = self.ov_core.compile_model(
             model=detector_config.model.path, device_name=detector_config.device
         )
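For context on the cache_dir property: it enables OpenVINO's compiled-model cache, so repeated compile_model() calls for the same model and device load a cached blob instead of converting the model again. A minimal standalone sketch (the cache path and model file below are placeholders, not Frigate's):

import openvino as ov
import openvino.properties as props

core = ov.Core()
# Point OpenVINO at a writable directory; compiled blobs are cached there.
core.set_property({props.cache_dir: "/tmp/ov_cache"})  # placeholder path

# The first compile converts the model and writes the cache;
# later compiles of the same model/device reuse the cached blob.
compiled = core.compile_model(model="model.onnx", device_name="CPU")  # placeholder model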
@@ -5,7 +5,6 @@ from io import BytesIO
 from typing import Callable, Dict, List, Optional, Union
 
 import numpy as np
-import onnxruntime as ort
 import requests
 from PIL import Image
 
@@ -19,7 +18,7 @@ from frigate.comms.inter_process import InterProcessRequestor
 from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE
 from frigate.types import ModelStatusTypesEnum
 from frigate.util.downloader import ModelDownloader
-from frigate.util.model import get_ort_providers
+from frigate.util.model import ONNXModelRunner
 
 warnings.filterwarnings(
     "ignore",
@@ -54,16 +53,12 @@ class GenericONNXEmbedding:
         self.download_urls = download_urls
         self.embedding_function = embedding_function
         self.model_type = model_type  # 'text' or 'vision'
-        self.providers, self.provider_options = get_ort_providers(
-            force_cpu=device == "CPU",
-            requires_fp16=model_size == "large" or self.model_type == "text",
-            openvino_device=device,
-        )
-
+        self.model_size = model_size
+        self.device = device
         self.download_path = os.path.join(MODEL_CACHE_DIR, self.model_name)
         self.tokenizer = None
         self.feature_extractor = None
-        self.session = None
+        self.runner = None
         files_names = list(self.download_urls.keys()) + (
             [self.tokenizer_file] if self.tokenizer_file else []
         )
@@ -124,15 +119,17 @@ class GenericONNXEmbedding:
         )
 
     def _load_model_and_tokenizer(self):
-        if self.session is None:
+        if self.runner is None:
             if self.downloader:
                 self.downloader.wait_for_download()
             if self.model_type == "text":
                 self.tokenizer = self._load_tokenizer()
             else:
                 self.feature_extractor = self._load_feature_extractor()
-            self.session = self._load_model(
-                os.path.join(self.download_path, self.model_file)
+            self.runner = ONNXModelRunner(
+                os.path.join(self.download_path, self.model_file),
+                self.device,
+                self.model_size,
             )
 
     def _load_tokenizer(self):
@@ -149,14 +146,6 @@ class GenericONNXEmbedding:
             f"{MODEL_CACHE_DIR}/{self.model_name}",
         )
 
-    def _load_model(self, path: str) -> Optional[ort.InferenceSession]:
-        if os.path.exists(path):
-            return ort.InferenceSession(
-                path, providers=self.providers, provider_options=self.provider_options
-            )
-        else:
-            return None
-
     def _process_image(self, image):
         if isinstance(image, str):
             if image.startswith("http"):
@@ -170,7 +159,7 @@ class GenericONNXEmbedding:
     ) -> List[np.ndarray]:
         self._load_model_and_tokenizer()
 
-        if self.session is None or (
+        if self.runner is None or (
             self.tokenizer is None and self.feature_extractor is None
         ):
             logger.error(
@@ -188,14 +177,14 @@ class GenericONNXEmbedding:
                 images=processed_images, return_tensors="np"
             )
 
-        input_names = [input.name for input in self.session.get_inputs()]
+        input_names = self.runner.get_input_names()
         onnx_inputs = {
             name: processed_inputs[name]
             for name in input_names
             if name in processed_inputs
         }
 
-        outputs = self.session.run(None, onnx_inputs)
+        outputs = self.runner.run(onnx_inputs)
         embeddings = self.embedding_function(outputs)
 
         return [embedding for embedding in embeddings]
@@ -1,9 +1,16 @@
 """Model Utils"""
 
 import os
+from typing import Any
 
 import onnxruntime as ort
 
+try:
+    import openvino as ov
+except ImportError:
+    # openvino is not included
+    pass
+
 
 def get_ort_providers(
     force_cpu: bool = False, openvino_device: str = "AUTO", requires_fp16: bool = False
@@ -42,3 +49,56 @@ def get_ort_providers(
         options.append({})
 
     return (providers, options)
+
+
+class ONNXModelRunner:
+    """Run onnx models optimally based on available hardware."""
+
+    def __init__(self, model_path: str, device: str, requires_fp16: bool = False):
+        self.model_path = model_path
+        self.ort: ort.InferenceSession = None
+        self.ov: ov.Core = None
+        providers, options = get_ort_providers(device == "CPU", device, requires_fp16)
+
+        if "OpenVINOExecutionProvider" in providers:
+            # use OpenVINO directly
+            self.type = "ov"
+            self.ov = ov.Core()
+            self.ov.set_property(
+                {ov.properties.cache_dir: "/config/model_cache/openvino"}
+            )
+            self.interpreter = self.ov.compile_model(
+                model=model_path, device_name=device
+            )
+        else:
+            # Use ONNXRuntime
+            self.type = "ort"
+            self.ort = ort.InferenceSession(
+                model_path, providers=providers, provider_options=options
+            )
+
+    def get_input_names(self) -> list[str]:
+        if self.type == "ov":
+            input_names = []
+
+            for input in self.interpreter.inputs:
+                input_names.extend(input.names)
+
+            return input_names
+        elif self.type == "ort":
+            return [input.name for input in self.ort.get_inputs()]
+
+    def run(self, input: dict[str, Any]) -> Any:
+        if self.type == "ov":
+            infer_request = self.interpreter.create_infer_request()
+            input_tensor = list(input.values())
+
+            if len(input_tensor) == 1:
+                input_tensor = ov.Tensor(array=input_tensor[0])
+            else:
+                input_tensor = ov.Tensor(array=input_tensor)
+
+            infer_request.infer(input_tensor)
+            return [infer_request.get_output_tensor().data]
+        elif self.type == "ort":
+            return self.ort.run(None, input)
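A minimal usage sketch of the new runner, based only on the class added above and mirroring how GenericONNXEmbedding now calls it; the model path, device, and input name/shape are placeholders:

import numpy as np

from frigate.util.model import ONNXModelRunner

# Uses OpenVINO directly when the OpenVINOExecutionProvider is available, ONNX Runtime otherwise.
runner = ONNXModelRunner("/config/model_cache/example/model.onnx", "GPU")  # placeholder path/device

# Only feed inputs the model actually declares, as the embeddings code does.
candidate_inputs = {"input_ids": np.zeros((1, 77), dtype=np.int64)}  # placeholder name/shape
onnx_inputs = {
    name: candidate_inputs[name]
    for name in runner.get_input_names()
    if name in candidate_inputs
}

outputs = runner.run(onnx_inputs)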