Mirror of https://github.com/blakeblackshear/frigate.git (synced 2025-09-23 17:52:05 +02:00)
OpenVINO Hardware Improvements (#20071)
* Use OpenVINO directly to detect if devices are available
* Cleanup
* Update OpenVINO
* Cleanup
* Don't try to use OpenVINO when CPU is set as device
* Catch case where input tensor can't be pre-defined
* Cleanup
This commit is contained in:
  parent 2d4a0cc584
  commit 03fe054078
@@ -42,9 +42,8 @@ opencv-python-headless == 4.11.0.*
 opencv-contrib-python == 4.11.0.*
 scipy == 1.14.*
 # OpenVino & ONNX
-openvino == 2025.1.*
-onnxruntime-openvino == 1.22.* ; platform_machine == 'x86_64'
-onnxruntime == 1.22.* ; platform_machine == 'aarch64'
+openvino == 2025.3.*
+onnxruntime == 1.22.*
 # Embeddings
 transformers == 4.45.*
 # Generative AI
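The dropped pins used PEP 508 environment markers to select a different ONNX Runtime wheel per CPU architecture; the update collapses them into a single unconditional `onnxruntime` pin. For reference, the marker syntax being removed looks like this (package names are placeholders):

    example-pkg == 1.0.* ; platform_machine == 'x86_64'    # installed on x86_64 hosts only
    example-pkg == 1.0.* ; platform_machine == 'aarch64'   # installed on arm64 hosts only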
@@ -47,7 +47,7 @@ RUN wget -q https://bootstrap.pypa.io/get-pip.py -O get-pip.py \
 RUN python3 -m pip config set global.break-system-packages true
 
 COPY docker/rocm/requirements-wheels-rocm.txt /requirements.txt
-RUN pip3 uninstall -y onnxruntime-openvino \
+RUN pip3 uninstall -y onnxruntime \
     && pip3 install -r /requirements.txt
 
 #######################################################################
@@ -21,7 +21,7 @@ FROM deps AS frigate-tensorrt
 ARG PIP_BREAK_SYSTEM_PACKAGES
 
 RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \
-    pip3 uninstall -y onnxruntime-openvino tensorflow-cpu \
+    pip3 uninstall -y onnxruntime tensorflow-cpu \
     && pip3 install -U /deps/trt-wheels/*.whl
 
 COPY --from=rootfs / /
@@ -20,6 +20,38 @@ except ImportError:
     ov = None
 
 
+def get_openvino_available_devices() -> list[str]:
+    """Get available OpenVINO devices without using ONNX Runtime.
+
+    Returns:
+        List of available OpenVINO device names (e.g., ['CPU', 'GPU', 'MYRIAD'])
+    """
+    if ov is None:
+        logger.debug("OpenVINO is not available")
+        return []
+
+    try:
+        core = ov.Core()
+        available_devices = core.available_devices
+        logger.debug(f"OpenVINO available devices: {available_devices}")
+        return available_devices
+    except Exception as e:
+        logger.warning(f"Failed to get OpenVINO available devices: {e}")
+        return []
+
+
+def is_openvino_gpu_npu_available() -> bool:
+    """Check if OpenVINO GPU or NPU devices are available.
+
+    Returns:
+        True if GPU or NPU devices are available, False otherwise
+    """
+    available_devices = get_openvino_available_devices()
+    # Check for GPU, NPU, or other acceleration devices (excluding CPU)
+    acceleration_devices = ["GPU", "MYRIAD", "NPU", "GNA", "HDDL"]
+    return any(device in available_devices for device in acceleration_devices)
+
+
 class BaseModelRunner(ABC):
     """Abstract base class for model runners."""
 
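The new helper boils down to OpenVINO's own device enumeration API, so Frigate no longer needs ONNX Runtime just to probe hardware. A minimal standalone sketch of the same probe (the FULL_DEVICE_NAME lookup is extra illustration, not part of this commit):

    import openvino as ov

    core = ov.Core()
    for device in core.available_devices:
        # device is a short name such as "CPU", "GPU", or "NPU"
        print(device, core.get_property(device, "FULL_DEVICE_NAME"))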
@@ -148,9 +180,14 @@ class OpenVINOModelRunner(BaseModelRunner):
 
         # Create reusable inference request
         self.infer_request = self.compiled_model.create_infer_request()
-        input_shape = self.compiled_model.inputs[0].get_shape()
-        input_element_type = self.compiled_model.inputs[0].get_element_type()
-        self.input_tensor = ov.Tensor(input_element_type, input_shape)
+
+        try:
+            input_shape = self.compiled_model.inputs[0].get_shape()
+            input_element_type = self.compiled_model.inputs[0].get_element_type()
+            self.input_tensor = ov.Tensor(input_element_type, input_shape)
+        except RuntimeError:
+            # model is complex and has dynamic shape
+            self.input_tensor = None
 
     def get_input_names(self) -> list[str]:
         """Get input names for the model."""
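The guard exists because get_shape() on a compiled model's input port raises RuntimeError when the shape is dynamic, so pre-allocation is now best-effort. A minimal sketch of the same pattern in isolation; try_preallocate is an illustrative name, not part of this commit:

    import openvino as ov

    def try_preallocate(compiled_model: ov.CompiledModel) -> ov.Tensor | None:
        port = compiled_model.inputs[0]
        try:
            # get_shape() raises RuntimeError when the shape is dynamic
            return ov.Tensor(port.get_element_type(), port.get_shape())
        except RuntimeError:
            return None  # caller falls back to per-inference allocation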
@@ -172,7 +209,11 @@ class OpenVINOModelRunner(BaseModelRunner):
             List of output tensors
         """
         # Handle single input case for backward compatibility
-        if len(inputs) == 1 and len(self.compiled_model.inputs) == 1:
+        if (
+            len(inputs) == 1
+            and len(self.compiled_model.inputs) == 1
+            and self.input_tensor is not None
+        ):
             # Single input case - use the pre-allocated tensor for efficiency
             input_data = list(inputs.values())[0]
             np.copyto(self.input_tensor.data, input_data)
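With the extra `is not None` condition, dynamic-shape models skip the pre-allocated tensor and take the generic path. A minimal sketch of the two resulting paths; run_single_input and its parameters are illustrative names, not part of this commit:

    import numpy as np
    import openvino as ov

    def run_single_input(
        infer_request: ov.InferRequest,
        preallocated: ov.Tensor | None,
        input_name: str,
        input_data: np.ndarray,
    ) -> None:
        if preallocated is not None:
            # static shape: reuse the tensor, avoiding a per-call allocation
            np.copyto(preallocated.data, input_data)
            infer_request.set_input_tensor(preallocated)
            infer_request.infer()
        else:
            # dynamic shape: let OpenVINO size the input on each call
            infer_request.infer({input_name: input_data})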
@@ -322,20 +363,10 @@ def get_optimized_runner(
     if rknn_path:
         return RKNNModelRunner(rknn_path)
 
-    providers, options = get_ort_providers(device == "CPU", device, **kwargs)
-
-    if device == "CPU":
-        return ONNXModelRunner(
-            ort.InferenceSession(
-                model_path,
-                providers=providers,
-                provider_options=options,
-            )
-        )
-
-    if "OpenVINOExecutionProvider" in providers:
+    if device != "CPU" and is_openvino_gpu_npu_available():
         return OpenVINOModelRunner(model_path, device, **kwargs)
 
+    providers, options = get_ort_providers(device == "CPU", device, **kwargs)
     ortSession = ort.InferenceSession(
         model_path,
         providers=providers,
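The routing is now explicit: RKNN when a converted model exists, the native OpenVINOModelRunner only when a non-CPU device is requested and an OpenVINO accelerator is actually present, and ONNX Runtime otherwise (including the plain CPU case, which previously had its own early return). A hedged usage sketch; the model path is illustrative and the real signature may take extra kwargs:

    # CPU-only host, or device="CPU": falls through to ONNX Runtime
    runner = get_optimized_runner("/config/model_cache/model.onnx", "CPU")

    # Intel iGPU present: is_openvino_gpu_npu_available() returns True,
    # so the native OpenVINO runner is chosen over the ORT wrapper
    runner = get_optimized_runner("/config/model_cache/model.onnx", "GPU")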
@@ -338,14 +338,18 @@ def get_ort_providers(
             else:
                 continue
         elif provider == "OpenVINOExecutionProvider":
-            os.makedirs(os.path.join(MODEL_CACHE_DIR, "openvino/ort"), exist_ok=True)
-            providers.append(provider)
-            options.append(
-                {
-                    "cache_dir": os.path.join(MODEL_CACHE_DIR, "openvino/ort"),
-                    "device_type": device,
-                }
-            )
+            # OpenVINO is used directly
+            if device == "OpenVINO":
+                os.makedirs(
+                    os.path.join(MODEL_CACHE_DIR, "openvino/ort"), exist_ok=True
+                )
+                providers.append(provider)
+                options.append(
+                    {
+                        "cache_dir": os.path.join(MODEL_CACHE_DIR, "openvino/ort"),
+                        "device_type": device,
+                    }
+                )
         elif provider == "MIGraphXExecutionProvider":
             # MIGraphX uses more CPU than ROCM, while also being the same speed
             if device == "MIGraphX":
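After this change, ONNX Runtime's OpenVINOExecutionProvider is only wired up when the device is explicitly set to `OpenVINO`; actual OpenVINO hardware is otherwise served by the native runner above. For context, a hedged sketch of how such a provider/options pair is consumed by ONNX Runtime; the path and device_type value are illustrative, while device_type and cache_dir are documented OpenVINO execution provider options:

    import onnxruntime as ort

    session = ort.InferenceSession(
        "/config/model_cache/model.onnx",  # illustrative path
        providers=["OpenVINOExecutionProvider"],
        provider_options=[
            {
                "device_type": "GPU",  # illustrative device
                "cache_dir": "/config/model_cache/openvino/ort",
            }
        ],
    )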