OpenVINO Hardware Improvements (#20071)
* Use OpenVINO directly to detect if devices are available
* Cleanup
* Update OpenVINO
* Cleanup
* Don't try to use OpenVINO when CPU is set as device
* Catch case where input tensor can't be pre-defined
* Cleanup
Parent: 2d4a0cc584 · Commit: 03fe054078
@@ -42,9 +42,8 @@ opencv-python-headless == 4.11.0.*
 opencv-contrib-python == 4.11.0.*
 scipy == 1.14.*
 # OpenVino & ONNX
-openvino == 2025.1.*
-onnxruntime-openvino == 1.22.* ; platform_machine == 'x86_64'
-onnxruntime == 1.22.* ; platform_machine == 'aarch64'
+openvino == 2025.3.*
+onnxruntime == 1.22.*
 # Embeddings
 transformers == 4.45.*
 # Generative AI
@@ -47,7 +47,7 @@ RUN wget -q https://bootstrap.pypa.io/get-pip.py -O get-pip.py \
 RUN python3 -m pip config set global.break-system-packages true
 
 COPY docker/rocm/requirements-wheels-rocm.txt /requirements.txt
-RUN pip3 uninstall -y onnxruntime-openvino \
+RUN pip3 uninstall -y onnxruntime \
     && pip3 install -r /requirements.txt
 
 #######################################################################
@@ -21,7 +21,7 @@ FROM deps AS frigate-tensorrt
 ARG PIP_BREAK_SYSTEM_PACKAGES
 
 RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \
-    pip3 uninstall -y onnxruntime-openvino tensorflow-cpu \
+    pip3 uninstall -y onnxruntime tensorflow-cpu \
     && pip3 install -U /deps/trt-wheels/*.whl
 
 COPY --from=rootfs / /
@@ -20,6 +20,38 @@ except ImportError:
     ov = None
 
 
+def get_openvino_available_devices() -> list[str]:
+    """Get available OpenVINO devices without using ONNX Runtime.
+
+    Returns:
+        List of available OpenVINO device names (e.g., ['CPU', 'GPU', 'MYRIAD'])
+    """
+    if ov is None:
+        logger.debug("OpenVINO is not available")
+        return []
+
+    try:
+        core = ov.Core()
+        available_devices = core.available_devices
+        logger.debug(f"OpenVINO available devices: {available_devices}")
+        return available_devices
+    except Exception as e:
+        logger.warning(f"Failed to get OpenVINO available devices: {e}")
+        return []
+
+
+def is_openvino_gpu_npu_available() -> bool:
+    """Check if OpenVINO GPU or NPU devices are available.
+
+    Returns:
+        True if GPU or NPU devices are available, False otherwise
+    """
+    available_devices = get_openvino_available_devices()
+
+    # Check for GPU, NPU, or other acceleration devices (excluding CPU)
+    acceleration_devices = ["GPU", "MYRIAD", "NPU", "GNA", "HDDL"]
+    return any(device in available_devices for device in acceleration_devices)
+
+
 class BaseModelRunner(ABC):
     """Abstract base class for model runners."""
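The two helpers above are the core of the change: device probing now goes through OpenVINO's own runtime API instead of instantiating an ONNX Runtime session. A minimal standalone sketch of the same pattern, assuming only that the openvino package is installed (device names shown are illustrative):

import openvino as ov

core = ov.Core()
devices = core.available_devices  # e.g. ["CPU", "GPU"] or ["CPU", "GPU.0", "GPU.1", "NPU"]

# Note: multi-GPU systems can enumerate devices as "GPU.0", "GPU.1", so a
# prefix check is slightly more permissive than exact list membership:
has_accelerator = any(d.split(".")[0] in ("GPU", "NPU") for d in devices)
print(devices, has_accelerator)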
@@ -148,9 +180,14 @@ class OpenVINOModelRunner(BaseModelRunner):
 
         # Create reusable inference request
         self.infer_request = self.compiled_model.create_infer_request()
-        input_shape = self.compiled_model.inputs[0].get_shape()
-        input_element_type = self.compiled_model.inputs[0].get_element_type()
-        self.input_tensor = ov.Tensor(input_element_type, input_shape)
+        try:
+            input_shape = self.compiled_model.inputs[0].get_shape()
+            input_element_type = self.compiled_model.inputs[0].get_element_type()
+            self.input_tensor = ov.Tensor(input_element_type, input_shape)
+        except RuntimeError:
+            # model is complex and has dynamic shape
+            self.input_tensor = None
 
     def get_input_names(self) -> list[str]:
         """Get input names for the model."""
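Context for the new try/except: OpenVINO's get_shape() raises a RuntimeError when any input dimension is dynamic, because there is no concrete shape to allocate a tensor for. A hedged sketch of that failure mode (the model path and device here are placeholders):

import openvino as ov

core = ov.Core()
compiled = core.compile_model("model.xml", "GPU")  # placeholder path/device
port = compiled.inputs[0]

print(port.get_partial_shape())  # dynamic dims print as "?" or ranges, e.g. [?,3,640,640]

try:
    # get_shape() only succeeds when every dimension is static
    tensor = ov.Tensor(port.get_element_type(), port.get_shape())
except RuntimeError:
    tensor = None  # fall back to per-request allocation at inference time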
@@ -172,7 +209,11 @@ class OpenVINOModelRunner(BaseModelRunner):
             List of output tensors
         """
         # Handle single input case for backward compatibility
-        if len(inputs) == 1 and len(self.compiled_model.inputs) == 1:
+        if (
+            len(inputs) == 1
+            and len(self.compiled_model.inputs) == 1
+            and self.input_tensor is not None
+        ):
             # Single input case - use the pre-allocated tensor for efficiency
             input_data = list(inputs.values())[0]
             np.copyto(self.input_tensor.data, input_data)
@@ -322,20 +363,10 @@ def get_optimized_runner(
     if rknn_path:
         return RKNNModelRunner(rknn_path)
 
-    providers, options = get_ort_providers(device == "CPU", device, **kwargs)
-
-    if device == "CPU":
-        return ONNXModelRunner(
-            ort.InferenceSession(
-                model_path,
-                providers=providers,
-                provider_options=options,
-            )
-        )
-
-    if "OpenVINOExecutionProvider" in providers:
+    if device != "CPU" and is_openvino_gpu_npu_available():
         return OpenVINOModelRunner(model_path, device, **kwargs)
 
+    providers, options = get_ort_providers(device == "CPU", device, **kwargs)
     ortSession = ort.InferenceSession(
         model_path,
         providers=providers,
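The net effect in get_optimized_runner: the dedicated CPU fast path and the provider-list inspection are gone, and the direct OpenVINO runner is chosen purely from detected hardware. A simplified sketch of the new decision order (runner construction reduced to strings and the availability helper stubbed, purely for illustration):

def is_openvino_gpu_npu_available() -> bool:
    return True  # stub standing in for the helper added above

def pick_runner(device: str, rknn_path: str | None = None) -> str:
    if rknn_path:
        return "RKNN"        # a Rockchip-compiled model was supplied
    if device != "CPU" and is_openvino_gpu_npu_available():
        return "OpenVINO"    # use OpenVINO directly, bypassing ONNX Runtime
    return "ONNXRuntime"     # CPU, or no OpenVINO accelerator present

print(pick_runner("GPU"))  # -> OpenVINO when a GPU/NPU is detected
print(pick_runner("CPU"))  # -> ONNXRuntime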
@@ -338,14 +338,18 @@ def get_ort_providers(
         else:
             continue
     elif provider == "OpenVINOExecutionProvider":
-        os.makedirs(os.path.join(MODEL_CACHE_DIR, "openvino/ort"), exist_ok=True)
-        providers.append(provider)
-        options.append(
-            {
-                "cache_dir": os.path.join(MODEL_CACHE_DIR, "openvino/ort"),
-                "device_type": device,
-            }
-        )
+        # OpenVINO is used directly
+        if device == "OpenVINO":
+            os.makedirs(
+                os.path.join(MODEL_CACHE_DIR, "openvino/ort"), exist_ok=True
+            )
+            providers.append(provider)
+            options.append(
+                {
+                    "cache_dir": os.path.join(MODEL_CACHE_DIR, "openvino/ort"),
+                    "device_type": device,
+                }
+            )
     elif provider == "MIGraphXExecutionProvider":
         # MIGraphX uses more CPU than ROCM, while also being the same speed
         if device == "MIGraphX":
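Finally, the OpenVINO execution provider is now configured only when the device is explicitly set to "OpenVINO"; otherwise no provider entry is appended at all. When it is appended, the providers and options lists stay index-aligned, which is how onnxruntime pairs them. A hedged sketch of that pairing (the cache path, model path, and device_type are illustrative, and it assumes the onnxruntime-openvino wheel is installed):

import onnxruntime as ort

providers = ["OpenVINOExecutionProvider", "CPUExecutionProvider"]
options = [
    {"cache_dir": "/config/model_cache/openvino/ort", "device_type": "GPU"},
    {},  # CPU EP takes no options here; lists are paired by index
]
session = ort.InferenceSession(
    "model.onnx",  # placeholder model path
    providers=providers,
    provider_options=options,
)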