OpenVINO Hardware Improvements (#20071)

* Use OpenVINO directly to detect if devices are available

* Cleanup

* Update OpenVINO

* Cleanup

* Don't try to use OpenVINO when CPU is set as device

* Catch case where input tensor can't be pre-defined

* Cleanup
Nicolas Mowen, 2025-09-15 07:35:49 -06:00, committed by GitHub
parent 2d4a0cc584
commit 03fe054078
5 changed files with 63 additions and 29 deletions
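As context for the diffs below: the commit replaces ONNX Runtime's provider list as the hardware probe with a direct OpenVINO query, and skips OpenVINO entirely when the configured device is CPU. A minimal sketch of that policy, for illustration only; `has_openvino_accelerator` and `pick_runner` are hypothetical stand-ins for the `is_openvino_gpu_npu_available` helper and routing logic added in the diff:

import openvino as ov

def has_openvino_accelerator() -> bool:
    # Hypothetical stand-in for is_openvino_gpu_npu_available() below.
    try:
        devices = ov.Core().available_devices
    except Exception:
        return False
    # Multi-device hosts may enumerate entries like "GPU.0", "GPU.1".
    return any(d.split(".")[0] in {"GPU", "NPU"} for d in devices)

def pick_runner(device: str) -> str:
    # "Don't try to use OpenVINO when CPU is set as device"
    if device != "CPU" and has_openvino_accelerator():
        return "openvino"
    return "onnxruntime"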


@@ -42,9 +42,8 @@ opencv-python-headless == 4.11.0.*
 opencv-contrib-python == 4.11.0.*
 scipy == 1.14.*
 # OpenVino & ONNX
-openvino == 2025.1.*
-onnxruntime-openvino == 1.22.* ; platform_machine == 'x86_64'
-onnxruntime == 1.22.* ; platform_machine == 'aarch64'
+openvino == 2025.3.*
+onnxruntime == 1.22.*
 # Embeddings
 transformers == 4.45.*
 # Generative AI


@@ -47,7 +47,7 @@ RUN wget -q https://bootstrap.pypa.io/get-pip.py -O get-pip.py \
 RUN python3 -m pip config set global.break-system-packages true
 COPY docker/rocm/requirements-wheels-rocm.txt /requirements.txt
-RUN pip3 uninstall -y onnxruntime-openvino \
+RUN pip3 uninstall -y onnxruntime \
     && pip3 install -r /requirements.txt
 #######################################################################


@@ -21,7 +21,7 @@ FROM deps AS frigate-tensorrt
 ARG PIP_BREAK_SYSTEM_PACKAGES
 RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \
-    pip3 uninstall -y onnxruntime-openvino tensorflow-cpu \
+    pip3 uninstall -y onnxruntime tensorflow-cpu \
     && pip3 install -U /deps/trt-wheels/*.whl
 COPY --from=rootfs / /


@@ -20,6 +20,38 @@ except ImportError:
     ov = None


+def get_openvino_available_devices() -> list[str]:
+    """Get available OpenVINO devices without using ONNX Runtime.
+
+    Returns:
+        List of available OpenVINO device names (e.g., ['CPU', 'GPU', 'MYRIAD'])
+    """
+    if ov is None:
+        logger.debug("OpenVINO is not available")
+        return []
+
+    try:
+        core = ov.Core()
+        available_devices = core.available_devices
+        logger.debug(f"OpenVINO available devices: {available_devices}")
+        return available_devices
+    except Exception as e:
+        logger.warning(f"Failed to get OpenVINO available devices: {e}")
+        return []
+
+
+def is_openvino_gpu_npu_available() -> bool:
+    """Check if OpenVINO GPU or NPU devices are available.
+
+    Returns:
+        True if GPU or NPU devices are available, False otherwise
+    """
+    available_devices = get_openvino_available_devices()
+
+    # Check for GPU, NPU, or other acceleration devices (excluding CPU)
+    acceleration_devices = ["GPU", "MYRIAD", "NPU", "GNA", "HDDL"]
+    return any(device in available_devices for device in acceleration_devices)
+
+
 class BaseModelRunner(ABC):
     """Abstract base class for model runners."""
@@ -148,9 +180,14 @@ class OpenVINOModelRunner(BaseModelRunner):
         # Create reusable inference request
         self.infer_request = self.compiled_model.create_infer_request()
 
-        input_shape = self.compiled_model.inputs[0].get_shape()
-        input_element_type = self.compiled_model.inputs[0].get_element_type()
-        self.input_tensor = ov.Tensor(input_element_type, input_shape)
+        try:
+            input_shape = self.compiled_model.inputs[0].get_shape()
+            input_element_type = self.compiled_model.inputs[0].get_element_type()
+            self.input_tensor = ov.Tensor(input_element_type, input_shape)
+        except RuntimeError:
+            # model is complex and has dynamic shape
+            self.input_tensor = None
 
     def get_input_names(self) -> list[str]:
         """Get input names for the model."""
@@ -172,7 +209,11 @@
             List of output tensors
         """
         # Handle single input case for backward compatibility
-        if len(inputs) == 1 and len(self.compiled_model.inputs) == 1:
+        if (
+            len(inputs) == 1
+            and len(self.compiled_model.inputs) == 1
+            and self.input_tensor is not None
+        ):
             # Single input case - use the pre-allocated tensor for efficiency
             input_data = list(inputs.values())[0]
             np.copyto(self.input_tensor.data, input_data)
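When `self.input_tensor` is `None`, the pre-allocated fast path is skipped; presumably the request then receives the inputs dict directly and OpenVINO allocates tensors per call. A hedged sketch of that style of fallback (not necessarily the diff's exact code, which is truncated here):

import numpy as np

def infer_dynamic(infer_request, inputs: dict[str, np.ndarray]) -> list[np.ndarray]:
    # InferRequest.infer accepts a name -> array mapping and returns an
    # output -> array mapping; copy results out as plain ndarrays.
    results = infer_request.infer(inputs)
    return [np.asarray(value) for value in results.values()]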
@@ -322,20 +363,10 @@ def get_optimized_runner(
     if rknn_path:
         return RKNNModelRunner(rknn_path)
 
-    providers, options = get_ort_providers(device == "CPU", device, **kwargs)
-
-    if device == "CPU":
-        return ONNXModelRunner(
-            ort.InferenceSession(
-                model_path,
-                providers=providers,
-                provider_options=options,
-            )
-        )
-
-    if "OpenVINOExecutionProvider" in providers:
+    if device != "CPU" and is_openvino_gpu_npu_available():
         return OpenVINOModelRunner(model_path, device, **kwargs)
 
+    providers, options = get_ort_providers(device == "CPU", device, **kwargs)
     ortSession = ort.InferenceSession(
         model_path,
         providers=providers,
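The hunk above is cut off mid-call, but the remainder is the standard ONNX Runtime session construction. For reference, a self-contained sketch of the CPU fallback path using the plain `onnxruntime` wheel this commit standardizes on (model path and input shape are hypothetical):

import numpy as np
import onnxruntime as ort

session = ort.InferenceSession(
    "model.onnx",  # hypothetical path
    providers=["CPUExecutionProvider"],
    provider_options=[{}],
)
dummy = np.zeros((1, 3, 320, 320), dtype=np.float32)  # hypothetical shape
outputs = session.run(None, {session.get_inputs()[0].name: dummy})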


@@ -338,14 +338,18 @@ def get_ort_providers(
             else:
                 continue
         elif provider == "OpenVINOExecutionProvider":
-            os.makedirs(os.path.join(MODEL_CACHE_DIR, "openvino/ort"), exist_ok=True)
-            providers.append(provider)
-            options.append(
-                {
-                    "cache_dir": os.path.join(MODEL_CACHE_DIR, "openvino/ort"),
-                    "device_type": device,
-                }
-            )
+            # OpenVINO is used directly
+            if device == "OpenVINO":
+                os.makedirs(
+                    os.path.join(MODEL_CACHE_DIR, "openvino/ort"), exist_ok=True
+                )
+                providers.append(provider)
+                options.append(
+                    {
+                        "cache_dir": os.path.join(MODEL_CACHE_DIR, "openvino/ort"),
+                        "device_type": device,
+                    }
+                )
         elif provider == "MIGraphXExecutionProvider":
             # MIGraphX uses more CPU than ROCM, while also being the same speed
             if device == "MIGraphX":