OpenVINO Hardware Improvements (#20071)
* Use OpenVINO directly to detect if devices are available
* Cleanup
* Update OpenVINO
* Cleanup
* Don't try to use OpenVINO when CPU is set as device
* Catch case where input tensor can't be pre-defined
* Cleanup
Parent: 2d4a0cc584 · Commit: 03fe054078
@@ -42,9 +42,8 @@ opencv-python-headless == 4.11.0.*
 opencv-contrib-python == 4.11.0.*
 scipy == 1.14.*
 # OpenVino & ONNX
-openvino == 2025.1.*
-onnxruntime-openvino == 1.22.* ; platform_machine == 'x86_64'
-onnxruntime == 1.22.* ; platform_machine == 'aarch64'
+openvino == 2025.3.*
+onnxruntime == 1.22.*
 # Embeddings
 transformers == 4.45.*
 # Generative AI
@@ -47,7 +47,7 @@ RUN wget -q https://bootstrap.pypa.io/get-pip.py -O get-pip.py \
 RUN python3 -m pip config set global.break-system-packages true
 
 COPY docker/rocm/requirements-wheels-rocm.txt /requirements.txt
-RUN pip3 uninstall -y onnxruntime-openvino \
+RUN pip3 uninstall -y onnxruntime \
     && pip3 install -r /requirements.txt
 
 #######################################################################
@@ -21,7 +21,7 @@ FROM deps AS frigate-tensorrt
 ARG PIP_BREAK_SYSTEM_PACKAGES
 
 RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \
-    pip3 uninstall -y onnxruntime-openvino tensorflow-cpu \
+    pip3 uninstall -y onnxruntime tensorflow-cpu \
     && pip3 install -U /deps/trt-wheels/*.whl
 
 COPY --from=rootfs / /
@@ -20,6 +20,38 @@ except ImportError:
     ov = None
 
 
+def get_openvino_available_devices() -> list[str]:
+    """Get available OpenVINO devices without using ONNX Runtime.
+
+    Returns:
+        List of available OpenVINO device names (e.g., ['CPU', 'GPU', 'MYRIAD'])
+    """
+    if ov is None:
+        logger.debug("OpenVINO is not available")
+        return []
+
+    try:
+        core = ov.Core()
+        available_devices = core.available_devices
+        logger.debug(f"OpenVINO available devices: {available_devices}")
+        return available_devices
+    except Exception as e:
+        logger.warning(f"Failed to get OpenVINO available devices: {e}")
+        return []
+
+
+def is_openvino_gpu_npu_available() -> bool:
+    """Check if OpenVINO GPU or NPU devices are available.
+
+    Returns:
+        True if GPU or NPU devices are available, False otherwise
+    """
+    available_devices = get_openvino_available_devices()
+
+    # Check for GPU, NPU, or other acceleration devices (excluding CPU)
+    acceleration_devices = ["GPU", "MYRIAD", "NPU", "GNA", "HDDL"]
+    return any(device in available_devices for device in acceleration_devices)
+
+
 class BaseModelRunner(ABC):
     """Abstract base class for model runners."""
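The two helpers above are the core of the change: device probing now goes through OpenVINO's own runtime API instead of instantiating an ONNX Runtime session. A minimal standalone sketch of the same pattern, assuming only that the openvino package is installed (device names shown are illustrative):

import openvino as ov

core = ov.Core()
devices = core.available_devices  # e.g. ["CPU", "GPU"] or ["CPU", "GPU.0", "GPU.1", "NPU"]

# Note: multi-GPU systems can enumerate devices as "GPU.0", "GPU.1", so a
# prefix check is slightly more permissive than exact list membership:
has_accelerator = any(d.split(".")[0] in ("GPU", "NPU") for d in devices)
print(devices, has_accelerator)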
@@ -148,9 +180,14 @@ class OpenVINOModelRunner(BaseModelRunner):
 
         # Create reusable inference request
         self.infer_request = self.compiled_model.create_infer_request()
-        input_shape = self.compiled_model.inputs[0].get_shape()
-        input_element_type = self.compiled_model.inputs[0].get_element_type()
-        self.input_tensor = ov.Tensor(input_element_type, input_shape)
+        try:
+            input_shape = self.compiled_model.inputs[0].get_shape()
+            input_element_type = self.compiled_model.inputs[0].get_element_type()
+            self.input_tensor = ov.Tensor(input_element_type, input_shape)
+        except RuntimeError:
+            # model is complex and has dynamic shape
+            self.input_tensor = None
 
     def get_input_names(self) -> list[str]:
         """Get input names for the model."""
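Context for the new try/except: OpenVINO's get_shape() raises a RuntimeError when any input dimension is dynamic, because there is no concrete shape to allocate a tensor for. A hedged sketch of that failure mode (the model path and device here are placeholders):

import openvino as ov

core = ov.Core()
compiled = core.compile_model("model.xml", "GPU")  # placeholder path/device
port = compiled.inputs[0]

print(port.get_partial_shape())  # dynamic dims print as "?" or ranges, e.g. [?,3,640,640]

try:
    # get_shape() only succeeds when every dimension is static
    tensor = ov.Tensor(port.get_element_type(), port.get_shape())
except RuntimeError:
    tensor = None  # fall back to per-request allocation at inference time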
@@ -172,7 +209,11 @@ class OpenVINOModelRunner(BaseModelRunner):
             List of output tensors
         """
         # Handle single input case for backward compatibility
-        if len(inputs) == 1 and len(self.compiled_model.inputs) == 1:
+        if (
+            len(inputs) == 1
+            and len(self.compiled_model.inputs) == 1
+            and self.input_tensor is not None
+        ):
             # Single input case - use the pre-allocated tensor for efficiency
             input_data = list(inputs.values())[0]
             np.copyto(self.input_tensor.data, input_data)
@@ -322,20 +363,10 @@ def get_optimized_runner(
     if rknn_path:
         return RKNNModelRunner(rknn_path)
 
-    providers, options = get_ort_providers(device == "CPU", device, **kwargs)
-
-    if device == "CPU":
-        return ONNXModelRunner(
-            ort.InferenceSession(
-                model_path,
-                providers=providers,
-                provider_options=options,
-            )
-        )
-
-    if "OpenVINOExecutionProvider" in providers:
+    if device != "CPU" and is_openvino_gpu_npu_available():
         return OpenVINOModelRunner(model_path, device, **kwargs)
 
+    providers, options = get_ort_providers(device == "CPU", device, **kwargs)
     ortSession = ort.InferenceSession(
         model_path,
         providers=providers,
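The net effect in get_optimized_runner: the dedicated CPU fast path and the provider-list inspection are gone, and the direct OpenVINO runner is chosen purely from detected hardware. A simplified sketch of the new decision order (runner construction reduced to strings and the availability helper stubbed, purely for illustration):

def is_openvino_gpu_npu_available() -> bool:
    return True  # stub standing in for the helper added above

def pick_runner(device: str, rknn_path: str | None = None) -> str:
    if rknn_path:
        return "RKNN"        # a Rockchip-compiled model was supplied
    if device != "CPU" and is_openvino_gpu_npu_available():
        return "OpenVINO"    # use OpenVINO directly, bypassing ONNX Runtime
    return "ONNXRuntime"     # CPU, or no OpenVINO accelerator present

print(pick_runner("GPU"))  # -> OpenVINO when a GPU/NPU is detected
print(pick_runner("CPU"))  # -> ONNXRuntime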
@@ -338,14 +338,18 @@ def get_ort_providers(
         else:
             continue
     elif provider == "OpenVINOExecutionProvider":
-        os.makedirs(os.path.join(MODEL_CACHE_DIR, "openvino/ort"), exist_ok=True)
-        providers.append(provider)
-        options.append(
-            {
-                "cache_dir": os.path.join(MODEL_CACHE_DIR, "openvino/ort"),
-                "device_type": device,
-            }
-        )
+        # OpenVINO is used directly
+        if device == "OpenVINO":
+            os.makedirs(
+                os.path.join(MODEL_CACHE_DIR, "openvino/ort"), exist_ok=True
+            )
+            providers.append(provider)
+            options.append(
+                {
+                    "cache_dir": os.path.join(MODEL_CACHE_DIR, "openvino/ort"),
+                    "device_type": device,
+                }
+            )
     elif provider == "MIGraphXExecutionProvider":
         # MIGraphX uses more CPU than ROCM, while also being the same speed
         if device == "MIGraphX":
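Finally, the OpenVINO execution provider is now configured only when the device is explicitly set to "OpenVINO"; otherwise no provider entry is appended at all. When it is appended, the providers and options lists stay index-aligned, which is how onnxruntime pairs them. A hedged sketch of that pairing (the cache path, model path, and device_type are illustrative, and it assumes the onnxruntime-openvino wheel is installed):

import onnxruntime as ort

providers = ["OpenVINOExecutionProvider", "CPUExecutionProvider"]
options = [
    {"cache_dir": "/config/model_cache/openvino/ort", "device_type": "GPU"},
    {},  # CPU EP takes no options here; lists are paired by index
]
session = ort.InferenceSession(
    "model.onnx",  # placeholder model path
    providers=providers,
    provider_options=options,
)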