diff --git a/docker/main/requirements-wheels.txt b/docker/main/requirements-wheels.txt
index c8c9662ce..4b5a81f56 100644
--- a/docker/main/requirements-wheels.txt
+++ b/docker/main/requirements-wheels.txt
@@ -42,9 +42,8 @@ opencv-python-headless == 4.11.0.*
 opencv-contrib-python == 4.11.0.*
 scipy == 1.14.*
 # OpenVino & ONNX
-openvino == 2025.1.*
-onnxruntime-openvino == 1.22.* ; platform_machine == 'x86_64'
-onnxruntime == 1.22.* ; platform_machine == 'aarch64'
+openvino == 2025.3.*
+onnxruntime == 1.22.*
 # Embeddings
 transformers == 4.45.*
 # Generative AI
diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile
index edd6b50c2..cfd8a92e1 100644
--- a/docker/rocm/Dockerfile
+++ b/docker/rocm/Dockerfile
@@ -47,7 +47,7 @@ RUN wget -q https://bootstrap.pypa.io/get-pip.py -O get-pip.py \
 RUN python3 -m pip config set global.break-system-packages true
 
 COPY docker/rocm/requirements-wheels-rocm.txt /requirements.txt
-RUN pip3 uninstall -y onnxruntime-openvino \
+RUN pip3 uninstall -y onnxruntime \
     && pip3 install -r /requirements.txt
 
 #######################################################################
diff --git a/docker/tensorrt/Dockerfile.amd64 b/docker/tensorrt/Dockerfile.amd64
index cbbb28bb6..ef0295a96 100644
--- a/docker/tensorrt/Dockerfile.amd64
+++ b/docker/tensorrt/Dockerfile.amd64
@@ -21,7 +21,7 @@ FROM deps AS frigate-tensorrt
 ARG PIP_BREAK_SYSTEM_PACKAGES
 
 RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \
-    pip3 uninstall -y onnxruntime-openvino tensorflow-cpu \
+    pip3 uninstall -y onnxruntime tensorflow-cpu \
     && pip3 install -U /deps/trt-wheels/*.whl
 
 COPY --from=rootfs / /
diff --git a/frigate/detectors/detection_runners.py b/frigate/detectors/detection_runners.py
index 0b62bd999..145fe79b7 100644
--- a/frigate/detectors/detection_runners.py
+++ b/frigate/detectors/detection_runners.py
@@ -20,6 +20,38 @@ except ImportError:
     ov = None
 
 
+def get_openvino_available_devices() -> list[str]:
+    """Get available OpenVINO devices without using ONNX Runtime.
+
+    Returns:
+        List of available OpenVINO device names (e.g., ['CPU', 'GPU', 'MYRIAD'])
+    """
+    if ov is None:
+        logger.debug("OpenVINO is not available")
+        return []
+
+    try:
+        core = ov.Core()
+        available_devices = core.available_devices
+        logger.debug(f"OpenVINO available devices: {available_devices}")
+        return available_devices
+    except Exception as e:
+        logger.warning(f"Failed to get OpenVINO available devices: {e}")
+        return []
+
+
+def is_openvino_gpu_npu_available() -> bool:
+    """Check if OpenVINO GPU or NPU devices are available.
+
+    Returns:
+        True if GPU or NPU devices are available, False otherwise
+    """
+    available_devices = get_openvino_available_devices()
+    # Check for GPU, NPU, or other acceleration devices (excluding CPU)
+    acceleration_devices = ["GPU", "MYRIAD", "NPU", "GNA", "HDDL"]
+    return any(device in available_devices for device in acceleration_devices)
+
+
 class BaseModelRunner(ABC):
     """Abstract base class for model runners."""
 
@@ -148,9 +180,14 @@ class OpenVINOModelRunner(BaseModelRunner):
 
         # Create reusable inference request
        self.infer_request = self.compiled_model.create_infer_request()
-        input_shape = self.compiled_model.inputs[0].get_shape()
-        input_element_type = self.compiled_model.inputs[0].get_element_type()
-        self.input_tensor = ov.Tensor(input_element_type, input_shape)
+
+        try:
+            input_shape = self.compiled_model.inputs[0].get_shape()
+            input_element_type = self.compiled_model.inputs[0].get_element_type()
+            self.input_tensor = ov.Tensor(input_element_type, input_shape)
+        except RuntimeError:
+            # model has dynamic input shapes, so an input tensor cannot be pre-allocated
+            self.input_tensor = None
 
     def get_input_names(self) -> list[str]:
         """Get input names for the model."""
@@ -172,7 +209,11 @@ class OpenVINOModelRunner(BaseModelRunner):
             List of output tensors
         """
         # Handle single input case for backward compatibility
-        if len(inputs) == 1 and len(self.compiled_model.inputs) == 1:
+        if (
+            len(inputs) == 1
+            and len(self.compiled_model.inputs) == 1
+            and self.input_tensor is not None
+        ):
             # Single input case - use the pre-allocated tensor for efficiency
             input_data = list(inputs.values())[0]
             np.copyto(self.input_tensor.data, input_data)
@@ -322,20 +363,10 @@ def get_optimized_runner(
     if rknn_path:
         return RKNNModelRunner(rknn_path)
 
-    providers, options = get_ort_providers(device == "CPU", device, **kwargs)
-
-    if device == "CPU":
-        return ONNXModelRunner(
-            ort.InferenceSession(
-                model_path,
-                providers=providers,
-                provider_options=options,
-            )
-        )
-
-    if "OpenVINOExecutionProvider" in providers:
+    if device != "CPU" and is_openvino_gpu_npu_available():
         return OpenVINOModelRunner(model_path, device, **kwargs)
 
+    providers, options = get_ort_providers(device == "CPU", device, **kwargs)
     ortSession = ort.InferenceSession(
         model_path,
         providers=providers,
diff --git a/frigate/util/model.py b/frigate/util/model.py
index c64287660..45a3a6c8b 100644
--- a/frigate/util/model.py
+++ b/frigate/util/model.py
@@ -338,14 +338,18 @@ def get_ort_providers(
             else:
                 continue
         elif provider == "OpenVINOExecutionProvider":
-            os.makedirs(os.path.join(MODEL_CACHE_DIR, "openvino/ort"), exist_ok=True)
-            providers.append(provider)
-            options.append(
-                {
-                    "cache_dir": os.path.join(MODEL_CACHE_DIR, "openvino/ort"),
-                    "device_type": device,
-                }
-            )
+            # OpenVINO is used directly, so only enable the ORT OpenVINO EP when explicitly requested
+            if device == "OpenVINO":
+                os.makedirs(
+                    os.path.join(MODEL_CACHE_DIR, "openvino/ort"), exist_ok=True
+                )
+                providers.append(provider)
+                options.append(
+                    {
+                        "cache_dir": os.path.join(MODEL_CACHE_DIR, "openvino/ort"),
+                        "device_type": device,
+                    }
+                )
         elif provider == "MIGraphXExecutionProvider":
             # MIGraphX uses more CPU than ROCM, while also being the same speed
             if device == "MIGraphX":
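---

Smoke-test sketch (not part of the patch): with this branch applied, the two new helpers are importable from frigate.detectors.detection_runners, and a quick check of the new device-selection path might look like the following; whether a GPU/NPU shows up depends on the local OpenVINO install.

    from frigate.detectors.detection_runners import (
        get_openvino_available_devices,
        is_openvino_gpu_npu_available,
    )

    # get_optimized_runner() now consults this check for non-CPU devices:
    # OpenVINOModelRunner is used when a GPU/NPU is present, otherwise it
    # falls back to an ONNX Runtime session via get_ort_providers().
    print("OpenVINO devices:", get_openvino_available_devices())
    print("GPU/NPU acceleration available:", is_openvino_gpu_npu_available())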