From 4313fd97aaa308a2b60b65bfd3f6bb334237fc7b Mon Sep 17 00:00:00 2001
From: Blake Blackshear
Date: Fri, 7 Jun 2024 06:52:08 -0500
Subject: [PATCH] Adds support for YOLO-NAS in OpenVino (#11645)

* update onnxruntime

* support for yolo-nas in openvino

* cleanup notebook

* update docs

* improve docs

* handle AUTO issue and update docs
---
 docker/main/requirements-wheels.txt         |  2 +-
 docker/main/rootfs/labelmap/coco.txt        | 91 +++++++++++++++++++++
 docs/docs/configuration/object_detectors.md | 82 ++++++++++---------
 frigate/detectors/detection_api.py          |  2 +
 frigate/detectors/plugins/openvino.py       | 73 +++++++++++++++--
 notebooks/YOLO_NAS_Pretrained_Export.ipynb  | 75 +++++++++++++++++
 6 files changed, 279 insertions(+), 46 deletions(-)
 create mode 100644 docker/main/rootfs/labelmap/coco.txt
 create mode 100644 notebooks/YOLO_NAS_Pretrained_Export.ipynb

diff --git a/docker/main/requirements-wheels.txt b/docker/main/requirements-wheels.txt
index c416c8915..6c8a8629e 100644
--- a/docker/main/requirements-wheels.txt
+++ b/docker/main/requirements-wheels.txt
@@ -29,5 +29,5 @@ norfair == 2.2.*
 setproctitle == 1.3.*
 ws4py == 0.5.*
 unidecode == 1.3.*
-onnxruntime == 1.16.*
+onnxruntime == 1.18.*
 openvino == 2024.1.*
diff --git a/docker/main/rootfs/labelmap/coco.txt b/docker/main/rootfs/labelmap/coco.txt
new file mode 100644
index 000000000..79fff1772
--- /dev/null
+++ b/docker/main/rootfs/labelmap/coco.txt
@@ -0,0 +1,91 @@
+0 person
+1 bicycle
+2 car
+3 motorcycle
+4 airplane
+5 bus
+6 train
+7 truck
+8 boat
+9 traffic light
+10 fire hydrant
+11 street sign
+12 stop sign
+13 parking meter
+14 bench
+15 bird
+16 cat
+17 dog
+18 horse
+19 sheep
+20 cow
+21 elephant
+22 bear
+23 zebra
+24 giraffe
+25 hat
+26 backpack
+27 umbrella
+28 shoe
+29 eye glasses
+30 handbag
+31 tie
+32 suitcase
+33 frisbee
+34 skis
+35 snowboard
+36 sports ball
+37 kite
+38 baseball bat
+39 baseball glove
+40 skateboard
+41 surfboard
+42 tennis racket
+43 bottle
+44 plate
+45 wine glass
+46 cup
+47 fork
+48 knife
+49 spoon
+50 bowl
+51 banana
+52 apple
+53 sandwich
+54 orange
+55 broccoli
+56 carrot
+57 hot dog
+58 pizza
+59 donut
+60 cake
+61 chair
+62 couch
+63 potted plant
+64 bed
+65 mirror
+66 dining table
+67 window
+68 desk
+69 toilet
+70 door
+71 tv
+72 laptop
+73 mouse
+74 remote
+75 keyboard
+76 cell phone
+77 microwave
+78 oven
+79 toaster
+80 sink
+81 refrigerator
+82 blender
+83 book
+84 clock
+85 vase
+86 scissors
+87 teddy bear
+88 hair drier
+89 toothbrush
+90 hair brush
\ No newline at end of file
diff --git a/docs/docs/configuration/object_detectors.md b/docs/docs/configuration/object_detectors.md
index f3a389798..55ef1e156 100644
--- a/docs/docs/configuration/object_detectors.md
+++ b/docs/docs/configuration/object_detectors.md
@@ -109,9 +109,13 @@ detectors:

The OpenVINO detector type runs an OpenVINO IR model on AMD and Intel CPUs, Intel GPUs and Intel VPU hardware. To configure an OpenVINO detector, set the `"type"` attribute to `"openvino"`.

-The OpenVINO device to be used is specified using the `"device"` attribute according to the naming conventions in the [Device Documentation](https://docs.openvino.ai/latest/openvino_docs_OV_UG_Working_with_devices.html). Other supported devices could be `AUTO`, `CPU`, `GPU`, `MYRIAD`, etc. If not specified, the default OpenVINO device will be selected by the `AUTO` plugin.
+The OpenVINO device to be used is specified using the `"device"` attribute according to the naming conventions in the [Device Documentation](https://docs.openvino.ai/2024/openvino-workflow/running-inference/inference-devices-and-modes.html). The most common devices are `CPU` and `GPU`. Currently, there is a known issue with using `AUTO`. For backwards compatibility, Frigate will attempt to use `GPU` if `AUTO` is set in your configuration.

-OpenVINO is supported on 6th Gen Intel platforms (Skylake) and newer. It will also run on AMD CPUs despite having no official support for it. A supported Intel platform is required to use the `GPU` device with OpenVINO. The `MYRIAD` device may be run on any platform, including Arm devices. For detailed system requirements, see [OpenVINO System Requirements](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/system-requirements.html)
+OpenVINO is supported on 6th Gen Intel platforms (Skylake) and newer. It will also run on AMD CPUs, although they are not officially supported. A supported Intel platform is required to use the `GPU` device with OpenVINO. For detailed system requirements, see [OpenVINO System Requirements](https://docs.openvino.ai/2024/about-openvino/release-notes-openvino/system-requirements.html).
+
+### Supported Models
+
+#### SSDLite MobileNet v2

An OpenVINO model is provided in the container at `/openvino-model/ssdlite_mobilenet_v2.xml` and is used by this detector type by default. The model comes from Intel's Open Model Zoo [SSDLite MobileNet V2](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/ssdlite_mobilenet_v2) and is converted to an FP16 precision IR model. Use the model configuration shown below when using the OpenVINO detector with the default model.

@@ -119,27 +123,26 @@ An OpenVINO model is provided in the container at `/openvino-model/ssdlite_mobil
detectors:
  ov:
    type: openvino
-    device: AUTO
-    model:
-      path: /openvino-model/ssdlite_mobilenet_v2.xml
+    device: GPU

model:
  width: 300
  height: 300
  input_tensor: nhwc
  input_pixel_format: bgr
+  path: /openvino-model/ssdlite_mobilenet_v2.xml
  labelmap_path: /openvino-model/coco_91cl_bkgr.txt
```

-This detector also supports YOLOX. Other YOLO variants are not officially supported/tested. Frigate does not come with any yolo models preloaded, so you will need to supply your own models. This detector has been verified to work with the [yolox_tiny](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolox-tiny) model from Intel's Open Model Zoo. You can follow [these instructions](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolox-tiny#download-a-model-and-convert-it-into-openvino-ir-format) to retrieve the OpenVINO-compatible `yolox_tiny` model. Make sure that the model input dimensions match the `width` and `height` parameters, and `model_type` is set accordingly. See [Full Configuration Reference](/configuration/reference.md) for a list of possible `model_type` options. Below is an example of how `yolox_tiny` can be used in Frigate:
+#### YOLOX
+
+This detector also supports YOLOX. Frigate does not come with any YOLOX models preloaded, so you will need to supply your own models. This detector has been verified to work with the [yolox_tiny](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolox-tiny) model from Intel's Open Model Zoo. You can follow [these instructions](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolox-tiny#download-a-model-and-convert-it-into-openvino-ir-format) to retrieve the OpenVINO-compatible `yolox_tiny` model. Make sure that the model input dimensions match the `width` and `height` parameters, and `model_type` is set accordingly. See [Full Configuration Reference](/configuration/reference.md) for a list of possible `model_type` options. Below is an example of how `yolox_tiny` can be used in Frigate:

```yaml
detectors:
  ov:
    type: openvino
-    device: AUTO
-    model:
-      path: /path/to/yolox_tiny.xml
+    device: GPU

model:
  width: 416
@@ -147,38 +150,41 @@ model:
  input_tensor: nchw
  input_pixel_format: bgr
  model_type: yolox
+  path: /path/to/yolox_tiny.xml
  labelmap_path: /path/to/coco_80cl.txt
```

-### Intel NCS2 VPU and Myriad X Setup
+#### YOLO-NAS

-Intel produces a neural net inference acceleration chip called Myriad X. This chip was sold in their Neural Compute Stick 2 (NCS2) which has been discontinued. If intending to use the MYRIAD device for acceleration, additional setup is required to pass through the USB device. The host needs a udev rule installed to handle the NCS2 device.
+[YOLO-NAS](https://github.com/Deci-AI/super-gradients/blob/master/YOLONAS.md) models are supported, but not included by default. You can build and download a compatible model with pre-trained weights using [this notebook](https://github.com/blakeblackshear/frigate/blob/dev/notebooks/YOLO_NAS_Pretrained_Export.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/blakeblackshear/frigate/blob/dev/notebooks/YOLO_NAS_Pretrained_Export.ipynb).

-```bash
-sudo usermod -a -G users "$(whoami)"
-cat <<EOF > 97-myriad-usbboot.rules
-SUBSYSTEM=="usb", ATTRS{idProduct}=="2485", ATTRS{idVendor}=="03e7", GROUP="users", MODE="0666", ENV{ID_MM_DEVICE_IGNORE}="1"
-SUBSYSTEM=="usb", ATTRS{idProduct}=="f63b", ATTRS{idVendor}=="03e7", GROUP="users", MODE="0666", ENV{ID_MM_DEVICE_IGNORE}="1"
-EOF
-sudo cp 97-myriad-usbboot.rules /etc/udev/rules.d/
-sudo udevadm control --reload-rules
-sudo udevadm trigger
+:::warning
+
+The pre-trained YOLO-NAS weights from DeciAI are subject to their license and can't be used commercially. For more information, see: https://docs.deci.ai/super-gradients/latest/LICENSE.YOLONAS.html
+
+:::
+
+The input image size in this notebook is set to 320x320. This results in lower CPU usage and faster inference times without impacting detection accuracy in most cases, because Frigate crops video frames to areas of interest before running detection. The notebook and config can be updated to 640x640 if desired.
+
+After placing the downloaded ONNX model in your config folder, you can use the following configuration:
+
+```yaml
+detectors:
+  ov:
+    type: openvino
+    device: GPU
+
+model:
+  model_type: yolonas
+  width: 320 # <--- should match whatever was set in notebook
+  height: 320 # <--- should match whatever was set in notebook
+  input_tensor: nchw
+  input_pixel_format: bgr
+  path: /config/yolo_nas_s.onnx
+  labelmap_path: /labelmap/coco-80.txt
```

-Additionally, the Frigate docker container needs to run with the following configuration:
-
-```bash
---device-cgroup-rule='c 189:\* rmw' -v /dev/bus/usb:/dev/bus/usb
-```
-
-or in your compose file:
-
-```yml
-device_cgroup_rules:
-  - "c 189:* rmw"
-volumes:
-  - /dev/bus/usb:/dev/bus/usb
-```
+
+Note that the labelmap uses a subset of the complete COCO label set containing only 80 objects.
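+
+As a quick sanity check before pointing Frigate at the model, you can inspect the exported file's input and output signature. Below is a minimal sketch, assuming `onnxruntime` (e.g. the 1.18 series pinned in this PR) is installed locally and the file name matches the notebook's output:
+
+```python
+# Sketch: verify the exported YOLO-NAS model matches the config above.
+import onnxruntime as ort
+
+session = ort.InferenceSession("yolo_nas_s.onnx", providers=["CPUExecutionProvider"])
+
+for inp in session.get_inputs():
+    # Expect a single input of shape [1, 3, 320, 320] for the notebook defaults.
+    print("input:", inp.name, inp.shape, inp.type)
+
+for out in session.get_outputs():
+    # A flat-format export has one output whose rows carry 7 values:
+    # [batch_index, x_min, y_min, x_max, y_max, confidence, class_id]
+    print("output:", out.name, out.shape, out.type)
+```
+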
## NVidia TensorRT Detector

@@ -354,15 +360,15 @@ model: # required

The correct labelmap must be loaded for each model. If you use a custom model (see notes below), you must make sure to provide the correct labelmap. The table below lists the correct paths for the bundled models:

-| `path` | `labelmap_path` |
-| ------------------- | --------------------- |
-| deci-fp16-yolonas_* | /labelmap/coco-80.txt |
+| `path`                | `labelmap_path`       |
+| --------------------- | --------------------- |
+| deci-fp16-yolonas\_\* | /labelmap/coco-80.txt |

### Choosing a model

:::warning

-yolo-nas models use weights from DeciAI. These weights are subject to their license and can't be used commercially. For more information, see: https://docs.deci.ai/super-gradients/latest/LICENSE.YOLONAS.html
+The pre-trained YOLO-NAS weights from DeciAI are subject to their license and can't be used commercially. For more information, see: https://docs.deci.ai/super-gradients/latest/LICENSE.YOLONAS.html

:::
diff --git a/frigate/detectors/detection_api.py b/frigate/detectors/detection_api.py
index a142cb0fa..403bce574 100644
--- a/frigate/detectors/detection_api.py
+++ b/frigate/detectors/detection_api.py
@@ -1,5 +1,6 @@
 import logging
 from abc import ABC, abstractmethod
+from typing import List

 import numpy as np

@@ -10,6 +11,7 @@ logger = logging.getLogger(__name__)
 class DetectionApi(ABC):
     type_key: str
+    supported_models: List[ModelTypeEnum]

     @abstractmethod
     def __init__(self, detector_config):
diff --git a/frigate/detectors/plugins/openvino.py b/frigate/detectors/plugins/openvino.py
index b87700d0d..4ac610d77 100644
--- a/frigate/detectors/plugins/openvino.py
+++ b/frigate/detectors/plugins/openvino.py
@@ -20,6 +20,7 @@ class OvDetectorConfig(BaseDetectorConfig):

 class OvDetector(DetectionApi):
     type_key = DETECTOR_KEY
+    supported_models = [ModelTypeEnum.ssd, ModelTypeEnum.yolonas, ModelTypeEnum.yolox]

     def __init__(self, detector_config: OvDetectorConfig):
         self.ov_core = ov.Core()
@@ -28,12 +29,24 @@ class OvDetector(DetectionApi):
         self.h = detector_config.model.height
         self.w = detector_config.model.width

+        if detector_config.device == "AUTO":
+            logger.warning(
+                "OpenVINO AUTO device type is not currently supported. Attempting to use GPU instead."
+            )
+            detector_config.device = "GPU"
+
         self.interpreter = self.ov_core.compile_model(
             model=detector_config.model.path, device_name=detector_config.device
         )

         self.model_invalid = False

+        if self.ov_model_type not in self.supported_models:
+            logger.error(
+                f"OpenVino detector does not support {self.ov_model_type} models."
+            )
+            self.model_invalid = True
+
         # Ensure the SSD model has the right input and output shapes
         if self.ov_model_type == ModelTypeEnum.ssd:
             model_inputs = self.interpreter.inputs
@@ -61,6 +74,34 @@ class OvDetector(DetectionApi):
             logger.error(f"SSD model output doesn't match. Found {output_shape}.")
             self.model_invalid = True

+        if self.ov_model_type == ModelTypeEnum.yolonas:
+            model_inputs = self.interpreter.inputs
+            model_outputs = self.interpreter.outputs
+
+            if len(model_inputs) != 1:
+                logger.error(
+                    f"YoloNAS models must only have 1 input. Found {len(model_inputs)}."
+                )
+                self.model_invalid = True
+            if len(model_outputs) != 1:
+                logger.error(
+                    f"YoloNAS models must be exported in flat format and only have 1 output. Found {len(model_outputs)}."
+                )
+                self.model_invalid = True
+
+            if model_inputs[0].get_shape() != ov.Shape([1, 3, self.h, self.w]):
+                logger.error(
+                    f"YoloNAS model input doesn't match. Found {model_inputs[0].get_shape()}, but expected {[1, 3, self.h, self.w]}."
+                )
+                self.model_invalid = True
+
+            output_shape = model_outputs[0].partial_shape
+            if output_shape[-1] != 7:
+                logger.error(
+                    f"YoloNAS models must be exported in flat format. Model output doesn't match. Found {output_shape}."
+                )
+                self.model_invalid = True
+
         if self.ov_model_type == ModelTypeEnum.yolox:
             self.output_indexes = 0
             while True:
@@ -113,12 +154,12 @@ class OvDetector(DetectionApi):
         input_tensor = ov.Tensor(array=tensor_input)
         infer_request.infer(input_tensor)

+        detections = np.zeros((20, 6), np.float32)
+
+        if self.model_invalid:
+            return detections
+
         if self.ov_model_type == ModelTypeEnum.ssd:
-            detections = np.zeros((20, 6), np.float32)
-
-            if self.model_invalid:
-                return detections
-
             results = infer_request.get_output_tensor(0).data[0][0]

             for i, (_, class_id, score, xmin, ymin, xmax, ymax) in enumerate(results):
@@ -134,6 +175,26 @@
             ]
             return detections

+        if self.ov_model_type == ModelTypeEnum.yolonas:
+            predictions = infer_request.get_output_tensor(0).data
+
+            for i, prediction in enumerate(predictions):
+                if i == 20:
+                    break
+                (_, x_min, y_min, x_max, y_max, confidence, class_id) = prediction
+                # when running in GPU mode, empty predictions in the output have class_id of -1
+                if class_id < 0:
+                    break
+                detections[i] = [
+                    class_id,
+                    confidence,
+                    y_min / self.h,
+                    x_min / self.w,
+                    y_max / self.h,
+                    x_max / self.w,
+                ]
+            return detections
+
         if self.ov_model_type == ModelTypeEnum.yolox:
             out_tensor = infer_request.get_output_tensor()
             # [x, y, h, w, box_score, class_no_1, ..., class_no_80],
@@ -155,8 +216,6 @@

         ordered = dets[dets[:, 5].argsort()[::-1]][:20]

-        detections = np.zeros((20, 6), np.float32)
-
         for i, object_detected in enumerate(ordered):
             detections[i] = self.process_yolo(
                 object_detected[6], object_detected[5], object_detected[:4]
diff --git a/notebooks/YOLO_NAS_Pretrained_Export.ipynb b/notebooks/YOLO_NAS_Pretrained_Export.ipynb
new file mode 100644
index 000000000..a3c303c01
--- /dev/null
+++ b/notebooks/YOLO_NAS_Pretrained_Export.ipynb
@@ -0,0 +1,75 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "rmuF9iKWTbdk"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install -q super_gradients==3.7.1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "dTB0jy_NNSFz"
+   },
+   "outputs": [],
+   "source": [
+    "from super_gradients.common.object_names import Models\n",
+    "from super_gradients.conversion import DetectionOutputFormatMode\n",
+    "from super_gradients.training import models\n",
+    "\n",
+    "model = models.get(Models.YOLO_NAS_S, pretrained_weights=\"coco\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "GymUghyCNXem"
+   },
+   "outputs": [],
+   "source": [
+    "# export the model for compatibility with Frigate\n",
+    "\n",
+    "model.export(\"yolo_nas_s.onnx\",\n",
+    "             output_predictions_format=DetectionOutputFormatMode.FLAT_FORMAT,\n",
+    "             max_predictions_per_image=20,\n",
+    "             confidence_threshold=0.4,\n",
+    "             input_image_shape=(320,320),\n",
+    "             )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "uBhXV5g4Nh42"
+   },
+   "outputs": [],
+   "source": [
+    "from google.colab import files\n",
+    "\n",
+    "files.download('yolo_nas_s.onnx')"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
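
For reference, the notebook's cells can also be run as a plain Python script outside of Colab. This is a sketch of the same export steps, assuming `super_gradients==3.7.1` is installed (as in the first notebook cell); only the Colab-specific download step is omitted:

```python
# Sketch: local (non-Colab) equivalent of the export notebook above.
from super_gradients.common.object_names import Models
from super_gradients.conversion import DetectionOutputFormatMode
from super_gradients.training import models

# fetch the small YOLO-NAS variant with pre-trained COCO weights
model = models.get(Models.YOLO_NAS_S, pretrained_weights="coco")

# export in the flat output format expected by Frigate's OpenVINO detector
model.export(
    "yolo_nas_s.onnx",
    output_predictions_format=DetectionOutputFormatMode.FLAT_FORMAT,
    max_predictions_per_image=20,
    confidence_threshold=0.4,
    input_image_shape=(320, 320),
)
```

The resulting `yolo_nas_s.onnx` can then be placed in Frigate's config folder as described in the documentation changes above.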