From 18af06237c301cfb6d08eb4b07a97fefe5a5cb0b Mon Sep 17 00:00:00 2001
From: Nicolas Mowen
Date: Sun, 23 Mar 2025 14:02:16 -0600
Subject: [PATCH] Support RF-DETR models with OpenVINO (#17321)

* Add support for openvino to run rf-detr models
* Add more inference time examples
* organize
* Add example to docs
* Add support for yolo generic
---
 docs/docs/configuration/object_detectors.md | 27 +++++++++++
 docs/docs/frigate/hardware.md               | 52 ++++++++++-----------
 frigate/detectors/plugins/onnx.py           |  5 +-
 frigate/detectors/plugins/openvino.py       | 22 +++++++--
 4 files changed, 74 insertions(+), 32 deletions(-)

diff --git a/docs/docs/configuration/object_detectors.md b/docs/docs/configuration/object_detectors.md
index 174343ef4..e027596d2 100644
--- a/docs/docs/configuration/object_detectors.md
+++ b/docs/docs/configuration/object_detectors.md
@@ -340,6 +340,33 @@ model:
 
 Note that the labelmap uses a subset of the complete COCO label set that has only 80 objects.
 
+#### RF-DETR
+
+[RF-DETR](https://github.com/roboflow/rf-detr) is a DETR based model. The ONNX exported models are supported, but not included by default. See [the models section](#downloading-rf-detr-model) for more information on downloading the RF-DETR model for use in Frigate.
+
+:::warning
+
+Due to the size and complexity of the RF-DETR model, it is recommended to run it only on discrete Arc graphics cards.
+
+:::
+
+After placing the downloaded onnx model in your `config/model_cache` folder, you can use the following configuration:
+
+```yaml
+detectors:
+  ov:
+    type: openvino
+    device: GPU
+
+model:
+  model_type: rfdetr
+  width: 560
+  height: 560
+  input_tensor: nchw
+  input_dtype: float
+  path: /config/model_cache/rfdetr.onnx
+```
+
 #### D-FINE
 
 [D-FINE](https://github.com/Peterande/D-FINE) is a DETR based model. The ONNX exported models are supported, but not included by default. See [the models section](#downloading-d-fine-model) for more information on downloading the D-FINE model for use in Frigate.
diff --git a/docs/docs/frigate/hardware.md b/docs/docs/frigate/hardware.md
index c9bfe16d6..62152996d 100644
--- a/docs/docs/frigate/hardware.md
+++ b/docs/docs/frigate/hardware.md
@@ -40,7 +40,7 @@ Frigate supports multiple different detectors that work on different types of ha
 - [Hailo](#hailo-8): The Hailo8 and Hailo8L AI Acceleration module is available in m.2 format with a HAT for RPi devices offering a wide range of compatibility with devices.
   - [Supports many model architectures](../../configuration/object_detectors#configuration)
   - Runs best with tiny or small size models
-  
+
 - [Google Coral EdgeTPU](#google-coral-tpu): The Google Coral EdgeTPU is available in USB and m.2 format allowing for a wide range of compatibility with devices.
   - [Supports primarily ssdlite and mobilenet model architectures](../../configuration/object_detectors#edge-tpu-detector)
 
@@ -89,7 +89,7 @@ In real-world deployments, even with multiple cameras running concurrently, Frig
 
 ### Google Coral TPU
 
-Frigate supports both the USB and M.2 versions of the Google Coral. 
+Frigate supports both the USB and M.2 versions of the Google Coral.
 
 - The USB version is compatible with the widest variety of hardware and does not require a driver on the host machine. However, it does lack the automatic throttling features of the other versions.
 - The PCIe and M.2 versions require installation of a driver on the host. Follow the instructions for your version from https://coral.ai
@@ -107,23 +107,19 @@ More information is available [in the detector docs](/configuration/object_detec
 
 Inference speeds vary greatly depending on the CPU or GPU used, some known examples of GPU inference times are below:
 
-| Name                 | MobileNetV2 Inference Time | YOLO-NAS Inference Time   | Notes                                  |
-| -------------------- | -------------------------- | ------------------------- | -------------------------------------- |
-| Intel Celeron J4105  | ~ 25 ms                    |                           | Can only run one detector instance     |
-| Intel Celeron N3060  | 130 - 150 ms               |                           | Can only run one detector instance     |
-| Intel Celeron N3205U | ~ 120 ms                   |                           | Can only run one detector instance     |
-| Intel Celeron N4020  | 50 - 200 ms                |                           | Inference speed depends on other loads |
-| Intel i3 6100T       | 15 - 35 ms                 |                           | Can only run one detector instance     |
-| Intel i3 8100        | ~ 15 ms                    |                           |                                        |
-| Intel i5 4590        | ~ 20 ms                    |                           |                                        |
-| Intel i5 6500        | ~ 15 ms                    |                           |                                        |
-| Intel i5 7200u       | 15 - 25 ms                 |                           |                                        |
-| Intel i5 7500        | ~ 15 ms                    |                           |                                        |
-| Intel i5 1135G7      | 10 - 15 ms                 |                           |                                        |
-| Intel i3 12000       |                            | 320: ~ 19 ms 640: ~ 54 ms |                                        |
-| Intel i5 12600K      | ~ 15 ms                    | 320: ~ 20 ms 640: ~ 46 ms |                                        |
-| Intel Arc A380       | ~ 6 ms                     | 320: ~ 10 ms              |                                        |
-| Intel Arc A750       | ~ 4 ms                     | 320: ~ 8 ms               |                                        |
+| Name                 | MobileNetV2 Inference Time | YOLO-NAS Inference Time   | RF-DETR Inference Time    | Notes                                  |
+| -------------------- | -------------------------- | ------------------------- | ------------------------- | -------------------------------------- |
+| Intel i3 6100T       | 15 - 35 ms                 |                           |                           | Can only run one detector instance     |
+| Intel i5 6500        | ~ 15 ms                    |                           |                           |                                        |
+| Intel i5 7200u       | 15 - 25 ms                 |                           |                           |                                        |
+| Intel i5 7500        | ~ 15 ms                    |                           |                           |                                        |
+| Intel i3 8100        | ~ 15 ms                    |                           |                           |                                        |
+| Intel i5 1135G7      | 10 - 15 ms                 |                           |                           |                                        |
+| Intel i3 12000       |                            | 320: ~ 19 ms 640: ~ 54 ms |                           |                                        |
+| Intel i5 12600K      | ~ 15 ms                    | 320: ~ 20 ms 640: ~ 46 ms |                           |                                        |
+| Intel i7 12650H      | ~ 15 ms                    | 320: ~ 20 ms 640: ~ 42 ms | 336: ~ 50 ms              |                                        |
+| Intel Arc A380       | ~ 6 ms                     | 320: ~ 10 ms              |                           |                                        |
+| Intel Arc A750       | ~ 4 ms                     | 320: ~ 8 ms               |                           |                                        |
 
 ### TensorRT - Nvidia GPU
 
@@ -132,15 +128,15 @@ The TensortRT detector is able to run on x86 hosts that have an Nvidia GPU which
 
 Inference speeds will vary greatly depending on the GPU and the model used. `tiny` variants are faster than the equivalent non-tiny model, some known examples are below:
 
-| Name            | YoloV7 Inference Time | YOLO-NAS Inference Time   |
-| --------------- | --------------------- | ------------------------- |
-| GTX 1060 6GB    | ~ 7 ms                |                           |
-| GTX 1070        | ~ 6 ms                |                           |
-| GTX 1660 SUPER  | ~ 4 ms                |                           |
-| RTX 3050        | 5 - 7 ms              | 320: ~ 10 ms 640: ~ 16 ms |
-| RTX 3070 Mobile | ~ 5 ms                |                           |
-| Quadro P400 2GB | 20 - 25 ms            |                           |
-| Quadro P2000    | ~ 12 ms               |                           |
+| Name            | YoloV7 Inference Time | YOLO-NAS Inference Time   | RF-DETR Inference Time    |
+| --------------- | --------------------- | ------------------------- | ------------------------- |
+| GTX 1060 6GB    | ~ 7 ms                |                           |                           |
+| GTX 1070        | ~ 6 ms                |                           |                           |
+| GTX 1660 SUPER  | ~ 4 ms                |                           |                           |
+| RTX 3050        | 5 - 7 ms              | 320: ~ 10 ms 640: ~ 16 ms | 336: ~ 16 ms 560: ~ 40 ms |
+| RTX 3070 Mobile | ~ 5 ms                |                           |                           |
+| Quadro P400 2GB | 20 - 25 ms            |                           |                           |
+| Quadro P2000    | ~ 12 ms               |                           |                           |
 
 ### AMD GPUs
 
diff --git a/frigate/detectors/plugins/onnx.py b/frigate/detectors/plugins/onnx.py
index 2679185a9..a10447b48 100644
--- a/frigate/detectors/plugins/onnx.py
+++ b/frigate/detectors/plugins/onnx.py
@@ -97,7 +97,10 @@ class ONNXDetector(DetectionApi):
                     x_max / self.w,
                 ]
             return detections
-        elif self.onnx_model_type == ModelTypeEnum.yolov9:
+        elif (
+            self.onnx_model_type == ModelTypeEnum.yolov9
+            or self.onnx_model_type == ModelTypeEnum.yologeneric
+        ):
             predictions: np.ndarray = tensor_output[0]
             return post_process_yolov9(predictions, self.w, self.h)
         else:
diff --git a/frigate/detectors/plugins/openvino.py b/frigate/detectors/plugins/openvino.py
index 75d956500..d90352772 100644
--- a/frigate/detectors/plugins/openvino.py
+++ b/frigate/detectors/plugins/openvino.py
@@ -10,7 +10,11 @@ from typing_extensions import Literal
 from frigate.const import MODEL_CACHE_DIR
 from frigate.detectors.detection_api import DetectionApi
 from frigate.detectors.detector_config import BaseDetectorConfig, ModelTypeEnum
-from frigate.util.model import post_process_dfine, post_process_yolov9
+from frigate.util.model import (
+    post_process_dfine,
+    post_process_rfdetr,
+    post_process_yolov9,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -25,11 +29,13 @@ class OvDetectorConfig(BaseDetectorConfig):
 class OvDetector(DetectionApi):
     type_key = DETECTOR_KEY
     supported_models = [
+        ModelTypeEnum.dfine,
+        ModelTypeEnum.rfdetr,
         ModelTypeEnum.ssd,
         ModelTypeEnum.yolonas,
         ModelTypeEnum.yolov9,
+        ModelTypeEnum.yologeneric,
         ModelTypeEnum.yolox,
-        ModelTypeEnum.dfine,
     ]
 
     def __init__(self, detector_config: OvDetectorConfig):
@@ -185,6 +191,13 @@ class OvDetector(DetectionApi):
 
         if self.model_invalid:
             return detections
+        elif self.ov_model_type == ModelTypeEnum.rfdetr:
+            return post_process_rfdetr(
+                [
+                    infer_request.get_output_tensor(0).data,
+                    infer_request.get_output_tensor(1).data,
+                ]
+            )
         elif self.ov_model_type == ModelTypeEnum.ssd:
             results = infer_request.get_output_tensor(0).data[0][0]
 
@@ -219,7 +232,10 @@ class OvDetector(DetectionApi):
                     x_max / self.w,
                 ]
             return detections
-        elif self.ov_model_type == ModelTypeEnum.yolov9:
+        elif (
+            self.ov_model_type == ModelTypeEnum.yolov9
+            or self.ov_model_type == ModelTypeEnum.yologeneric
+        ):
             out_tensor = infer_request.get_output_tensor(0).data
             return post_process_yolov9(out_tensor, self.w, self.h)
         elif self.ov_model_type == ModelTypeEnum.yolox:
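Note on the `post_process_rfdetr` call above: the helper lives in `frigate/util/model.py` and is not shown in this patch. The sketch below is only an illustration of the kind of conversion such a helper has to perform, not Frigate's actual implementation. It assumes the two OpenVINO output tensors are, in order, normalized `(cx, cy, w, h)` boxes of shape `[1, N, 4]` and raw per-class logits of shape `[1, N, num_classes]`, and that the detector returns the fixed-size `(20, 6)` array of `[class_id, score, y_min, x_min, y_max, x_max]` rows built by the other branches in `openvino.py`; the sigmoid activation and the 0.4 score threshold are likewise assumptions made for the example.

```python
import numpy as np


def sketch_post_process_rfdetr(tensor_output, score_threshold=0.4, max_detections=20):
    """Illustrative RF-DETR post-processing sketch (not Frigate's implementation)."""
    boxes = tensor_output[0][0]   # assumed [N, 4] boxes as normalized (cx, cy, w, h)
    logits = tensor_output[1][0]  # assumed [N, num_classes] raw class logits

    # Sigmoid over the logits, then keep the best-scoring class per query.
    scores = 1.0 / (1.0 + np.exp(-logits))
    class_ids = np.argmax(scores, axis=1)
    confidences = scores[np.arange(len(class_ids)), class_ids]

    # Keep the highest-confidence queries, up to the fixed detection count.
    order = np.argsort(confidences)[::-1][:max_detections]

    detections = np.zeros((max_detections, 6), np.float32)
    for i, idx in enumerate(order):
        if confidences[idx] < score_threshold:
            break
        cx, cy, w, h = boxes[idx]
        detections[i] = [
            class_ids[idx],
            confidences[idx],
            cy - h / 2,  # y_min
            cx - w / 2,  # x_min
            cy + h / 2,  # y_max
            cx + w / 2,  # x_max
        ]
    return detections
```

Because the detector plugin simply forwards both OpenVINO output tensors, any difference between this assumed layout and the real ONNX export only needs to be handled inside that one post-processing helper.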