From 0de928703facc46fd2ed947dbb2be486b41ae4dc Mon Sep 17 00:00:00 2001 From: Jason Hunter Date: Mon, 24 Feb 2025 10:56:01 -0500 Subject: [PATCH] Initial implementation of D-FINE model via ONNX (#16772) * initial implementation of D-FINE model * revert docker-compose * add docs for D-FINE * remove weird auto-format issue --- .devcontainer/devcontainer.json | 47 +++++++++++++--- .../onnxruntime-gpu/devcontainer-feature.json | 22 ++++++++ .../features/onnxruntime-gpu/install.sh | 15 +++++ docs/docs/configuration/object_detectors.md | 55 ++++++++++++++++++- frigate/detectors/detector_config.py | 1 + frigate/detectors/plugins/onnx.py | 17 +++++- frigate/util/model.py | 27 +++++++++ 7 files changed, 172 insertions(+), 12 deletions(-) create mode 100644 .devcontainer/features/onnxruntime-gpu/devcontainer-feature.json create mode 100644 .devcontainer/features/onnxruntime-gpu/install.sh diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 63adae73d..c782fb32f 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -8,9 +8,25 @@ "overrideCommand": false, "remoteUser": "vscode", "features": { - "ghcr.io/devcontainers/features/common-utils:1": {} + "ghcr.io/devcontainers/features/common-utils:2": {} + // Uncomment the following lines to use ONNX Runtime with CUDA support + // "ghcr.io/devcontainers/features/nvidia-cuda:1": { + // "installCudnn": true, + // "installNvtx": true, + // "installToolkit": true, + // "cudaVersion": "12.5", + // "cudnnVersion": "9.4.0.58" + // }, + // "./features/onnxruntime-gpu": {} }, - "forwardPorts": [8971, 5000, 5001, 5173, 8554, 8555], + "forwardPorts": [ + 8971, + 5000, + 5001, + 5173, + 8554, + 8555 + ], "portsAttributes": { "8971": { "label": "External NGINX", @@ -64,10 +80,18 @@ "editor.formatOnType": true, "python.testing.pytestEnabled": false, "python.testing.unittestEnabled": true, - "python.testing.unittestArgs": ["-v", "-s", "./frigate/test"], + "python.testing.unittestArgs": [ + "-v", + "-s", + "./frigate/test" + ], "files.trimTrailingWhitespace": true, - "eslint.workingDirectories": ["./web"], - "isort.args": ["--settings-path=./pyproject.toml"], + "eslint.workingDirectories": [ + "./web" + ], + "isort.args": [ + "--settings-path=./pyproject.toml" + ], "[python]": { "editor.defaultFormatter": "charliermarsh.ruff", "editor.formatOnSave": true, @@ -86,9 +110,16 @@ ], "editor.tabSize": 2 }, - "cSpell.ignoreWords": ["rtmp"], - "cSpell.words": ["preact", "astype", "hwaccel", "mqtt"] + "cSpell.ignoreWords": [ + "rtmp" + ], + "cSpell.words": [ + "preact", + "astype", + "hwaccel", + "mqtt" + ] } } } -} +} \ No newline at end of file diff --git a/.devcontainer/features/onnxruntime-gpu/devcontainer-feature.json b/.devcontainer/features/onnxruntime-gpu/devcontainer-feature.json new file mode 100644 index 000000000..30514442b --- /dev/null +++ b/.devcontainer/features/onnxruntime-gpu/devcontainer-feature.json @@ -0,0 +1,22 @@ +{ + "id": "onnxruntime-gpu", + "version": "0.0.1", + "name": "ONNX Runtime GPU (Nvidia)", + "description": "Installs ONNX Runtime for Nvidia GPUs.", + "documentationURL": "", + "options": { + "version": { + "type": "string", + "proposals": [ + "latest", + "1.20.1", + "1.20.0" + ], + "default": "latest", + "description": "Version of ONNX Runtime to install" + } + }, + "installsAfter": [ + "ghcr.io/devcontainers/features/nvidia-cuda" + ] +} \ No newline at end of file diff --git a/.devcontainer/features/onnxruntime-gpu/install.sh b/.devcontainer/features/onnxruntime-gpu/install.sh new file 
mode 100644 index 000000000..0c090beec --- /dev/null +++ b/.devcontainer/features/onnxruntime-gpu/install.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +set -e + +VERSION=${VERSION} + +python3 -m pip config set global.break-system-packages true +# if VERSION == "latest" or VERSION is empty, install the latest version +if [ "$VERSION" == "latest" ] || [ -z "$VERSION" ]; then + python3 -m pip install onnxruntime-gpu +else + python3 -m pip install onnxruntime-gpu==$VERSION +fi + +echo "Done!" \ No newline at end of file diff --git a/docs/docs/configuration/object_detectors.md b/docs/docs/configuration/object_detectors.md index 21ba46c2d..bc76779cb 100644 --- a/docs/docs/configuration/object_detectors.md +++ b/docs/docs/configuration/object_detectors.md @@ -10,25 +10,31 @@ title: Object Detectors Frigate supports multiple different detectors that work on different types of hardware: **Most Hardware** + - [Coral EdgeTPU](#edge-tpu-detector): The Google Coral EdgeTPU is available in USB and m.2 format allowing for a wide range of compatibility with devices. - [Hailo](#hailo-8l): The Hailo8 AI Acceleration module is available in m.2 format with a HAT for RPi devices, offering a wide range of compatibility with devices. **AMD** + - [ROCm](#amdrocm-gpu-detector): ROCm can run on AMD Discrete GPUs to provide efficient object detection. - [ONNX](#onnx): ROCm will automatically be detected and used as a detector in the `-rocm` Frigate image when a supported ONNX model is configured. **Intel** + - [OpenVino](#openvino-detector): OpenVino can run on Intel Arc GPUs, Intel integrated GPUs, and Intel CPUs to provide efficient object detection. - [ONNX](#onnx): OpenVINO will automatically be detected and used as a detector in the default Frigate image when a supported ONNX model is configured. **Nvidia** + - [TensortRT](#nvidia-tensorrt-detector): TensorRT can run on Nvidia GPUs and Jetson devices, using one of many default models. - [ONNX](#onnx): TensorRT will automatically be detected and used as a detector in the `-tensorrt` or `-tensorrt-jp(4/5)` Frigate images when a supported ONNX model is configured. **Rockchip** + - [RKNN](#rockchip-platform): RKNN models can run on Rockchip devices with included NPUs. **For Testing** + - [CPU Detector (not recommended for actual use](#cpu-detector-not-recommended): Use a CPU to run tflite model, this is not recommended and in most cases OpenVINO can be used in CPU mode with better results. ::: @@ -147,7 +153,6 @@ model: path: /config/model_cache/h8l_cache/ssd_mobilenet_v1.hef ``` - ## OpenVINO Detector The OpenVINO detector type runs an OpenVINO IR model on AMD and Intel CPUs, Intel GPUs and Intel VPU hardware. To configure an OpenVINO detector, set the `"type"` attribute to `"openvino"`. @@ -412,7 +417,7 @@ When using docker compose: ```yaml services: frigate: -... + environment: HSA_OVERRIDE_GFX_VERSION: "9.0.0" ``` @@ -555,6 +560,50 @@ model: Note that the labelmap uses a subset of the complete COCO label set that has only 80 objects. +#### D-FINE + +[D-FINE](https://github.com/Peterande/D-FINE) is the [current state of the art](https://paperswithcode.com/sota/real-time-object-detection-on-coco?p=d-fine-redefine-regression-task-in-detrs-as) at the time of writing. The ONNX exported models are supported, but not included by default. + +To export as ONNX: + +1. Clone: https://github.com/Peterande/D-FINE and install all dependencies. +2. Select and download a checkpoint from the [readme](https://github.com/Peterande/D-FINE). +3. 
Modify line 58 of `tools/deployment/export_onnx.py` and change batch size to 1: `data = torch.rand(1, 3, 640, 640)` +4. Run the export, making sure you select the right config, for your checkpoint. + +Example: + +``` +python3 tools/deployment/export_onnx.py -c configs/dfine/objects365/dfine_hgnetv2_m_obj2coco.yml -r output/dfine_m_obj2coco.pth +``` + +:::tip + +Model export has only been tested on Linux (or WSL2). Not all dependencies are in `requirements.txt`. Some live in the deployment folder, and some are still missing entirely and must be installed manually. + +Make sure you change the batch size to 1 before exporting. + +::: + +After placing the downloaded onnx model in your config folder, you can use the following configuration: + +```yaml +detectors: + onnx: + type: onnx + +model: + model_type: dfine + width: 640 + height: 640 + input_tensor: nchw + input_dtype: float + path: /config/model_cache/dfine_m_obj2coco.onnx + labelmap_path: /labelmap/coco-80.txt +``` + +Note that the labelmap uses a subset of the complete COCO label set that has only 80 objects. + ## CPU Detector (not recommended) The CPU detector type runs a TensorFlow Lite model utilizing the CPU without hardware acceleration. It is recommended to use a hardware accelerated detector type instead for better performance. To configure a CPU based detector, set the `"type"` attribute to `"cpu"`. @@ -704,7 +753,7 @@ To convert a onnx model to the rknn format using the [rknn-toolkit2](https://git This is an example configuration file that you need to adjust to your specific onnx model: ```yaml -soc: ["rk3562","rk3566", "rk3568", "rk3576", "rk3588"] +soc: ["rk3562", "rk3566", "rk3568", "rk3576", "rk3588"] quantization: false output_name: "{input_basename}" diff --git a/frigate/detectors/detector_config.py b/frigate/detectors/detector_config.py index c8aea0a1d..16599b141 100644 --- a/frigate/detectors/detector_config.py +++ b/frigate/detectors/detector_config.py @@ -37,6 +37,7 @@ class ModelTypeEnum(str, Enum): yolox = "yolox" yolov9 = "yolov9" yolonas = "yolonas" + dfine = "dfine" class ModelConfig(BaseModel): diff --git a/frigate/detectors/plugins/onnx.py b/frigate/detectors/plugins/onnx.py index c8589145a..13a948de9 100644 --- a/frigate/detectors/plugins/onnx.py +++ b/frigate/detectors/plugins/onnx.py @@ -9,7 +9,11 @@ from frigate.detectors.detector_config import ( BaseDetectorConfig, ModelTypeEnum, ) -from frigate.util.model import get_ort_providers, post_process_yolov9 +from frigate.util.model import ( + get_ort_providers, + post_process_dfine, + post_process_yolov9, +) logger = logging.getLogger(__name__) @@ -41,6 +45,7 @@ class ONNXDetector(DetectionApi): providers, options = get_ort_providers( detector_config.device == "CPU", detector_config.device ) + self.model = ort.InferenceSession( path, providers=providers, provider_options=options ) @@ -55,6 +60,16 @@ class ONNXDetector(DetectionApi): logger.info(f"ONNX: {path} loaded") def detect_raw(self, tensor_input: np.ndarray): + if self.onnx_model_type == ModelTypeEnum.dfine: + tensor_output = self.model.run( + None, + { + "images": tensor_input, + "orig_target_sizes": np.array([[self.h, self.w]], dtype=np.int64), + }, + ) + return post_process_dfine(tensor_output, self.w, self.h) + model_input_name = self.model.get_inputs()[0].name tensor_output = self.model.run(None, {model_input_name: tensor_input}) diff --git a/frigate/util/model.py b/frigate/util/model.py index da7b1a50a..0428a42ff 100644 --- a/frigate/util/model.py +++ b/frigate/util/model.py @@ -9,7 +9,34 @@ import 
onnxruntime as ort logger = logging.getLogger(__name__) + ### Post Processing +def post_process_dfine(tensor_output: np.ndarray, width, height) -> np.ndarray: + class_ids = tensor_output[0][tensor_output[2] > 0.4] + boxes = tensor_output[1][tensor_output[2] > 0.4] + scores = tensor_output[2][tensor_output[2] > 0.4] + + input_shape = np.array([height, width, height, width]) + boxes = np.divide(boxes, input_shape, dtype=np.float32) + indices = cv2.dnn.NMSBoxes(boxes, scores, score_threshold=0.4, nms_threshold=0.4) + detections = np.zeros((20, 6), np.float32) + + for i, (bbox, confidence, class_id) in enumerate( + zip(boxes[indices], scores[indices], class_ids[indices]) + ): + if i == 20: + break + + detections[i] = [ + class_id, + confidence, + bbox[1], + bbox[0], + bbox[3], + bbox[2], + ] + + return detections def post_process_yolov9(predictions: np.ndarray, width, height) -> np.ndarray:
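
---

For reviewers, a minimal standalone sketch of how the D-FINE ONNX export used by this patch can be exercised outside Frigate. The two named inputs (`images`, `orig_target_sizes`), the output ordering (labels, boxes, scores), and the 0.4 score/NMS thresholds mirror `detect_raw` and `post_process_dfine` above; the model path, image path, and the simplified preprocessing (no normalization or BGR→RGB conversion) are illustrative assumptions, not part of the patch.

```python
# Standalone sketch: run a D-FINE ONNX export the same way ONNXDetector does
# for ModelTypeEnum.dfine, then apply post-processing mirroring post_process_dfine.
import cv2
import numpy as np
import onnxruntime as ort

MODEL_PATH = "dfine_m_obj2coco.onnx"  # hypothetical path to the exported model
IMAGE_PATH = "example.jpg"            # hypothetical test image
WIDTH, HEIGHT = 640, 640

# Build an NCHW float32 tensor. Preprocessing is simplified here; a real
# pipeline may need normalization and channel-order conversion.
frame = cv2.imread(IMAGE_PATH)
resized = cv2.resize(frame, (WIDTH, HEIGHT))
tensor = resized.transpose(2, 0, 1)[np.newaxis].astype(np.float32)

session = ort.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])

# D-FINE exports take the image tensor plus the original target size,
# exactly as the dfine branch of detect_raw passes them.
labels, boxes, scores = session.run(
    None,
    {
        "images": tensor,
        "orig_target_sizes": np.array([[HEIGHT, WIDTH]], dtype=np.int64),
    },
)

# Filter by score, scale boxes by the input shape, and run NMS,
# following the same steps as post_process_dfine.
keep = scores > 0.4
class_ids, boxes, scores = labels[keep], boxes[keep], scores[keep]
boxes = np.divide(boxes, np.array([HEIGHT, WIDTH, HEIGHT, WIDTH]), dtype=np.float32)
indices = cv2.dnn.NMSBoxes(boxes, scores, score_threshold=0.4, nms_threshold=0.4)

for bbox, score, class_id in zip(boxes[indices], scores[indices], class_ids[indices]):
    print(f"class={int(class_id)} score={score:.2f} box={bbox.round(3).tolist()}")
```

Note that, like the patch, the sketch caps nothing at 20 detections; Frigate's `post_process_dfine` additionally packs the first 20 results into its fixed `(20, 6)` detections array in `[class_id, confidence, y_min, x_min, y_max, x_max]` order.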