Add support for yolox models to onnx detector (#17773)

2025-11-07 01:20:07 +01:00 · 2025-04-18 05:40:06 -06:00 · 2025-04-18 05:40:06 -06:00 · 19aaa64fe9
commit 19aaa64fe9
parent 8270967cdc
5 changed files with 108 additions and 2 deletions
--- a/docs/docs/configuration/object_detectors.md
+++ b/docs/docs/configuration/object_detectors.md
@ -659,7 +659,7 @@ YOLOv3, YOLOv4, YOLOv7, and [YOLOv9](https://github.com/WongKinYiu/yolov9) model

 :::tip

-The YOLO detector has been designed to support YOLOv3, YOLOv4, YOLOv7, and YOLOv9 models, but may support other YOLO model architectures as well.
+The YOLO detector has been designed to support YOLOv3, YOLOv4, YOLOv7, and YOLOv9 models, but may support other YOLO model architectures as well. See [the models section](#downloading-yolo-models) for more information on downloading YOLO models for use in Frigate.

 :::

@ -682,6 +682,29 @@ model:

 Note that the labelmap uses a subset of the complete COCO label set that has only 80 objects.

+#### YOLOx
+
+[YOLOx](https://github.com/Megvii-BaseDetection/YOLOX) models are supported, but not included by default. See [the models section](#downloading-yolo-models) for more information on downloading the YOLOx model for use in Frigate.
+
+After placing the downloaded onnx model in your config folder, you can use the following configuration:
+
+```yaml
+detectors:
+  onnx:
+    type: onnx
+
+model:
+  model_type: yolox
+  width: 416 # <--- should match the imgsize set during model export
+  height: 416 # <--- should match the imgsize set during model export
+  input_tensor: nchw
+  input_dtype: float_denorm
+  path: /config/model_cache/yolox_tiny.onnx
+  labelmap_path: /labelmap/coco-80.txt
+```
+
+Note that the labelmap uses a subset of the complete COCO label set that has only 80 objects.
+
 #### RF-DETR

 [RF-DETR](https://github.com/roboflow/rf-detr) is a DETR based model. The ONNX exported models are supported, but not included by default. See [the models section](#downloading-rf-detr-model) for more information on downloading the RF-DETR model for use in Frigate.
@ -962,6 +985,10 @@ The input image size in this notebook is set to 320x320. This results in lower C

 ### Downloading YOLO Models

+#### YOLOx
+
+YOLOx models can be downloaded [from the YOLOx repo](https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/ONNXRuntime).
+
 #### YOLOv3, YOLOv4, and YOLOv7

 To export as ONNX:
--- a/frigate/detectors/detector_config.py
+++ b/frigate/detectors/detector_config.py
@ -31,6 +31,7 @@ class InputTensorEnum(str, Enum):

 class InputDTypeEnum(str, Enum):
    float = "float"
+    float_denorm = "float_denorm"  # non-normalized float
    int = "int"


--- a/frigate/detectors/plugins/onnx.py
+++ b/frigate/detectors/plugins/onnx.py
@ -14,6 +14,7 @@ from frigate.util.model import (
    post_process_dfine,
    post_process_rfdetr,
    post_process_yolo,
+    post_process_yolox,
 )

 logger = logging.getLogger(__name__)
@ -58,6 +59,25 @@ class ONNXDetector(DetectionApi):
        self.onnx_model_shape = detector_config.model.input_tensor
        path = detector_config.model.path

+        if self.onnx_model_type == ModelTypeEnum.yolox:
+            grids = []
+            expanded_strides = []
+
+            # decode and orient predictions
+            strides = [8, 16, 32]
+            hsizes = [self.h // stride for stride in strides]
+            wsizes = [self.w // stride for stride in strides]
+
+            for hsize, wsize, stride in zip(hsizes, wsizes, strides):
+                xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
+                grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
+                grids.append(grid)
+                shape = grid.shape[:2]
+                expanded_strides.append(np.full((*shape, 1), stride))
+
+            self.grids = np.concatenate(grids, 1)
+            self.expanded_strides = np.concatenate(expanded_strides, 1)
+
        logger.info(f"ONNX: {path} loaded")

    def detect_raw(self, tensor_input: np.ndarray):
@ -99,6 +119,10 @@ class ONNXDetector(DetectionApi):
            return detections
        elif self.onnx_model_type == ModelTypeEnum.yologeneric:
            return post_process_yolo(tensor_output, self.w, self.h)
+        elif self.onnx_model_type == ModelTypeEnum.yolox:
+            return post_process_yolox(
+                tensor_output[0], self.w, self.h, self.grids, self.expanded_strides
+            )
        else:
            raise Exception(
                f"{self.onnx_model_type} is currently not supported for onnx. See the docs for more info on supported models."
--- a/frigate/object_detection/base.py
+++ b/frigate/object_detection/base.py
@ -77,6 +77,8 @@ class LocalObjectDetector(ObjectDetector):
        if self.dtype == InputDTypeEnum.float:
            tensor_input = tensor_input.astype(np.float32)
            tensor_input /= 255
+        elif self.dtype == InputDTypeEnum.float_denorm:
+            tensor_input = tensor_input.astype(np.float32)

        return self.detect_api.detect_raw(tensor_input=tensor_input)

--- a/frigate/util/model.py
+++ b/frigate/util/model.py
@ -187,7 +187,12 @@ def __post_process_multipart_yolo(


 def __post_process_nms_yolo(predictions: np.ndarray, width, height) -> np.ndarray:
-    predictions = np.squeeze(predictions).T
+    predictions = np.squeeze(predictions)
+
+    # transpose the output so it has order (inferences, class_ids)
+    if predictions.shape[0] < predictions.shape[1]:
+        predictions = predictions.T
+
    scores = np.max(predictions[:, 4:], axis=1)
    predictions = predictions[scores > 0.4, :]
    scores = scores[scores > 0.4]
@ -225,6 +230,53 @@ def post_process_yolo(output: list[np.ndarray], width: int, height: int) -> np.n
        return __post_process_nms_yolo(output[0], width, height)


+def post_process_yolox(
+    predictions: np.ndarray,
+    width: int,
+    height: int,
+    grids: np.ndarray,
+    expanded_strides: np.ndarray,
+) -> np.ndarray:
+    """Decode raw YOLOX output into the fixed-size detection array.
+
+    Args:
+        predictions: raw model output indexed as (batch, anchor, values),
+            where the values are read as [cx, cy, w, h, objectness,
+            class scores...]. Only the first batch entry is used and the
+            array is left unmodified.
+        width: model input width, used to normalize x coordinates.
+        height: model input height, used to normalize y coordinates.
+        grids: per-anchor grid offsets added to the raw box centers.
+        expanded_strides: per-anchor stride that scales the centers and
+            sizes into input-pixel units.
+
+    Returns:
+        A (20, 6) float32 array whose rows are
+        [class_id, confidence, y_min, x_min, y_max, x_max] with the
+        coordinates divided by the input size. Unused rows stay zero.
+    """
+    # decode into new arrays so the caller's output tensor is not modified
+    centers = ((predictions[..., :2] + grids) * expanded_strides)[0]
+    sizes = (np.exp(predictions[..., 2:4]) * expanded_strides)[0]
+
+    # process organized predictions
+    predictions = predictions[0]
+    scores = predictions[:, 4:5] * predictions[:, 5:]
+
+    half_sizes = sizes / 2
+    top_left = centers - half_sizes
+    boxes_xyxy = np.concatenate((top_left, centers + half_sizes), axis=1)
+
+    # keep only the best class per anchor
+    cls_inds = scores.argmax(1)
+    scores = scores[np.arange(len(cls_inds)), cls_inds]
+
+    # cv2.dnn.NMSBoxes interprets each box as (x, y, width, height), so it
+    # must not be handed the corner-format boxes
+    boxes_xywh = np.concatenate((top_left, sizes), axis=1).astype(
+        predictions.dtype, copy=False
+    )
+    indices = cv2.dnn.NMSBoxes(
+        boxes_xywh, scores, score_threshold=0.4, nms_threshold=0.4
+    )
+
+    # NMSBoxes returns an empty tuple when nothing passes the threshold
+    # (indexing with it would select every anchor), and some OpenCV
+    # versions return an (N, 1) array; normalize to a flat index array
+    indices = np.asarray(indices, dtype=np.intp).reshape(-1)
+
+    detections = np.zeros((20, 6), np.float32)
+    for i, idx in enumerate(indices[:20]):
+        x_min, y_min, x_max, y_max = boxes_xyxy[idx]
+        detections[i] = [
+            cls_inds[idx],
+            scores[idx],
+            y_min / height,
+            x_min / width,
+            y_max / height,
+            x_max / width,
+        ]
+
+    return detections
+
+
 ### ONNX Utilities