Add support for yolox models to onnx detector (#17773)

2025-07-16 13:47:07 +02:00 · 2025-04-18 05:40:06 -06:00 · 2025-04-18 05:40:06 -06:00 · 19aaa64fe9
commit 19aaa64fe9
parent 8270967cdc
5 changed files with 108 additions and 2 deletions
--- a/docs/docs/configuration/object_detectors.md
+++ b/docs/docs/configuration/object_detectors.md
@ -659,7 +659,7 @@ YOLOv3, YOLOv4, YOLOv7, and [YOLOv9](https://github.com/WongKinYiu/yolov9) model
 :::tip
-The YOLO detector has been designed to support YOLOv3, YOLOv4, YOLOv7, and YOLOv9 models, but may support other YOLO model architectures as well.
+The YOLO detector has been designed to support YOLOv3, YOLOv4, YOLOv7, and YOLOv9 models, but may support other YOLO model architectures as well. See [the models section](#downloading-yolo-models) for more information on downloading YOLO models for use in Frigate.
 :::
@ -682,6 +682,29 @@ model:
 Note that the labelmap uses a subset of the complete COCO label set that has only 80 objects.
 #### YOLOx
 [YOLOx](https://github.com/Megvii-BaseDetection/YOLOX) models are supported, but not included by default. See [the models section](#downloading-yolo-models) for more information on downloading the YOLOx model for use in Frigate.
 After placing the downloaded ONNX model in your config folder (the example below uses `/config/model_cache/`), you can use the following configuration:
 ```yaml
 detectors:
  onnx:
    type: onnx
 model:
  model_type: yolox
  width: 416 # <--- should match the imgsize set during model export
  height: 416 # <--- should match the imgsize set during model export
  input_tensor: nchw
  input_dtype: float_denorm
  path: /config/model_cache/yolox_tiny.onnx
  labelmap_path: /labelmap/coco-80.txt
 ```
 Note that the labelmap uses a subset of the complete COCO label set that has only 80 objects.
 #### RF-DETR
 [RF-DETR](https://github.com/roboflow/rf-detr) is a DETR based model. The ONNX exported models are supported, but not included by default. See [the models section](#downloading-rf-detr-model) for more information on downloading the RF-DETR model for use in Frigate.
@ -962,6 +985,10 @@ The input image size in this notebook is set to 320x320. This results in lower C
 ### Downloading YOLO Models
 #### YOLOx
 YOLOx models can be downloaded [from the YOLOx repo](https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/ONNXRuntime).
 #### YOLOv3, YOLOv4, and YOLOv7
 To export as ONNX:
--- a/frigate/detectors/detector_config.py
+++ b/frigate/detectors/detector_config.py
@ -31,6 +31,7 @@ class InputTensorEnum(str, Enum):
 class InputDTypeEnum(str, Enum):
    float = "float"
    float_denorm = "float_denorm"  # non-normalized float
    int = "int"
--- a/frigate/detectors/plugins/onnx.py
+++ b/frigate/detectors/plugins/onnx.py
@ -14,6 +14,7 @@ from frigate.util.model import (
    post_process_dfine,
    post_process_rfdetr,
    post_process_yolo,
    post_process_yolox,
 )
 logger = logging.getLogger(__name__)
@ -58,6 +59,25 @@ class ONNXDetector(DetectionApi):
        self.onnx_model_shape = detector_config.model.input_tensor
        path = detector_config.model.path
        if self.onnx_model_type == ModelTypeEnum.yolox:
            grids = []
            expanded_strides = []
            # decode and orient predictions
            strides = [8, 16, 32]
            hsizes = [self.h // stride for stride in strides]
            wsizes = [self.w // stride for stride in strides]
            for hsize, wsize, stride in zip(hsizes, wsizes, strides):
                xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
                grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
                grids.append(grid)
                shape = grid.shape[:2]
                expanded_strides.append(np.full((*shape, 1), stride))
            self.grids = np.concatenate(grids, 1)
            self.expanded_strides = np.concatenate(expanded_strides, 1)
        logger.info(f"ONNX: {path} loaded")
    def detect_raw(self, tensor_input: np.ndarray):
@ -99,6 +119,10 @@ class ONNXDetector(DetectionApi):
            return detections
        elif self.onnx_model_type == ModelTypeEnum.yologeneric:
            return post_process_yolo(tensor_output, self.w, self.h)
        elif self.onnx_model_type == ModelTypeEnum.yolox:
            return post_process_yolox(
                tensor_output[0], self.w, self.h, self.grids, self.expanded_strides
            )
        else:
            raise Exception(
                f"{self.onnx_model_type} is currently not supported for onnx. See the docs for more info on supported models."
--- a/frigate/object_detection/base.py
+++ b/frigate/object_detection/base.py
@ -77,6 +77,8 @@ class LocalObjectDetector(ObjectDetector):
        if self.dtype == InputDTypeEnum.float:
            tensor_input = tensor_input.astype(np.float32)
            tensor_input /= 255
        elif self.dtype == InputDTypeEnum.float_denorm:
            tensor_input = tensor_input.astype(np.float32)
        return self.detect_api.detect_raw(tensor_input=tensor_input)
--- a/frigate/util/model.py
+++ b/frigate/util/model.py
@ -187,7 +187,12 @@ def __post_process_multipart_yolo(
 def __post_process_nms_yolo(predictions: np.ndarray, width, height) -> np.ndarray:
-    predictions = np.squeeze(predictions).T
+    predictions = np.squeeze(predictions)
    # transpose the output so it has order (inferences, class_ids)
    if predictions.shape[0] < predictions.shape[1]:
        predictions = predictions.T
    scores = np.max(predictions[:, 4:], axis=1)
    predictions = predictions[scores > 0.4, :]
    scores = scores[scores > 0.4]
@ -225,6 +230,53 @@ def post_process_yolo(output: list[np.ndarray], width: int, height: int) -> np.n
        return __post_process_nms_yolo(output[0], width, height)
 def post_process_yolox(
    predictions: np.ndarray,
    width: int,
    height: int,
    grids: np.ndarray,
    expanded_strides: np.ndarray,
 ) -> np.ndarray:
    """Decode raw YOLOX output and return the top detections after NMS.

    Args:
        predictions: Raw model output. It is indexed as a 3-D array whose
            first axis is taken at index 0 and whose last axis is laid out as
            ``[cx, cy, w, h, score, per-class scores...]`` (column 4 is
            presumably the YOLOX objectness score -- confirm against the
            exported model). The array is NOT modified.
        width: Model input width, used to normalize x coordinates.
        height: Model input height, used to normalize y coordinates.
        grids: Per-candidate grid offsets; must broadcast against
            ``predictions[..., :2]``.
        expanded_strides: Per-candidate stride; must broadcast against the
            decoded centers and sizes.

    Returns:
        A ``(20, 6)`` float32 array. Each used row is
        ``[class_id, confidence, y_min, x_min, y_max, x_max]`` with the
        coordinates divided by ``height`` / ``width``; unused rows are zero.
    """
    max_detections = 20

    # Decode into fresh arrays instead of writing back into `predictions`,
    # so the caller's output tensor is left untouched.
    centers = ((predictions[..., :2] + grids) * expanded_strides)[0]
    sizes = (np.exp(predictions[..., 2:4]) * expanded_strides)[0]
    candidates = predictions[0]

    top_left = centers - sizes / 2
    bottom_right = centers + sizes / 2

    # Best class per candidate, weighted by the shared score in column 4.
    class_scores = candidates[:, 4:5] * candidates[:, 5:]
    cls_inds = class_scores.argmax(1)
    scores = class_scores[np.arange(len(cls_inds)), cls_inds]

    # cv2.dnn.NMSBoxes interprets each box as (x, y, width, height); feeding
    # it corner-format boxes makes it compute overlaps on the wrong rectangles.
    nms_boxes = np.concatenate((top_left, sizes), axis=1).astype(np.float32)
    indices = cv2.dnn.NMSBoxes(
        nms_boxes, scores, score_threshold=0.4, nms_threshold=0.4
    )

    # Normalize the NMS result to a flat integer index array. When nothing
    # survives, an empty-tuple result used directly as an index would select
    # the WHOLE array (`arr[()]`) rather than nothing; some OpenCV versions
    # also return an (N, 1) array, which the reshape flattens.
    keep = np.asarray(indices, dtype=np.intp).reshape(-1)[:max_detections]

    detections = np.zeros((max_detections, 6), np.float32)
    count = len(keep)
    detections[:count, 0] = cls_inds[keep]
    detections[:count, 1] = scores[keep]
    detections[:count, 2] = top_left[keep, 1] / height
    detections[:count, 3] = top_left[keep, 0] / width
    detections[:count, 4] = bottom_right[keep, 1] / height
    detections[:count, 5] = bottom_right[keep, 0] / width

    return detections
 ### ONNX Utilities