Add support for yolox models to onnx detector (#17773)

This commit is contained in:
Nicolas Mowen 2025-04-18 05:40:06 -06:00 committed by GitHub
parent 8270967cdc
commit 19aaa64fe9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 108 additions and 2 deletions

View File

@ -659,7 +659,7 @@ YOLOv3, YOLOv4, YOLOv7, and [YOLOv9](https://github.com/WongKinYiu/yolov9) model
:::tip
The YOLO detector has been designed to support YOLOv3, YOLOv4, YOLOv7, and YOLOv9 models, but may support other YOLO model architectures as well.
The YOLO detector has been designed to support YOLOv3, YOLOv4, YOLOv7, and YOLOv9 models, but may support other YOLO model architectures as well. See [the models section](#downloading-yolo-models) for more information on downloading YOLO models for use in Frigate.
:::
@ -682,6 +682,29 @@ model:
Note that the labelmap uses a subset of the complete COCO label set that has only 80 objects.
#### YOLOx
[YOLOx](https://github.com/Megvii-BaseDetection/YOLOX) models are supported, but not included by default. See [the models section](#downloading-yolo-models) for more information on downloading the YOLOx model for use in Frigate.
After placing the downloaded onnx model in your config folder, you can use the following configuration:
```yaml
detectors:
onnx:
type: onnx
model:
model_type: yolox
width: 416 # <--- should match the imgsize set during model export
height: 416 # <--- should match the imgsize set during model export
input_tensor: nchw_denorm
input_dtype: float
path: /config/model_cache/yolox_tiny.onnx
labelmap_path: /labelmap/coco-80.txt
```
Note that the labelmap uses a subset of the complete COCO label set that has only 80 objects.
#### RF-DETR
[RF-DETR](https://github.com/roboflow/rf-detr) is a DETR based model. The ONNX exported models are supported, but not included by default. See [the models section](#downloading-rf-detr-model) for more information on downloading the RF-DETR model for use in Frigate.
@ -962,6 +985,10 @@ The input image size in this notebook is set to 320x320. This results in lower C
### Downloading YOLO Models
#### YOLOx
YOLOx models can be downloaded [from the YOLOx repo](https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/ONNXRuntime).
#### YOLOv3, YOLOv4, and YOLOv7
To export as ONNX:

View File

@ -31,6 +31,7 @@ class InputTensorEnum(str, Enum):
class InputDTypeEnum(str, Enum):
float = "float"
float_denorm = "float_denorm" # non-normalized float
int = "int"

View File

@ -14,6 +14,7 @@ from frigate.util.model import (
post_process_dfine,
post_process_rfdetr,
post_process_yolo,
post_process_yolox,
)
logger = logging.getLogger(__name__)
@ -58,6 +59,25 @@ class ONNXDetector(DetectionApi):
self.onnx_model_shape = detector_config.model.input_tensor
path = detector_config.model.path
if self.onnx_model_type == ModelTypeEnum.yolox:
grids = []
expanded_strides = []
# decode and orient predictions
strides = [8, 16, 32]
hsizes = [self.h // stride for stride in strides]
wsizes = [self.w // stride for stride in strides]
for hsize, wsize, stride in zip(hsizes, wsizes, strides):
xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
grids.append(grid)
shape = grid.shape[:2]
expanded_strides.append(np.full((*shape, 1), stride))
self.grids = np.concatenate(grids, 1)
self.expanded_strides = np.concatenate(expanded_strides, 1)
logger.info(f"ONNX: {path} loaded")
def detect_raw(self, tensor_input: np.ndarray):
@ -99,6 +119,10 @@ class ONNXDetector(DetectionApi):
return detections
elif self.onnx_model_type == ModelTypeEnum.yologeneric:
return post_process_yolo(tensor_output, self.w, self.h)
elif self.onnx_model_type == ModelTypeEnum.yolox:
return post_process_yolox(
tensor_output[0], self.w, self.h, self.grids, self.expanded_strides
)
else:
raise Exception(
f"{self.onnx_model_type} is currently not supported for onnx. See the docs for more info on supported models."

View File

@ -77,6 +77,8 @@ class LocalObjectDetector(ObjectDetector):
if self.dtype == InputDTypeEnum.float:
tensor_input = tensor_input.astype(np.float32)
tensor_input /= 255
elif self.dtype == InputDTypeEnum.float_denorm:
tensor_input = tensor_input.astype(np.float32)
return self.detect_api.detect_raw(tensor_input=tensor_input)

View File

@ -187,7 +187,12 @@ def __post_process_multipart_yolo(
def __post_process_nms_yolo(predictions: np.ndarray, width, height) -> np.ndarray:
predictions = np.squeeze(predictions).T
predictions = np.squeeze(predictions)
# transpose the output so it has order (inferences, class_ids)
if predictions.shape[0] < predictions.shape[1]:
predictions = predictions.T
scores = np.max(predictions[:, 4:], axis=1)
predictions = predictions[scores > 0.4, :]
scores = scores[scores > 0.4]
@ -225,6 +230,53 @@ def post_process_yolo(output: list[np.ndarray], width: int, height: int) -> np.n
return __post_process_nms_yolo(output[0], width, height)
def post_process_yolox(
predictions: np.ndarray,
width: int,
height: int,
grids: np.ndarray,
expanded_strides: np.ndarray,
) -> np.ndarray:
predictions[..., :2] = (predictions[..., :2] + grids) * expanded_strides
predictions[..., 2:4] = np.exp(predictions[..., 2:4]) * expanded_strides
# process organized predictions
predictions = predictions[0]
boxes = predictions[:, :4]
scores = predictions[:, 4:5] * predictions[:, 5:]
boxes_xyxy = np.ones_like(boxes)
boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2
boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2
boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2
boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2
cls_inds = scores.argmax(1)
scores = scores[np.arange(len(cls_inds)), cls_inds]
indices = cv2.dnn.NMSBoxes(
boxes_xyxy, scores, score_threshold=0.4, nms_threshold=0.4
)
detections = np.zeros((20, 6), np.float32)
for i, (bbox, confidence, class_id) in enumerate(
zip(boxes_xyxy[indices], scores[indices], cls_inds[indices])
):
if i == 20:
break
detections[i] = [
class_id,
confidence,
bbox[1] / height,
bbox[0] / width,
bbox[3] / height,
bbox[2] / width,
]
return detections
### ONNX Utilities