diff --git a/docs/docs/configuration/object_detectors.md b/docs/docs/configuration/object_detectors.md
index 5b92c03d0..7ad3b3a11 100644
--- a/docs/docs/configuration/object_detectors.md
+++ b/docs/docs/configuration/object_detectors.md
@@ -659,7 +659,7 @@ YOLOv3, YOLOv4, YOLOv7, and [YOLOv9](https://github.com/WongKinYiu/yolov9) model

 :::tip

-The YOLO detector has been designed to support YOLOv3, YOLOv4, YOLOv7, and YOLOv9 models, but may support other YOLO model architectures as well.
+The YOLO detector has been designed to support YOLOv3, YOLOv4, YOLOv7, and YOLOv9 models, but may support other YOLO model architectures as well. See [the models section](#downloading-yolo-models) for more information on downloading YOLO models for use in Frigate.

 :::

@@ -682,6 +682,29 @@ model:

 Note that the labelmap uses a subset of the complete COCO label set that has only 80 objects.

+#### YOLOx
+
+[YOLOx](https://github.com/Megvii-BaseDetection/YOLOX) models are supported, but not included by default. See [the models section](#downloading-yolo-models) for more information on downloading the YOLOx model for use in Frigate.
+
+After placing the downloaded onnx model in your config folder, you can use the following configuration:
+
+```yaml
+detectors:
+  onnx:
+    type: onnx
+
+model:
+  model_type: yolox
+  width: 416 # <--- should match the imgsize set during model export
+  height: 416 # <--- should match the imgsize set during model export
+  input_tensor: nchw
+  input_dtype: float_denorm
+  path: /config/model_cache/yolox_tiny.onnx
+  labelmap_path: /labelmap/coco-80.txt
+```
+
+Note that the labelmap uses a subset of the complete COCO label set that has only 80 objects.
+
 #### RF-DETR

 [RF-DETR](https://github.com/roboflow/rf-detr) is a DETR based model. The ONNX exported models are supported, but not included by default. See [the models section](#downloading-rf-detr-model) for more information on downloading the RF-DETR model for use in Frigate.
@@ -962,6 +985,22 @@ The input image size in this notebook is set to 320x320. This results in lower C

 ### Downloading YOLO Models

+#### YOLOx
+
+YOLOx models can be downloaded [from the YOLOx repo](https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/ONNXRuntime).
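+
+If you are unsure which input size your model was exported with, you can inspect the onnx file with a short script (a minimal sketch; assumes `onnxruntime` is installed in your environment and that the filename matches your download):
+
+```python
+import onnxruntime as ort
+
+session = ort.InferenceSession("yolox_tiny.onnx")
+
+# print each input's name and shape, e.g. [1, 3, 416, 416] -> height=416, width=416
+for model_input in session.get_inputs():
+    print(model_input.name, model_input.shape)
+```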
+
 #### YOLOv3, YOLOv4, and YOLOv7

 To export as ONNX:
diff --git a/frigate/detectors/detector_config.py b/frigate/detectors/detector_config.py
index d87405d08..e719e1062 100644
--- a/frigate/detectors/detector_config.py
+++ b/frigate/detectors/detector_config.py
@@ -31,6 +31,7 @@ class InputTensorEnum(str, Enum):

 class InputDTypeEnum(str, Enum):
     float = "float"
+    float_denorm = "float_denorm"  # non-normalized float
     int = "int"


diff --git a/frigate/detectors/plugins/onnx.py b/frigate/detectors/plugins/onnx.py
index aef6e909b..2f9b3d8a4 100644
--- a/frigate/detectors/plugins/onnx.py
+++ b/frigate/detectors/plugins/onnx.py
@@ -14,6 +14,7 @@ from frigate.util.model import (
     post_process_dfine,
     post_process_rfdetr,
     post_process_yolo,
+    post_process_yolox,
 )

 logger = logging.getLogger(__name__)
@@ -58,6 +59,25 @@ class ONNXDetector(DetectionApi):
         self.onnx_model_shape = detector_config.model.input_tensor
         path = detector_config.model.path

+        if self.onnx_model_type == ModelTypeEnum.yolox:
+            grids = []
+            expanded_strides = []
+
+            # pre-compute the grids and strides used to decode raw predictions
+            strides = [8, 16, 32]
+            hsizes = [self.h // stride for stride in strides]
+            wsizes = [self.w // stride for stride in strides]
+
+            for hsize, wsize, stride in zip(hsizes, wsizes, strides):
+                xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
+                grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
+                grids.append(grid)
+                shape = grid.shape[:2]
+                expanded_strides.append(np.full((*shape, 1), stride))
+
+            self.grids = np.concatenate(grids, 1)
+            self.expanded_strides = np.concatenate(expanded_strides, 1)
+
         logger.info(f"ONNX: {path} loaded")

     def detect_raw(self, tensor_input: np.ndarray):
@@ -99,6 +119,10 @@ class ONNXDetector(DetectionApi):
             return detections
         elif self.onnx_model_type == ModelTypeEnum.yologeneric:
             return post_process_yolo(tensor_output, self.w, self.h)
+        elif self.onnx_model_type == ModelTypeEnum.yolox:
+            return post_process_yolox(
+                tensor_output[0], self.w, self.h, self.grids, self.expanded_strides
+            )
         else:
             raise Exception(
                 f"{self.onnx_model_type} is currently not supported for onnx. See the docs for more info on supported models."
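For reference, the decode setup added to `ONNXDetector.__init__` above can be reproduced standalone. This sketch (illustrative only, not part of the patch; the 416x416 input size is an assumption taken from the docs example) builds the same `grids` and `expanded_strides` tensors:

```python
import numpy as np

# build one (x, y) grid cell per prediction and record the stride of the
# feature map each prediction came from, mirroring the __init__ logic above
w = h = 416
strides = [8, 16, 32]

grids, expanded_strides = [], []
for stride in strides:
    hsize, wsize = h // stride, w // stride
    xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
    grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
    grids.append(grid)
    expanded_strides.append(np.full((1, grid.shape[1], 1), stride))

grids = np.concatenate(grids, 1)
expanded_strides = np.concatenate(expanded_strides, 1)

# 52*52 + 26*26 + 13*13 = 3549 predictions for a 416x416 input
print(grids.shape, expanded_strides.shape)  # (1, 3549, 2) (1, 3549, 1)
```

Raw YOLOX outputs are offsets within each grid cell, so `post_process_yolox` below maps them to pixel space with `(xy + grid) * stride` for centers and `exp(wh) * stride` for box sizes.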
diff --git a/frigate/object_detection/base.py b/frigate/object_detection/base.py
index dfc39ac2d..b837984fa 100644
--- a/frigate/object_detection/base.py
+++ b/frigate/object_detection/base.py
@@ -77,6 +77,8 @@ class LocalObjectDetector(ObjectDetector):
         if self.dtype == InputDTypeEnum.float:
             tensor_input = tensor_input.astype(np.float32)
             tensor_input /= 255
+        elif self.dtype == InputDTypeEnum.float_denorm:
+            tensor_input = tensor_input.astype(np.float32)

         return self.detect_api.detect_raw(tensor_input=tensor_input)

diff --git a/frigate/util/model.py b/frigate/util/model.py
index a4ff9bd75..d402ea36d 100644
--- a/frigate/util/model.py
+++ b/frigate/util/model.py
@@ -187,7 +187,12 @@ def __post_process_multipart_yolo(


 def __post_process_nms_yolo(predictions: np.ndarray, width, height) -> np.ndarray:
-    predictions = np.squeeze(predictions).T
+    predictions = np.squeeze(predictions)
+
+    # transpose the output so it has order (inferences, class_ids)
+    if predictions.shape[0] < predictions.shape[1]:
+        predictions = predictions.T
+
     scores = np.max(predictions[:, 4:], axis=1)
     predictions = predictions[scores > 0.4, :]
     scores = scores[scores > 0.4]
@@ -225,6 +230,53 @@ def post_process_yolo(output: list[np.ndarray], width: int, height: int) -> np.n
     return __post_process_nms_yolo(output[0], width, height)


+def post_process_yolox(
+    predictions: np.ndarray,
+    width: int,
+    height: int,
+    grids: np.ndarray,
+    expanded_strides: np.ndarray,
+) -> np.ndarray:
+    predictions[..., :2] = (predictions[..., :2] + grids) * expanded_strides
+    predictions[..., 2:4] = np.exp(predictions[..., 2:4]) * expanded_strides
+
+    # process organized predictions
+    predictions = predictions[0]
+    boxes = predictions[:, :4]
+    scores = predictions[:, 4:5] * predictions[:, 5:]
+
+    boxes_xyxy = np.ones_like(boxes)
+    boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2
+    boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2
+    boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2
+    boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2
+
+    cls_inds = scores.argmax(1)
+    scores = scores[np.arange(len(cls_inds)), cls_inds]
+
+    indices = cv2.dnn.NMSBoxes(
+        boxes_xyxy, scores, score_threshold=0.4, nms_threshold=0.4
+    )
+
+    detections = np.zeros((20, 6), np.float32)
+    for i, (bbox, confidence, class_id) in enumerate(
+        zip(boxes_xyxy[indices], scores[indices], cls_inds[indices])
+    ):
+        if i == 20:
+            break
+
+        detections[i] = [
+            class_id,
+            confidence,
+            bbox[1] / height,
+            bbox[0] / width,
+            bbox[3] / height,
+            bbox[2] / width,
+        ]
+
+    return detections
+
+
 ### ONNX Utilities
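To make the new `float_denorm` dtype concrete, the sketch below (illustrative only, not part of the patch) contrasts the two float input modes handled in `LocalObjectDetector` above. YOLOx onnx exports expect raw 0-255 pixel values, which is why the new branch skips the division by 255:

```python
import numpy as np

# a dummy uint8 frame in the NCHW layout the detector receives
frame = np.random.randint(0, 256, (1, 3, 416, 416), dtype=np.uint8)

normalized = frame.astype(np.float32) / 255  # input_dtype: float
denormalized = frame.astype(np.float32)      # input_dtype: float_denorm

print(normalized.max() <= 1.0)   # True: values scaled into [0, 1]
print(denormalized.max() > 1.0)  # True: raw 0-255 range preserved
```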