mirror of
https://github.com/blakeblackshear/frigate.git
synced 2025-07-16 13:47:07 +02:00
Add support for yolox models to onnx detector (#17773)
This commit is contained in:
parent
8270967cdc
commit
19aaa64fe9
@ -659,7 +659,7 @@ YOLOv3, YOLOv4, YOLOv7, and [YOLOv9](https://github.com/WongKinYiu/yolov9) model
|
|||||||
|
|
||||||
:::tip
|
:::tip
|
||||||
|
|
||||||
The YOLO detector has been designed to support YOLOv3, YOLOv4, YOLOv7, and YOLOv9 models, but may support other YOLO model architectures as well.
|
The YOLO detector has been designed to support YOLOv3, YOLOv4, YOLOv7, and YOLOv9 models, but may support other YOLO model architectures as well. See [the models section](#downloading-yolo-models) for more information on downloading YOLO models for use in Frigate.
|
||||||
|
|
||||||
:::
|
:::
|
||||||
|
|
||||||
@ -682,6 +682,29 @@ model:
|
|||||||
|
|
||||||
Note that the labelmap uses a subset of the complete COCO label set that has only 80 objects.
|
Note that the labelmap uses a subset of the complete COCO label set that has only 80 objects.
|
||||||
|
|
||||||
|
#### YOLOx
|
||||||
|
|
||||||
|
[YOLOx](https://github.com/Megvii-BaseDetection/YOLOX) models are supported, but not included by default. See [the models section](#downloading-yolo-models) for more information on downloading the YOLOx model for use in Frigate.
|
||||||
|
|
||||||
|
After placing the downloaded ONNX model in your config folder, you can use the following configuration:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
detectors:
|
||||||
|
onnx:
|
||||||
|
type: onnx
|
||||||
|
|
||||||
|
model:
|
||||||
|
model_type: yolox
|
||||||
|
width: 416 # <--- should match the imgsize set during model export
|
||||||
|
height: 416 # <--- should match the imgsize set during model export
|
||||||
|
input_tensor: nchw
|
||||||
|
input_dtype: float_denorm
|
||||||
|
path: /config/model_cache/yolox_tiny.onnx
|
||||||
|
labelmap_path: /labelmap/coco-80.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that the labelmap uses a subset of the complete COCO label set that has only 80 objects.
|
||||||
|
|
||||||
#### RF-DETR
|
#### RF-DETR
|
||||||
|
|
||||||
[RF-DETR](https://github.com/roboflow/rf-detr) is a DETR based model. The ONNX exported models are supported, but not included by default. See [the models section](#downloading-rf-detr-model) for more information on downloading the RF-DETR model for use in Frigate.
|
[RF-DETR](https://github.com/roboflow/rf-detr) is a DETR based model. The ONNX exported models are supported, but not included by default. See [the models section](#downloading-rf-detr-model) for more information on downloading the RF-DETR model for use in Frigate.
|
||||||
@ -962,6 +985,10 @@ The input image size in this notebook is set to 320x320. This results in lower C
|
|||||||
|
|
||||||
### Downloading YOLO Models
|
### Downloading YOLO Models
|
||||||
|
|
||||||
|
#### YOLOx
|
||||||
|
|
||||||
|
YOLOx models can be downloaded [from the YOLOx repo](https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/ONNXRuntime).
|
||||||
|
|
||||||
#### YOLOv3, YOLOv4, and YOLOv7
|
#### YOLOv3, YOLOv4, and YOLOv7
|
||||||
|
|
||||||
To export as ONNX:
|
To export as ONNX:
|
||||||
|
@ -31,6 +31,7 @@ class InputTensorEnum(str, Enum):
|
|||||||
|
|
||||||
class InputDTypeEnum(str, Enum):
|
class InputDTypeEnum(str, Enum):
|
||||||
float = "float"
|
float = "float"
|
||||||
|
float_denorm = "float_denorm" # non-normalized float
|
||||||
int = "int"
|
int = "int"
|
||||||
|
|
||||||
|
|
||||||
|
@ -14,6 +14,7 @@ from frigate.util.model import (
|
|||||||
post_process_dfine,
|
post_process_dfine,
|
||||||
post_process_rfdetr,
|
post_process_rfdetr,
|
||||||
post_process_yolo,
|
post_process_yolo,
|
||||||
|
post_process_yolox,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@ -58,6 +59,25 @@ class ONNXDetector(DetectionApi):
|
|||||||
self.onnx_model_shape = detector_config.model.input_tensor
|
self.onnx_model_shape = detector_config.model.input_tensor
|
||||||
path = detector_config.model.path
|
path = detector_config.model.path
|
||||||
|
|
||||||
|
if self.onnx_model_type == ModelTypeEnum.yolox:
|
||||||
|
grids = []
|
||||||
|
expanded_strides = []
|
||||||
|
|
||||||
|
# decode and orient predictions
|
||||||
|
strides = [8, 16, 32]
|
||||||
|
hsizes = [self.h // stride for stride in strides]
|
||||||
|
wsizes = [self.w // stride for stride in strides]
|
||||||
|
|
||||||
|
for hsize, wsize, stride in zip(hsizes, wsizes, strides):
|
||||||
|
xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
|
||||||
|
grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
|
||||||
|
grids.append(grid)
|
||||||
|
shape = grid.shape[:2]
|
||||||
|
expanded_strides.append(np.full((*shape, 1), stride))
|
||||||
|
|
||||||
|
self.grids = np.concatenate(grids, 1)
|
||||||
|
self.expanded_strides = np.concatenate(expanded_strides, 1)
|
||||||
|
|
||||||
logger.info(f"ONNX: {path} loaded")
|
logger.info(f"ONNX: {path} loaded")
|
||||||
|
|
||||||
def detect_raw(self, tensor_input: np.ndarray):
|
def detect_raw(self, tensor_input: np.ndarray):
|
||||||
@ -99,6 +119,10 @@ class ONNXDetector(DetectionApi):
|
|||||||
return detections
|
return detections
|
||||||
elif self.onnx_model_type == ModelTypeEnum.yologeneric:
|
elif self.onnx_model_type == ModelTypeEnum.yologeneric:
|
||||||
return post_process_yolo(tensor_output, self.w, self.h)
|
return post_process_yolo(tensor_output, self.w, self.h)
|
||||||
|
elif self.onnx_model_type == ModelTypeEnum.yolox:
|
||||||
|
return post_process_yolox(
|
||||||
|
tensor_output[0], self.w, self.h, self.grids, self.expanded_strides
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"{self.onnx_model_type} is currently not supported for onnx. See the docs for more info on supported models."
|
f"{self.onnx_model_type} is currently not supported for onnx. See the docs for more info on supported models."
|
||||||
|
@ -77,6 +77,8 @@ class LocalObjectDetector(ObjectDetector):
|
|||||||
if self.dtype == InputDTypeEnum.float:
|
if self.dtype == InputDTypeEnum.float:
|
||||||
tensor_input = tensor_input.astype(np.float32)
|
tensor_input = tensor_input.astype(np.float32)
|
||||||
tensor_input /= 255
|
tensor_input /= 255
|
||||||
|
elif self.dtype == InputDTypeEnum.float_denorm:
|
||||||
|
tensor_input = tensor_input.astype(np.float32)
|
||||||
|
|
||||||
return self.detect_api.detect_raw(tensor_input=tensor_input)
|
return self.detect_api.detect_raw(tensor_input=tensor_input)
|
||||||
|
|
||||||
|
@ -187,7 +187,12 @@ def __post_process_multipart_yolo(
|
|||||||
|
|
||||||
|
|
||||||
def __post_process_nms_yolo(predictions: np.ndarray, width, height) -> np.ndarray:
|
def __post_process_nms_yolo(predictions: np.ndarray, width, height) -> np.ndarray:
|
||||||
predictions = np.squeeze(predictions).T
|
predictions = np.squeeze(predictions)
|
||||||
|
|
||||||
|
# transpose the output so it has order (inferences, class_ids)
|
||||||
|
if predictions.shape[0] < predictions.shape[1]:
|
||||||
|
predictions = predictions.T
|
||||||
|
|
||||||
scores = np.max(predictions[:, 4:], axis=1)
|
scores = np.max(predictions[:, 4:], axis=1)
|
||||||
predictions = predictions[scores > 0.4, :]
|
predictions = predictions[scores > 0.4, :]
|
||||||
scores = scores[scores > 0.4]
|
scores = scores[scores > 0.4]
|
||||||
@ -225,6 +230,53 @@ def post_process_yolo(output: list[np.ndarray], width: int, height: int) -> np.n
|
|||||||
return __post_process_nms_yolo(output[0], width, height)
|
return __post_process_nms_yolo(output[0], width, height)
|
||||||
|
|
||||||
|
|
||||||
|
def post_process_yolox(
    predictions: np.ndarray,
    width: int,
    height: int,
    grids: np.ndarray,
    expanded_strides: np.ndarray,
) -> np.ndarray:
    """Decode raw YOLOX output and return a fixed-size detections array.

    Args:
        predictions: Raw model output indexed as (1, anchors, 5 + classes):
            columns 0-1 are grid-relative centers, 2-3 are log-scaled sizes,
            4 is objectness and 5+ are per-class scores.
        width: Model input width, used to normalize x coordinates.
        height: Model input height, used to normalize y coordinates.
        grids: Per-anchor grid offsets, broadcastable against
            ``predictions[..., :2]``.
        expanded_strides: Per-anchor stride, broadcastable against
            ``predictions[..., :2]``.

    Returns:
        A ``(20, 6)`` float32 array. Each populated row is
        ``[class_id, score, y_min, x_min, y_max, x_max]`` with coordinates
        divided by ``height`` / ``width``; unused rows stay zero.
    """
    max_detections = 20
    dtype = predictions.dtype

    # Decode into new arrays instead of writing back into the caller's tensor.
    centers = ((predictions[..., :2] + grids) * expanded_strides)[0].astype(
        dtype, copy=False
    )
    sizes = (np.exp(predictions[..., 2:4]) * expanded_strides)[0].astype(
        dtype, copy=False
    )
    predictions = predictions[0]

    # Final confidence is objectness * class score; keep the best class.
    class_scores = predictions[:, 4:5] * predictions[:, 5:]
    cls_inds = class_scores.argmax(1)
    scores = class_scores[np.arange(len(cls_inds)), cls_inds]

    top_left = centers - sizes / 2
    bottom_right = centers + sizes / 2
    boxes_xyxy = np.concatenate((top_left, bottom_right), axis=1)

    # cv2.dnn.NMSBoxes interprets each box as (x, y, w, h), so hand it
    # top-left + size rather than the corner representation.
    boxes_xywh = np.concatenate((top_left, sizes), axis=1)
    indices = cv2.dnn.NMSBoxes(
        boxes_xywh, scores, score_threshold=0.4, nms_threshold=0.4
    )

    # NMSBoxes may return an empty tuple (nothing kept) or, on older OpenCV
    # builds, an (N, 1) array. Indexing with an empty tuple would select the
    # whole array, so normalize to a flat integer index array first.
    keep = np.asarray(indices, dtype=np.int64).reshape(-1)[:max_detections]

    detections = np.zeros((max_detections, 6), np.float32)
    count = len(keep)
    detections[:count, 0] = cls_inds[keep]
    detections[:count, 1] = scores[keep]
    detections[:count, 2] = boxes_xyxy[keep, 1] / height
    detections[:count, 3] = boxes_xyxy[keep, 0] / width
    detections[:count, 4] = boxes_xyxy[keep, 3] / height
    detections[:count, 5] = boxes_xyxy[keep, 2] / width

    return detections
|
||||||
|
|
||||||
|
|
||||||
### ONNX Utilities
|
### ONNX Utilities
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user