mirror of https://github.com/blakeblackshear/frigate.git (synced 2025-07-30 13:48:07 +02:00)
Refactor yolov9 detector to support v3, v4, v7 as well (#17697)
* Implement blobbed yolov7 post processing and consolidate yolo implementation
* Update documentation
* Add repo
* fix name
This commit is contained in:
parent
4feba8bcf3
commit
bd0ee86db9
@@ -312,13 +312,13 @@ model:
 Note that the labelmap uses a subset of the complete COCO label set that has only 80 objects.
 
-#### YOLOv9
+#### YOLO (v3, v4, v7, v9)
 
-[YOLOv9](https://github.com/WongKinYiu/yolov9) models are supported, but not included by default.
+YOLOv3, YOLOv4, YOLOv7, and [YOLOv9](https://github.com/WongKinYiu/yolov9) models are supported, but not included by default.
 
 :::tip
 
-The YOLOv9 detector has been designed to support YOLOv9 models, but may support other YOLO model architectures as well.
+The YOLO detector has been designed to support YOLOv3, YOLOv4, YOLOv7, and YOLOv9 models, but may support other YOLO model architectures as well.
 
 :::
 
@@ -331,12 +331,12 @@ detectors:
     device: GPU
 
 model:
-  model_type: yolov9
-  width: 640 # <--- should match the imgsize set during model export
-  height: 640 # <--- should match the imgsize set during model export
+  model_type: yolo-generic
+  width: 320 # <--- should match the imgsize set during model export
+  height: 320 # <--- should match the imgsize set during model export
   input_tensor: nchw
   input_dtype: float
-  path: /config/model_cache/yolov9-t.onnx
+  path: /config/model_cache/yolo.onnx
   labelmap_path: /labelmap/coco-80.txt
 ```
 
@@ -653,13 +653,13 @@ model:
   labelmap_path: /labelmap/coco-80.txt
 ```
 
-#### YOLOv9
+#### YOLO (v3, v4, v7, v9)
 
-[YOLOv9](https://github.com/WongKinYiu/yolov9) models are supported, but not included by default.
+YOLOv3, YOLOv4, YOLOv7, and [YOLOv9](https://github.com/WongKinYiu/yolov9) models are supported, but not included by default.
 
 :::tip
 
-The YOLOv9 detector has been designed to support YOLOv9 models, but may support other YOLO model architectures as well.
+The YOLO detector has been designed to support YOLOv3, YOLOv4, YOLOv7, and YOLOv9 models, but may support other YOLO model architectures as well.
 
 :::
 
@@ -671,12 +671,12 @@ detectors:
     type: onnx
 
 model:
-  model_type: yolov9
-  width: 640 # <--- should match the imgsize set during model export
-  height: 640 # <--- should match the imgsize set during model export
+  model_type: yolo-generic
+  width: 320 # <--- should match the imgsize set during model export
+  height: 320 # <--- should match the imgsize set during model export
   input_tensor: nchw
   input_dtype: float
-  path: /config/model_cache/yolov9-t.onnx
+  path: /config/model_cache/yolo.onnx
   labelmap_path: /labelmap/coco-80.txt
 ```
 
@@ -684,7 +684,7 @@ Note that the labelmap uses a subset of the complete COCO label set that has onl
 
 #### RF-DETR
 
-[RF-DETR](https://github.com/roboflow/rf-detr) is a DETR based model. The ONNX exported models are supported, but not included by default. See [the models section](#downloading-rf-detr-model) for more informatoin on downloading the RF-DETR model for use in Frigate.
+[RF-DETR](https://github.com/roboflow/rf-detr) is a DETR based model. The ONNX exported models are supported, but not included by default. See [the models section](#downloading-rf-detr-model) for more information on downloading the RF-DETR model for use in Frigate.
 
 After placing the downloaded onnx model in your `config/model_cache` folder, you can use the following configuration:
 
@@ -959,3 +959,27 @@ The pre-trained YOLO-NAS weights from DeciAI are subject to their license and ca
 :::
 
 The input image size in this notebook is set to 320x320. This results in lower CPU usage and faster inference times without impacting performance in most cases due to the way Frigate crops video frames to areas of interest before running detection. The notebook and config can be updated to 640x640 if desired.
+
+### Downloading YOLO Models
+
+#### YOLOv3, YOLOv4, and YOLOv7
+
+To export as ONNX:
+
+```sh
+git clone https://github.com/NateMeyer/tensorrt_demos
+cd tensorrt_demos/yolo
+./download_yolo.sh
+python3 yolo_to_onnx.py -m yolov7-320
+```
+
+#### YOLOv9
+
+YOLOv9 models can be exported using the below code, or they [can be downloaded from Hugging Face](https://huggingface.co/Xenova/yolov9-onnx/tree/main).
+
+```sh
+git clone https://github.com/WongKinYiu/yolov9
+cd yolov9
+wget -O yolov9-t.pt "https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-t-converted.pt"
+python3 export.py --weights ./yolov9-t.pt --imgsz 320 --simplify
+```
 
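Since the config's `width`/`height` must match the `imgsz` used at export time, it can help to read the input shape back out of the exported file. A minimal sketch, assuming the `onnx` Python package is installed (the filename follows the export step above; this helper is not part of the commit):

```python
import onnx

# Load the exported model and print its input shape(s); for the export above,
# expect something like: images [1, 3, 320, 320] -> set width/height to 320.
model = onnx.load("yolov9-t.onnx")
for model_input in model.graph.input:
    dims = [d.dim_value or d.dim_param for d in model_input.type.tensor_type.shape.dim]
    print(model_input.name, dims)
```
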
@@ -37,7 +37,6 @@ class ModelTypeEnum(str, Enum):
     rfdetr = "rfdetr"
     ssd = "ssd"
     yolox = "yolox"
-    yolov9 = "yolov9"
     yolonas = "yolonas"
     yologeneric = "yolo-generic"
 
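Because `ModelTypeEnum` is a string-valued enum, removing the `yolov9` member means a config that still sets `model_type: yolov9` should now fail enum lookup, so existing configs presumably need to move to `yolo-generic`. A quick sketch of that behavior (trimmed-down enum for illustration, not the full class):

```python
from enum import Enum

# Illustrative subset of the enum after this change.
class ModelTypeEnum(str, Enum):
    yolox = "yolox"
    yolonas = "yolonas"
    yologeneric = "yolo-generic"

print(ModelTypeEnum("yolo-generic"))  # ModelTypeEnum.yologeneric
ModelTypeEnum("yolov9")  # raises ValueError: 'yolov9' is not a valid ModelTypeEnum
```
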
@@ -13,7 +13,7 @@ from frigate.util.model import (
     get_ort_providers,
     post_process_dfine,
     post_process_rfdetr,
-    post_process_yolov9,
+    post_process_yolo,
 )
 
 logger = logging.getLogger(__name__)
 
@@ -97,12 +97,8 @@ class ONNXDetector(DetectionApi):
                 x_max / self.w,
             ]
             return detections
-        elif (
-            self.onnx_model_type == ModelTypeEnum.yolov9
-            or self.onnx_model_type == ModelTypeEnum.yologeneric
-        ):
-            predictions: np.ndarray = tensor_output[0]
-            return post_process_yolov9(predictions, self.w, self.h)
+        elif self.onnx_model_type == ModelTypeEnum.yologeneric:
+            return post_process_yolo(tensor_output, self.w, self.h)
         else:
             raise Exception(
                 f"{self.onnx_model_type} is currently not supported for onnx. See the docs for more info on supported models."
 
@@ -13,7 +13,7 @@ from frigate.detectors.detector_config import BaseDetectorConfig, ModelTypeEnum
 from frigate.util.model import (
     post_process_dfine,
     post_process_rfdetr,
-    post_process_yolov9,
+    post_process_yolo,
 )
 
 logger = logging.getLogger(__name__)
 
@@ -33,7 +33,6 @@ class OvDetector(DetectionApi):
         ModelTypeEnum.rfdetr,
         ModelTypeEnum.ssd,
         ModelTypeEnum.yolonas,
-        ModelTypeEnum.yolov9,
         ModelTypeEnum.yologeneric,
         ModelTypeEnum.yolox,
     ]
 
@@ -232,12 +231,13 @@ class OvDetector(DetectionApi):
                 x_max / self.w,
             ]
             return detections
-        elif (
-            self.ov_model_type == ModelTypeEnum.yolov9
-            or self.ov_model_type == ModelTypeEnum.yologeneric
-        ):
-            out_tensor = infer_request.get_output_tensor(0).data
-            return post_process_yolov9(out_tensor, self.w, self.h)
+        elif self.ov_model_type == ModelTypeEnum.yologeneric:
+            out_tensor = []
+
+            for item in infer_request.output_tensors:
+                out_tensor.append(item.data)
+
+            return post_process_yolo(out_tensor, self.w, self.h)
         elif self.ov_model_type == ModelTypeEnum.yolox:
             out_tensor = infer_request.get_output_tensor()
             # [x, y, h, w, box_score, class_no_1, ..., class_no_80],
 
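The OpenVINO path now gathers every output tensor because multi-head YOLO models (v3/v4/v7) emit one tensor per detection head, while single-tensor models such as YOLOv9 still arrive as a one-element list. For illustration only, the three head outputs of a 320x320 YOLOv7-style COCO model would look like this (shapes inferred from the strides and anchors in the post-processing hunk below, not taken from the commit):

```python
import numpy as np

# One tensor per detection head; 255 = 3 anchors x (4 box + 1 objectness + 80 classes).
out_tensor = [
    np.zeros((1, 255, 40, 40), dtype=np.float32),  # stride 8:  320 / 8  = 40
    np.zeros((1, 255, 20, 20), dtype=np.float32),  # stride 16: 320 / 16 = 20
    np.zeros((1, 255, 10, 10), dtype=np.float32),  # stride 32: 320 / 32 = 10
]
```
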
@@ -99,7 +99,94 @@ def post_process_rfdetr(tensor_output: list[np.ndarray, np.ndarray]) -> np.ndarr
     return detections
 
 
-def post_process_yolov9(predictions: np.ndarray, width, height) -> np.ndarray:
+def __post_process_multipart_yolo(
+    output_list,
+    width,
+    height,
+):
+    anchors = [
+        [(12, 16), (19, 36), (40, 28)],
+        [(36, 75), (76, 55), (72, 146)],
+        [(142, 110), (192, 243), (459, 401)],
+    ]
+
+    stride_map = {0: 8, 1: 16, 2: 32}
+
+    all_boxes = []
+    all_scores = []
+    all_class_ids = []
+
+    for i, output in enumerate(output_list):
+        bs, _, ny, nx = output.shape
+        stride = stride_map[i]
+        anchor_set = anchors[i]
+
+        num_anchors = len(anchor_set)
+        output = output.reshape(bs, num_anchors, 85, ny, nx)
+        output = output.transpose(0, 1, 3, 4, 2)
+        output = output[0]
+
+        for a_idx, (anchor_w, anchor_h) in enumerate(anchor_set):
+            for y in range(ny):
+                for x in range(nx):
+                    pred = output[a_idx, y, x]
+                    class_probs = pred[5:]
+                    class_id = np.argmax(class_probs)
+                    class_conf = class_probs[class_id]
+                    conf = class_conf * pred[4]
+
+                    if conf < 0.4:
+                        continue
+
+                    dx = pred[0]
+                    dy = pred[1]
+                    dw = pred[2]
+                    dh = pred[3]
+
+                    bx = ((dx * 2.0 - 0.5) + x) * stride
+                    by = ((dy * 2.0 - 0.5) + y) * stride
+                    bw = ((dw * 2.0) ** 2) * anchor_w
+                    bh = ((dh * 2.0) ** 2) * anchor_h
+
+                    x1 = max(0, bx - bw / 2) / width
+                    y1 = max(0, by - bh / 2) / height
+                    x2 = min(width, bx + bw / 2) / width
+                    y2 = min(height, by + bh / 2) / height
+
+                    all_boxes.append([x1, y1, x2, y2])
+                    all_scores.append(conf)
+                    all_class_ids.append(class_id)
+
+    formatted_boxes = [
+        [
+            int(x1 * width),
+            int(y1 * height),
+            int((x2 - x1) * width),
+            int((y2 - y1) * height),
+        ]
+        for x1, y1, x2, y2 in all_boxes
+    ]
+
+    indices = cv2.dnn.NMSBoxes(
+        bboxes=formatted_boxes,
+        scores=all_scores,
+        score_threshold=0.4,
+        nms_threshold=0.4,
+    )
+
+    results = np.zeros((20, 6), np.float32)
+
+    if len(indices) > 0:
+        for i, idx in enumerate(indices.flatten()[:20]):
+            class_id = all_class_ids[idx]
+            conf = all_scores[idx]
+            x1, y1, x2, y2 = all_boxes[idx]
+            results[i] = [class_id, conf, y1, x1, y2, x2]
+
+    return np.array(results, dtype=np.float32)
+
+
+def __post_process_nms_yolo(predictions: np.ndarray, width, height) -> np.ndarray:
     predictions = np.squeeze(predictions).T
     scores = np.max(predictions[:, 4:], axis=1)
     predictions = predictions[scores > 0.4, :]
 
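The box decode in `__post_process_multipart_yolo` follows the YOLOv5/v7 convention and assumes the head outputs are already sigmoid-activated: center = `(2t - 0.5 + cell) * stride`, size = `(2t)^2 * anchor`. A worked example with made-up values:

```python
# A prediction at grid cell (x=5, y=3) on the stride-16 head, anchor (36, 75).
stride, anchor_w, anchor_h = 16, 36, 75
dx, dy, dw, dh = 0.6, 0.5, 0.7, 0.8  # illustrative sigmoid-activated outputs

bx = ((dx * 2.0 - 0.5) + 5) * stride  # (0.7 + 5) * 16 = 91.2 (center x, pixels)
by = ((dy * 2.0 - 0.5) + 3) * stride  # (0.5 + 3) * 16 = 56.0 (center y, pixels)
bw = ((dw * 2.0) ** 2) * anchor_w     # 1.4**2 * 36 = 70.56  (width, pixels)
bh = ((dh * 2.0) ** 2) * anchor_h     # 1.6**2 * 75 = 192.0  (height, pixels)
```
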
@@ -131,6 +218,13 @@ def post_process_yolov9(predictions: np.ndarray, width, height) -> np.ndarray:
     return detections
 
 
+def post_process_yolo(output: list[np.ndarray], width: int, height: int) -> np.ndarray:
+    if len(output) > 1:
+        return __post_process_multipart_yolo(output, width, height)
+    else:
+        return __post_process_nms_yolo(output[0], width, height)
+
+
 ### ONNX Utilities
 
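For reference, the dispatch rule is simply the number of output tensors: multi-head models take the anchor-decode path, single-tensor models take the NMS path. A hypothetical smoke test, with dummy shapes for a 320x320 COCO model (all-zero tensors fall below the 0.4 confidence threshold, so no detections are produced):

```python
import numpy as np

v9_style = [np.zeros((1, 84, 2100), dtype=np.float32)]  # 4 box + 80 classes, 2100 cells
v7_style = [
    np.zeros((1, 255, 40, 40), dtype=np.float32),
    np.zeros((1, 255, 20, 20), dtype=np.float32),
    np.zeros((1, 255, 10, 10), dtype=np.float32),
]

post_process_yolo(v9_style, 320, 320)  # len == 1 -> __post_process_nms_yolo
post_process_yolo(v7_style, 320, 320)  # len > 1  -> __post_process_multipart_yolo
```
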