def calculate_grids_strides(self, strides=(8, 16, 32)) -> None:
    """Precompute the cell grids and per-cell strides used to decode YOLOX output.

    For each stride the model input (``self.height`` x ``self.width``) is split
    into ``(height // stride) x (width // stride)`` cells. The (x, y) index of
    every cell and the stride of the level it belongs to are stored on the
    instance so they only have to be built once.

    Args:
        strides: Downsampling factor of each output level. Defaults to
            ``(8, 16, 32)``, the values that used to be hard-coded here.

    Side effects:
        Sets ``self.grids`` with shape ``(1, total_cells, 2)`` and
        ``self.expanded_strides`` with shape ``(1, total_cells, 1)``, where
        ``total_cells`` is the number of cells summed over all strides.
    """
    grids = []
    expanded_strides = []

    for stride in strides:
        hsize = self.height // stride
        wsize = self.width // stride

        # meshgrid varies x fastest, so cells are listed row by row as (x, y)
        xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
        grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
        grids.append(grid)

        # one stride value per cell, shaped to broadcast against the grid
        expanded_strides.append(np.full((*grid.shape[:2], 1), stride))

    self.grids = np.concatenate(grids, 1)
    self.expanded_strides = np.concatenate(expanded_strides, 1)
np.concatenate(grids, 1) - self.expanded_strides = np.concatenate(expanded_strides, 1) + self.calculate_grids_strides() logger.info(f"ONNX: {path} loaded") @@ -86,10 +70,12 @@ class ONNXDetector(DetectionApi): None, { "images": tensor_input, - "orig_target_sizes": np.array([[self.h, self.w]], dtype=np.int64), + "orig_target_sizes": np.array( + [[self.height, self.width]], dtype=np.int64 + ), }, ) - return post_process_dfine(tensor_output, self.w, self.h) + return post_process_dfine(tensor_output, self.width, self.height) model_input_name = self.model.get_inputs()[0].name tensor_output = self.model.run(None, {model_input_name: tensor_input}) @@ -111,17 +97,21 @@ class ONNXDetector(DetectionApi): detections[i] = [ class_id, confidence, - y_min / self.h, - x_min / self.w, - y_max / self.h, - x_max / self.w, + y_min / self.height, + x_min / self.width, + y_max / self.height, + x_max / self.width, ] return detections elif self.onnx_model_type == ModelTypeEnum.yologeneric: - return post_process_yolo(tensor_output, self.w, self.h) + return post_process_yolo(tensor_output, self.width, self.height) elif self.onnx_model_type == ModelTypeEnum.yolox: return post_process_yolox( - tensor_output[0], self.w, self.h, self.grids, self.expanded_strides + tensor_output[0], + self.width, + self.height, + self.grids, + self.expanded_strides, ) else: raise Exception( diff --git a/frigate/detectors/plugins/openvino.py b/frigate/detectors/plugins/openvino.py index 9c7ed5248..08d068d5e 100644 --- a/frigate/detectors/plugins/openvino.py +++ b/frigate/detectors/plugins/openvino.py @@ -38,6 +38,7 @@ class OvDetector(DetectionApi): ] def __init__(self, detector_config: OvDetectorConfig): + super().__init__(detector_config) self.ov_core = ov.Core() self.ov_model_type = detector_config.model.model_type @@ -133,25 +134,7 @@ class OvDetector(DetectionApi): break self.num_classes = tensor_shape[2] - 5 logger.info(f"YOLOX model has {self.num_classes} classes") - self.set_strides_grids() - - def 
def post_process_yolonas(self, output: list[np.ndarray]):
    """
    Convert raw YOLO-NAS inference output into the fixed-size detection array.

    @param output: output of inference
    expected shape: [np.array(1, N, 4), np.array(1, N, C)]
    where N depends on the input size e.g. N=2100 for 320x320 images
    and C is the number of classes (read from the tensor, e.g. 80)

    @return: best results: np.array(20, 6) of float32 where each row is
    in this order (class_id, score, y1/height, x1/width, y2/height, x2/width).
    Rows beyond the number of matches are all zeros.
    """
    max_detections = 20
    detections = np.zeros((max_detections, 6), np.float32)

    N = output[0].shape[1]
    num_classes = output[1].shape[-1]

    boxes = output[0].reshape(N, 4)
    scores = output[1].reshape(N, num_classes)

    # keep only the best scoring class of every candidate box
    class_ids = np.argmax(scores, axis=1)
    scores = scores[np.arange(N), class_ids]

    args_best = np.argwhere(scores > self.thresh)[:, 0]

    num_matches = len(args_best)
    if num_matches == 0:
        return detections
    elif num_matches > max_detections:
        # argpartition selects the top scores without sorting all of them
        args_top = np.argpartition(scores[args_best], -max_detections)[
            -max_detections:
        ]
        args_best = args_best[args_top]
        num_matches = max_detections

    boxes = boxes[args_best]

    # Fill only the matched rows. np.resize would pad the array with repeated
    # copies of the matches, reporting every detection multiple times.
    detections[:num_matches, 0] = class_ids[args_best]
    detections[:num_matches, 1] = scores[args_best]
    detections[:num_matches, 2] = boxes[:, 1] / self.height
    detections[:num_matches, 3] = boxes[:, 0] / self.width
    detections[:num_matches, 4] = boxes[:, 3] / self.height
    detections[:num_matches, 5] = boxes[:, 2] / self.width

    return detections


def post_process(self, output):
    """
    Dispatch raw inference output to the post-processor of the configured model.

    @param output: raw inference output, passed through unchanged

    @return: result of post_process_yolonas when the model type is yolonas

    @raise ValueError: for every other model type
    """
    if self.detector_config.model.model_type == ModelTypeEnum.yolonas:
        return self.post_process_yolonas(output)
    else:
        raise ValueError(
            f'Model type "{self.detector_config.model.model_type}" is currently not supported.'
        )
(bbox, confidence, class_id) in enumerate( @@ -214,10 +216,10 @@ def __post_process_nms_yolo(predictions: np.ndarray, width, height) -> np.ndarra detections[i] = [ class_id, confidence, - bbox[1] - bbox[3] / 2, - bbox[0] - bbox[2] / 2, - bbox[1] + bbox[3] / 2, - bbox[0] + bbox[2] / 2, + bbox[1] / height, + bbox[0] / width, + bbox[3] / height, + bbox[2] / width, ] return detections