class TestObjectBoundingBoxes(unittest.TestCase):
    # NOTE(review): only the reduce_detections tests visible in this view are
    # reproduced here; the other methods of this class lie outside the view.
    #
    # Each detection is a tuple whose index 0 is the label, index 1 the score,
    # index 2 the box (xmin, ymin, xmax, ymax) and index 3 the area.
    # Presumably index 4 is the aspect ratio and index 5 the region the
    # detection came from -- confirm against the detection producer.

    def test_overlapping_objects_reduced(self):
        """A lower scoring object away from its region edge should win over a higher scoring overlapping object at its region edge."""
        lower_scoring_car = (
            "car",
            0.81,
            (1209, 73, 1437, 163),
            20520,
            2.53333333,
            (1150, 0, 1500, 200),
        )
        higher_scoring_car = (
            "car",
            0.88,
            (1238, 73, 1401, 171),
            15974,
            1.663265306122449,
            (1242, 0, 1602, 360),
        )
        frame_shape = (720, 2560)

        consolidated_detections = reduce_detections(
            frame_shape, [lower_scoring_car, higher_scoring_car]
        )

        # only the lower scoring car is expected to survive
        assert consolidated_detections == [lower_scoring_car]

    def test_non_overlapping_objects_not_reduced(self):
        """Objects that do not overlap should all be kept."""
        first_car = (
            "car",
            0.81,
            (1209, 73, 1437, 163),
            20520,
            2.53333333,
            (1150, 0, 1500, 200),
        )
        second_car = (
            "car",
            0.83203125,
            (1121, 55, 1214, 100),
            4185,
            2.066666666666667,
            (922, 0, 1242, 320),
        )
        third_car = (
            "car",
            0.85546875,
            (1414, 97, 1571, 186),
            13973,
            1.7640449438202248,
            (1248, 0, 1568, 320),
        )
        detections = [first_car, second_car, third_car]
        frame_shape = (720, 2560)

        consolidated_detections = reduce_detections(frame_shape, detections)

        assert len(consolidated_detections) == len(detections)

    def test_overlapping_different_size_objects_not_reduced(self):
        """Overlapping objects of significantly different size should all be kept."""
        large_car = (
            "car",
            0.81,
            (164, 279, 816, 719),
            286880,
            1.48,
            (90, 0, 910, 820),
        )
        small_car = (
            "car",
            0.83203125,
            (248, 340, 328, 385),
            3600,
            1.777,
            (0, 0, 460, 460),
        )
        detections = [large_car, small_car]
        frame_shape = (720, 2560)

        consolidated_detections = reduce_detections(frame_shape, detections)

        assert len(consolidated_detections) == len(detections)
# Score given to detections that `clipped` flags as cut off by their region.
# It is kept above _NMS_SCORE_THRESHOLD so the detection is still considered by
# NMSBoxes rather than dropped by its minimum score requirement.
_CLIPPED_DETECTION_SCORE = 0.6

# score_threshold and nms_threshold arguments passed to cv2.dnn.NMSBoxes
_NMS_SCORE_THRESHOLD = 0.5
_NMS_THRESHOLD = 0.4

# A smaller detection is only compared against a larger one when its area is at
# least this fraction of the larger one's area.
_MIN_CONSOLIDATION_AREA_RATIO = 0.05


def _group_detections_by_label(detections):
    """Group detections by their label (index 0), keeping first-seen order."""
    groups = defaultdict(list)
    for detection in detections:
        groups[detection[0]].append(detection)
    return groups


def _suppress_overlapping_detections(frame_shape, detections):
    """Apply non-maxima suppression per label to drop weak, overlapping boxes."""
    selected_objects = []
    for group in _group_detections_by_label(detections).values():
        # o[2] is the box of the object: xmin, ymin, xmax, ymax
        # NMSBoxes is given each box as (x, y, width, height)
        boxes = [
            (
                o[2][0],
                o[2][1],
                o[2][2] - o[2][0],
                o[2][3] - o[2][1],
            )
            for o in group
        ]

        # reduce confidences for objects that are on edge of region so that a
        # complete detection of the same object is preferred over a clipped one
        confidences = [
            _CLIPPED_DETECTION_SCORE if clipped(o, frame_shape) else o[1]
            for o in group
        ]

        idxs = cv2.dnn.NMSBoxes(
            boxes, confidences, _NMS_SCORE_THRESHOLD, _NMS_THRESHOLD
        )

        # flatten so that a flat result, an Nx1 result and an empty result are
        # all handled the same way, without depending on the exact integer
        # dtype of the returned indices
        for index in np.asarray(idxs).flatten():
            selected_objects.append(group[int(index)])

    return selected_objects


def _is_contained_in_larger(detection, larger_detections):
    """Return True when enough of the detection lies inside a comparable larger one."""
    current_box = detection[2]
    current_area = area(current_box)
    threshold = LABEL_CONSOLIDATION_MAP.get(
        detection[0], LABEL_CONSOLIDATION_DEFAULT
    )

    for larger in larger_detections:
        to_check = larger[2]

        # if area of current detection / area of check < 5% they should not be compared
        # this covers cases where a large car parked in a driveway doesn't block detections
        # of cars in the street behind it
        if current_area / area(to_check) < _MIN_CONSOLIDATION_AREA_RATIO:
            continue

        intersect_box = intersection(current_box, to_check)
        # if % of smaller detection is inside of another detection, consolidate
        if intersect_box is not None and area(intersect_box) / current_area > threshold:
            return True

    return False


def _consolidate_contained_detections(detections):
    """Drop detections that overlap too much with a larger detection of the same label."""
    consolidated_detections = []
    for group in _group_detections_by_label(detections).values():
        # a group with a single item has nothing to be compared against
        if len(group) == 1:
            consolidated_detections.append(group[0])
            continue

        # sort smallest to largest by area (index 3) so that each detection
        # only needs to be checked against the ones that follow it
        sorted_by_area = sorted(group, key=lambda g: g[3])

        for position, current_detection in enumerate(sorted_by_area):
            if not _is_contained_in_larger(
                current_detection, sorted_by_area[position + 1 :]
            ):
                consolidated_detections.append(current_detection)

    return consolidated_detections


def reduce_detections(
    frame_shape: tuple[int],
    all_detections: list[tuple[any]],
) -> list[tuple[any]]:
    """Take a list of detections and reduce overlaps to create a list of confident detections.

    Two passes are applied, each one per label:

    1. Non-maxima suppression, where detections that `clipped` reports as cut
       off by their region compete with a reduced score.
    2. Consolidation, where a detection is dropped when the share of its box
       that lies inside a larger detection exceeds the label's threshold from
       LABEL_CONSOLIDATION_MAP (LABEL_CONSOLIDATION_DEFAULT otherwise).

    Args:
        frame_shape: shape of the frame, forwarded unchanged to `clipped`.
        all_detections: detection tuples; index 0 is the label, index 1 the
            score, index 2 the box (xmin, ymin, xmax, ymax) and index 3 the area.

    Returns:
        The detections that survive both passes, as the original tuple objects.
        An empty input yields an empty list.
    """
    return _consolidate_contained_detections(
        _suppress_overlapping_detections(frame_shape, all_detections)
    )
is_object_filtered, + reduce_detections, ) from frigate.util.services import listen @@ -688,50 +686,10 @@ def process_frames( ) ) - ######### - # merge objects - ######### - # group by name - detected_object_groups = defaultdict(lambda: []) - for detection in detections: - detected_object_groups[detection[0]].append(detection) - - selected_objects = [] - for group in detected_object_groups.values(): - # apply non-maxima suppression to suppress weak, overlapping bounding boxes - # o[2] is the box of the object: xmin, ymin, xmax, ymax - # apply max/min to ensure values do not exceed the known frame size - boxes = [ - ( - o[2][0], - o[2][1], - o[2][2] - o[2][0], - o[2][3] - o[2][1], - ) - for o in group - ] - confidences = [o[1] for o in group] - idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4) - - # add objects - for index in idxs: - index = index if isinstance(index, np.int32) else index[0] - obj = group[index] - selected_objects.append(obj) - - # set the detections list to only include top objects - detections = selected_objects + consolidated_detections = reduce_detections(frame_shape, detections) # if detection was run on this frame, consolidate if len(regions) > 0: - # group by name - detected_object_groups = defaultdict(lambda: []) - for detection in detections: - detected_object_groups[detection[0]].append(detection) - - consolidated_detections = get_consolidated_object_detections( - detected_object_groups - ) tracked_detections = [ d for d in consolidated_detections