blakeblackshear.frigate/frigate/util/object.py

"""Utils for reading and writing object detection data."""

import datetime
import logging
import math
from collections import defaultdict

import cv2
import numpy as np
from peewee import DoesNotExist

from frigate.config import DetectConfig, ModelConfig
from frigate.const import LABEL_CONSOLIDATION_DEFAULT, LABEL_CONSOLIDATION_MAP
from frigate.detectors.detector_config import PixelFormatEnum
from frigate.models import Event, Regions, Timeline
from frigate.util.image import (
    area,
    calculate_region,
    clipped,
    intersection,
    intersection_over_union,
    yuv_region_2_bgr,
    yuv_region_2_rgb,
    yuv_region_2_yuv,
)

# module-level logger, named after this module
logger = logging.getLogger(__name__)

# number of cells along each axis of a camera's region grid
# (the grid is GRID_SIZE x GRID_SIZE and indexed as grid[x][y])
GRID_SIZE = 8


def get_camera_regions_grid(
    name: str, detect: DetectConfig
) -> list[list[dict[str, any]]]:
    """Build a grid of expected region sizes for a camera.

    The stored grid for the camera is loaded from the database, or a fresh
    GRID_SIZE x GRID_SIZE grid of empty cells is created when none exists.
    Tracked-object timeline entries that belong to events started after the
    grid's last update are then converted to a region, and the relative width
    of that region is recorded in the cell containing the box centroid. Every
    cell with samples gets its ``x``/``y`` index plus the ``mean`` and
    ``std_dev`` of its sizes, and the grid is written back to the database.

    Args:
        name: Camera name used to filter regions, events and timeline rows.
        detect: Detect config providing the frame ``width`` and ``height``.

    Returns:
        The grid, indexed as ``grid[x][y]``. When there are no new events the
        loaded (or freshly created) grid is returned without writing to the
        database.
    """
    # get grid from db if available
    try:
        regions: Regions = Regions.select().where(Regions.camera == name).get()
        grid = regions.grid
        last_update = regions.last_update
    except DoesNotExist:
        grid = [[{"sizes": []} for _ in range(GRID_SIZE)] for _ in range(GRID_SIZE)]
        last_update = 0

    # get events for timeline entries
    events = (
        Event.select(Event.id)
        .where(Event.camera == name)
        .where((Event.false_positive == None) | (Event.false_positive == False))
        .where(Event.start_time > last_update)
    )
    valid_event_ids = [e["id"] for e in events.dicts()]
    logger.debug(f"Found {len(valid_event_ids)} new events for {name}")

    # no new events, return as is
    if not valid_event_ids:
        return grid

    new_update = datetime.datetime.now().timestamp()
    timeline = (
        Timeline.select(
            Timeline.camera,
            Timeline.source,
            Timeline.data,
        )
        .where(Timeline.source_id << valid_event_ids)
        .limit(10000)
        .dicts()
    )

    logger.debug(f"Found {len(timeline)} new entries for {name}")

    width = detect.width
    height = detect.height

    for t in timeline:
        if t.get("source") != "tracked_object":
            continue

        # box is relative to the frame: (x, y, width, height)
        box = t["data"]["box"]

        # calculate centroid position
        x = box[0] + (box[2] / 2)
        y = box[1] + (box[3] / 2)

        # clamp the cell index: a centroid on the right/bottom edge would
        # otherwise index one past the grid, and a negative one would wrap
        # around to the opposite side
        x_pos = max(0, min(int(x * GRID_SIZE), GRID_SIZE - 1))
        y_pos = max(0, min(int(y * GRID_SIZE), GRID_SIZE - 1))

        calculated_region = calculate_region(
            (height, width),
            box[0] * width,
            box[1] * height,
            (box[0] + box[2]) * width,
            (box[1] + box[3]) * height,
            320,
            1.35,
        )
        # save width of region to grid as relative
        grid[x_pos][y_pos]["sizes"].append(
            (calculated_region[2] - calculated_region[0]) / width
        )

    for x in range(GRID_SIZE):
        for y in range(GRID_SIZE):
            cell = grid[x][y]

            # cells without samples keep only their empty "sizes" list
            if len(cell["sizes"]) == 0:
                continue

            std_dev = np.std(cell["sizes"])
            mean = np.mean(cell["sizes"])
            logger.debug(f"std dev: {std_dev} mean: {mean}")
            cell["x"] = x
            cell["y"] = y
            cell["std_dev"] = std_dev
            cell["mean"] = mean

    # update db with new grid
    region = {
        Regions.camera: name,
        Regions.grid: grid,
        Regions.last_update: new_update,
    }
    (
        Regions.insert(region)
        .on_conflict(
            conflict_target=[Regions.camera],
            update=region,
        )
        .execute()
    )

    return grid


def get_cluster_region_from_grid(frame_shape, min_region, cluster, boxes, region_grid):
    """Get a grid-based region that covers every box of a cluster.

    cluster holds indexes into boxes. The box enclosing all of those boxes is
    computed and handed to get_region_from_grid together with the region grid.
    """
    # start with the frame's far edges / origin so member boxes tighten the bounds
    left, top = frame_shape[1], frame_shape[0]
    right = bottom = 0

    for box_index in cluster:
        member = boxes[box_index]
        left = min(member[0], left)
        top = min(member[1], top)
        right = max(member[2], right)
        bottom = max(member[3], bottom)

    enclosing_box = [left, top, right, bottom]
    return get_region_from_grid(frame_shape, enclosing_box, min_region, region_grid)


def get_region_from_grid(
    frame_shape: tuple[int],
    cluster: list[int],
    min_region: int,
    region_grid: list[list[dict[str, any]]],
) -> list[int]:
    """Pick a region for a box, guided by the sizes recorded in the region grid.

    A region is first calculated for the box (cluster is xmin, ymin, xmax,
    ymax). The grid cell under that region's centroid decides the result:

    - no cell data: a region calculated around the first region is returned
    - relative width at or above (mean - std_dev) of the cell: the first
      region is returned unchanged
    - otherwise: a region of the cell's mean size, centred on the centroid
      and limited to the frame, is calculated and returned
    """
    frame_height, frame_width = frame_shape[0], frame_shape[1]

    box = calculate_region(
        frame_shape, cluster[0], cluster[1], cluster[2], cluster[3], min_region
    )

    # centre of the region, ignoring any part past the right/bottom frame edge
    center_x = box[0] + (min(frame_width, box[2]) - box[0]) / 2
    center_y = box[1] + (min(frame_height, box[3]) - box[1]) / 2

    cell_x = int(center_x / frame_width * GRID_SIZE)
    cell_y = int(center_y / frame_height * GRID_SIZE)
    cell = region_grid[cell_x][cell_y]

    # if there is no known data, get standard region for motion box
    if not cell or not cell["sizes"]:
        return calculate_region(frame_shape, box[0], box[1], box[2], box[3], min_region)

    # width of the calculated region relative to the frame width
    calc_size = (box[2] - box[0]) / frame_width
    smallest_expected = cell["mean"] - cell["std_dev"]
    largest_expected = cell["mean"] + cell["std_dev"]

    # keep the region when it is within the expected range, and also when it
    # is larger than expected
    # TODO not sure how to handle case where cluster is larger than expected region
    if smallest_expected <= calc_size <= largest_expected or (
        calc_size > largest_expected
    ):
        return box

    # region is smaller than expected: grow it to the cell's mean size
    half_size = cell["mean"] * frame_width / 2

    return calculate_region(
        frame_shape,
        max(0, center_x - half_size),
        max(0, center_y - half_size),
        min(frame_width, center_x + half_size),
        min(frame_height, center_y + half_size),
        min_region,
    )


def is_object_filtered(obj, objects_to_track, object_filters):
    """Return True when a detected object should be dropped.

    obj is indexed as: [0] label, [1] score, [2] box (xmin, ymin, xmax, ymax),
    [3] area, [4] ratio. An object is dropped when its label is not tracked,
    or when a filter exists for the label and the object is outside the
    filter's area / score / ratio limits or sits on a masked pixel.
    """
    label, score, box, obj_area, ratio = obj[0], obj[1], obj[2], obj[3], obj[4]

    if label not in objects_to_track:
        return True

    # nothing more to check for labels without a filter
    if label not in object_filters:
        return False

    settings = object_filters[label]

    # area below min / above max, score below min, ratio below min / above max
    if (
        settings.min_area > obj_area
        or settings.max_area < obj_area
        or settings.min_score > score
        or settings.min_ratio > ratio
        or settings.max_ratio < ratio
    ):
        return True

    if settings.mask is None:
        return False

    # sample the mask at the bottom centre of the box, keeping the location
    # inside the bounds of the mask (it can fall outside from rounding)
    mask = settings.mask
    mask_row = min(int(box[3]), len(mask) - 1)
    mask_col = min(int((box[2] + box[0]) / 2.0), len(mask[0]) - 1)

    # a value of 0 marks a masked location
    return bool(mask[mask_row][mask_col] == 0)


def get_min_region_size(model_config: ModelConfig) -> int:
    """Get the min region size: the larger of the model's height and width."""
    model_height = model_config.height
    model_width = model_config.width
    return model_width if model_width > model_height else model_height


def create_tensor_input(frame, model_config: ModelConfig, region):
    """Crop a region from the frame and shape it into the model's input tensor.

    The region is converted with the helper matching the model's input pixel
    format (rgb, bgr, or yuv for anything else), resized to the model's
    dimensions when needed, and given a leading batch dimension.
    """
    pixel_format = model_config.input_pixel_format

    if pixel_format == PixelFormatEnum.rgb:
        convert_region = yuv_region_2_rgb
    elif pixel_format == PixelFormatEnum.bgr:
        convert_region = yuv_region_2_bgr
    else:
        convert_region = yuv_region_2_yuv

    tensor = convert_region(frame, region)

    # Resize if needed
    model_shape = (model_config.height, model_config.width, 3)
    if tensor.shape != model_shape:
        tensor = cv2.resize(
            tensor,
            dsize=(model_config.width, model_config.height),
            interpolation=cv2.INTER_LINEAR,
        )

    # The model expects images to have shape: [1, height, width, 3]
    return np.expand_dims(tensor, axis=0)


def box_overlaps(b1, b2):
    """Return True when two (xmin, ymin, xmax, ymax) boxes overlap or touch."""
    # the boxes are apart when b1 is entirely left of, right of, below or above b2
    separated = b1[2] < b2[0] or b1[0] > b2[2] or b1[1] > b2[3] or b1[3] < b2[1]
    return not separated


def box_inside(b1, b2):
    """Return True when b2 lies inside b1 (shared edges count as inside)."""
    top_left_inside = b2[0] >= b1[0] and b2[1] >= b1[1]
    return bool(top_left_inside and b2[2] <= b1[2] and b2[3] <= b1[3])


def reduce_boxes(boxes, iou_threshold=0.0):
    """Merge boxes into clusters and return the clusters as tuples.

    Each box is compared with every existing cluster; every cluster whose
    intersection over union with the box exceeds iou_threshold is grown to
    also enclose the box. A box that matches no cluster starts a new one.
    """
    merged = []

    for candidate in boxes:
        absorbed = False

        # no early exit: the box expands every cluster it matches
        for group in merged:
            if intersection_over_union(candidate, group) > iou_threshold:
                absorbed = True
                group[:4] = (
                    min(group[0], candidate[0]),
                    min(group[1], candidate[1]),
                    max(group[2], candidate[2]),
                    max(group[3], candidate[3]),
                )

        if not absorbed:
            merged.append(list(candidate))

    return list(map(tuple, merged))


def intersects_any(box_a, boxes):
    """Return True when box_a overlaps at least one of boxes."""
    return any(box_overlaps(box_a, other) for other in boxes)


def inside_any(box_a, boxes):
    """Return True when box_a is inside at least one of boxes."""
    # box_inside(outer, inner) checks that inner is inside outer
    return any(box_inside(outer, box_a) for outer in boxes)


def get_cluster_boundary(box, min_region):
    """Get the boundary other boxes must be inside of to be clustered with box.

    The boundary is centred on the box (xmin, ymin, xmax, ymax) and returned
    as [xmin, ymin, xmax, ymax] ints.
    """
    width = box[2] - box[0]
    height = box[3] - box[1]

    # largest region for this box, treating the box as 10% of the region's
    # area, but never smaller than min_region
    largest_area = abs(width * height) / 0.1
    largest_side = max(min_region, int(math.sqrt(largest_area)))

    center_x = width / 2 + box[0]
    center_y = height / 2 + box[1]

    # distance from the centre to the boundary on each axis
    reach_x = int(largest_side - width / 2 * 1.1)
    reach_y = int(largest_side - height / 2 * 1.1)

    corners = (
        center_x - reach_x,
        center_y - reach_y,
        center_x + reach_x,
        center_y + reach_y,
    )
    return [int(value) for value in corners]


def get_cluster_candidates(frame_shape, min_region, boxes):
    """Group boxes into clusters that can share a single region.

    Every box not yet claimed by a cluster seeds a new one. Another unclaimed
    box joins it when the box lies inside the seed's cluster boundary and the
    region covering the grown cluster is either no wider than min_region or
    leaves every member with at least 5% of the region's area.

    Returns a list of clusters, each a sorted list of indexes into boxes.
    """
    # TODO: see if we can do this with numpy
    candidates = []
    claimed = set()

    for seed_index, seed_box in enumerate(boxes):
        if seed_index in claimed:
            continue

        members = [seed_index]
        claimed.add(seed_index)
        boundary = get_cluster_boundary(seed_box, min_region)

        # find all other boxes that fit inside the boundary
        for other_index, other_box in enumerate(boxes):
            # skip boxes that are already clustered or outside of the boundary
            if other_index in claimed or not box_inside(boundary, other_box):
                continue

            # get the region if you were to add this box to the cluster
            trial_members = members + [other_index]
            trial_region = get_cluster_region(
                frame_shape, min_region, trial_members, boxes
            )

            # if the region is wider than the minimum and any box would be
            # less than 5% of the region's area, don't cluster
            if (trial_region[2] - trial_region[0]) > min_region and any(
                area(boxes[member]) / area(trial_region) < 0.05
                for member in trial_members
            ):
                continue

            members.append(other_index)
            claimed.add(other_index)

        candidates.append(members)

    # return the unique clusters only
    unique = {tuple(sorted(c)) for c in candidates}
    return [list(members) for members in unique]


def get_cluster_region(frame_shape, min_region, cluster, boxes):
    """Calculate a region that covers every box of a cluster.

    cluster holds indexes into boxes. The box enclosing all of those boxes is
    passed to calculate_region with a multiplier of 1.2.
    """
    # start with the frame's far edges / origin so member boxes tighten the bounds
    left, top = frame_shape[1], frame_shape[0]
    right = bottom = 0

    for box_index in cluster:
        member = boxes[box_index]
        left = min(member[0], left)
        top = min(member[1], top)
        right = max(member[2], right)
        bottom = max(member[3], bottom)

    return calculate_region(
        frame_shape, left, top, right, bottom, min_region, multiplier=1.2
    )


def get_startup_regions(
    frame_shape: tuple[int],
    region_min_size: int,
    region_grid: list[list[dict[str, any]]],
) -> list[list[int]]:
    """Get a list of regions to run on startup.

    Up to 8 grid cells with the most recorded sizes are used. For each one a
    region of the cell's mean size is calculated around the cell's centre.
    Cells without any recorded sizes never produce a region.
    """
    flat_cells = np.concatenate(region_grid).flat
    busiest_cells = sorted(
        flat_cells, key=lambda cell: len(cell["sizes"]), reverse=True
    )[:8]

    startup_regions = []

    for cell in busiest_cells:
        # cells are ordered by sample count, so the rest are empty as well
        if not cell["sizes"]:
            break

        # centre of the cell in frame coordinates
        center_x = frame_shape[1] / GRID_SIZE * (0.5 + cell["x"])
        center_y = frame_shape[0] / GRID_SIZE * (0.5 + cell["y"])

        # the mean is stored relative to the frame width
        half_size = cell["mean"] * frame_shape[1] / 2

        region = calculate_region(
            frame_shape,
            center_x - half_size,
            center_y - half_size,
            center_x + half_size,
            center_y + half_size,
            region_min_size,
            multiplier=1,
        )
        startup_regions.append(region)

    return startup_regions


def reduce_detections(
    frame_shape: tuple[int],
    all_detections: list[tuple[any]],
) -> list[tuple[any]]:
    """Take a list of detections and reduce overlaps to create a list of confident detections.

    Each detection is indexed as: [0] label, [1] score, [2] box as
    (xmin, ymin, xmax, ymax), [3] area. Detections are processed per label in
    two passes: non-maxima suppression first, then consolidation of smaller
    detections that are mostly covered by a larger detection of the same label.

    Args:
        frame_shape: Shape of the frame, passed to ``clipped`` to find
            detections that were cut off.
        all_detections: Detections to reduce.

    Returns:
        The detections that survive both passes.
    """

    def group_by_label(detections: list[tuple[any]]) -> dict[any, list[tuple[any]]]:
        """Group detections by their label, keeping the incoming order."""
        groups = defaultdict(list)
        for detection in detections:
            groups[detection[0]].append(detection)
        return groups

    def reduce_overlapping_detections(detections: list[tuple[any]]) -> list[tuple[any]]:
        """apply non-maxima suppression to suppress weak, overlapping bounding boxes."""
        selected_objects = []
        for group in group_by_label(detections).values():
            # o[2] is the box of the object: xmin, ymin, xmax, ymax
            # NMSBoxes is given boxes as x, y, width, height
            boxes = [
                (
                    o[2][0],
                    o[2][1],
                    o[2][2] - o[2][0],
                    o[2][3] - o[2][1],
                )
                for o in group
            ]

            # reduce confidences for objects that are on edge of region
            # 0.6 should be used to ensure that the object is still considered and not dropped
            # due to min score requirement of NMSBoxes
            confidences = [0.6 if clipped(o, frame_shape) else o[1] for o in group]

            idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

            # the result may hold scalar indexes or one-element sequences, and
            # the scalar type is not guaranteed to be np.int32; flattening
            # yields plain scalar indexes in every case (including no result)
            for index in np.array(idxs).flatten():
                selected_objects.append(group[int(index)])

        # only the objects kept by non-maxima suppression remain
        return selected_objects

    def get_consolidated_object_detections(detections: list[tuple[any]]):
        """Drop detections that overlap too much."""
        consolidated_detections = []
        for group in group_by_label(detections).values():
            # a single detection has nothing to be consolidated with
            if len(group) == 1:
                consolidated_detections.append(group[0])
                continue

            # sort smallest to largest by area
            sorted_by_area = sorted(group, key=lambda g: g[3])

            for current_idx, current_detection in enumerate(sorted_by_area):
                current_label = current_detection[0]
                current_box = current_detection[2]
                overlaps_larger = False

                # only compare against the detections sorted after this one
                for larger_detection in sorted_by_area[current_idx + 1 :]:
                    to_check = larger_detection[2]

                    # if area of current detection / area of check < 5% they should not be compared
                    # this covers cases where a large car parked in a driveway doesn't block detections
                    # of cars in the street behind it
                    if area(current_box) / area(to_check) < 0.05:
                        continue

                    intersect_box = intersection(current_box, to_check)
                    # if % of smaller detection is inside of another detection, consolidate
                    if intersect_box is not None and area(intersect_box) / area(
                        current_box
                    ) > LABEL_CONSOLIDATION_MAP.get(
                        current_label, LABEL_CONSOLIDATION_DEFAULT
                    ):
                        overlaps_larger = True
                        break

                if not overlaps_larger:
                    consolidated_detections.append(current_detection)

        return consolidated_detections

    return get_consolidated_object_detections(
        reduce_overlapping_detections(all_detections)
    )