Improve motion detection and region selection (#6741)

* refactor existing motion detector
* implement and use cnt bgsub
* pass fps to motion detector
* create a simplified motion detector
* lightning detection
* update default motion config
* lint imports
* use estimated boxes for regions
* use improved motion detector
* update test
* use a different strategy for clustering motion and object boxes
* increase alpha during calibration
* simplify object consolidation
* add some reasonable constraints to the estimated box
* adjust cluster boundary to 10%
* refactor
* add disabled debug code
* fix variable scope
2025-10-13 11:16:29 +02:00 · 2023-06-11 09:45:11 -04:00 · 2023-06-11 09:45:11 -04:00 · d81dd60fef
commit d81dd60fef
parent 32569842d3
10 changed files with 693 additions and 125 deletions
--- a/benchmark_motion.py
+++ b/benchmark_motion.py
@ -0,0 +1,103 @@
+import datetime
+import multiprocessing as mp
+import os
+from statistics import mean
+
+import cv2
+import numpy as np
+
+from frigate.config import MotionConfig
+from frigate.motion.frigate_motion import FrigateMotionDetector
+from frigate.motion.improved_motion import ImprovedMotionDetector
+
+# get info on the video
+# cap = cv2.VideoCapture("debug/front_cam_2023_05_23_08_41__2023_05_23_08_43.mp4")
+# cap = cv2.VideoCapture("debug/motion_test_clips/rain_1.mp4")
+cap = cv2.VideoCapture("debug/motion_test_clips/ir_off.mp4")
+# cap = cv2.VideoCapture("airport.mp4")
+width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+fps = cap.get(cv2.CAP_PROP_FPS)
+frame_shape = (height, width, 3)
+
+# create the motion config
+motion_config = MotionConfig()
+motion_config.mask = np.zeros((height, width), np.uint8)
+motion_config.mask[:] = 255
+motion_config.improve_contrast = 1
+motion_config.frame_alpha = 0.02
+motion_config.threshold = 40
+motion_config.contour_area = 15
+save_images = True
+
+# create motion detectors
+frigate_motion_detector = FrigateMotionDetector(
+    frame_shape=frame_shape,
+    config=motion_config,
+    fps=fps,
+    improve_contrast=mp.Value("i", motion_config.improve_contrast),
+    threshold=mp.Value("i", motion_config.threshold),
+    contour_area=mp.Value("i", motion_config.contour_area),
+)
+frigate_motion_detector.save_images = save_images
+
+improved_motion_detector = ImprovedMotionDetector(
+    frame_shape=frame_shape,
+    config=motion_config,
+    fps=fps,
+    improve_contrast=mp.Value("i", motion_config.improve_contrast),
+    threshold=mp.Value("i", motion_config.threshold),
+    contour_area=mp.Value("i", motion_config.contour_area),
+)
+improved_motion_detector.save_images = save_images
+
+# read and process frames
+frame_times = {"frigate": [], "improved": []}
+ret, frame = cap.read()
+frame_counter = 1
+while ret:
+    yuv_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2YUV_I420)
+
+    start_frame = datetime.datetime.now().timestamp()
+    frigate_motion_detector.detect(yuv_frame)
+    frame_times["frigate"].append(datetime.datetime.now().timestamp() - start_frame)
+
+    start_frame = datetime.datetime.now().timestamp()
+    improved_motion_detector.detect(yuv_frame)
+    frame_times["improved"].append(datetime.datetime.now().timestamp() - start_frame)
+
+    frigate_frame = f"debug/frames/frigate-{frame_counter}.jpg"
+    improved_frame = f"debug/frames/improved-{frame_counter}.jpg"
+    if os.path.exists(frigate_frame) and os.path.exists(improved_frame):
+        image_row_1 = cv2.hconcat(
+            [
+                cv2.imread(frigate_frame),
+                cv2.imread(improved_frame),
+            ]
+        )
+
+        image_row_2 = cv2.resize(
+            frame,
+            dsize=(
+                frigate_motion_detector.motion_frame_size[1] * 2,
+                frigate_motion_detector.motion_frame_size[0] * 2,
+            ),
+            interpolation=cv2.INTER_LINEAR,
+        )
+
+        cv2.imwrite(
+            f"debug/frames/all-{frame_counter}.jpg",
+            cv2.vconcat([image_row_1, image_row_2]),
+        )
+        os.unlink(frigate_frame)
+        os.unlink(improved_frame)
+    frame_counter += 1
+
+    ret, frame = cap.read()
+
+cap.release()
+
+print("Frigate Motion Detector")
+print(f"Average frame processing time: {mean(frame_times['frigate'])*1000:.2f}ms")
+print("Improved Motion Detector")
+print(f"Average frame processing time: {mean(frame_times['improved'])*1000:.2f}ms")
--- a/docs/docs/configuration/index.md
+++ b/docs/docs/configuration/index.md
@ -261,27 +261,29 @@ motion:
  # Optional: The threshold passed to cv2.threshold to determine if a pixel is different enough to be counted as motion. (default: shown below)
  # Increasing this value will make motion detection less sensitive and decreasing it will make motion detection more sensitive.
  # The value should be between 1 and 255.
-  threshold: 25
-  # Optional: Minimum size in pixels in the resized motion image that counts as motion (default: 30)
+  threshold: 40
+  # Optional: The fraction of the image (between 0.3 and 1.0) used to detect lightning or other substantial changes where motion detection
+  #           needs to recalibrate. (default: shown below)
+  # Increasing this value will make motion detection more likely to consider lightning or IR mode changes as valid motion.
+  # Decreasing this value will make motion detection more likely to ignore large amounts of motion such as a person approaching
+  # a doorbell camera.
+  lightning_threshold: 0.8
+  # Optional: Minimum size in pixels in the resized motion image that counts as motion (default: shown below)
  # Increasing this value will prevent smaller areas of motion from being detected. Decreasing will
  # make motion detection more sensitive to smaller moving objects.
  # As a rule of thumb:
  #  - 15 - high sensitivity
  #  - 30 - medium sensitivity
  #  - 50 - low sensitivity
-  contour_area: 30
-  # Optional: Alpha value passed to cv2.accumulateWeighted when averaging the motion delta across multiple frames (default: shown below)
-  # Higher values mean the current frame impacts the delta a lot, and a single raindrop may register as motion.
-  # Too low and a fast moving person wont be detected as motion.
-  delta_alpha: 0.2
+  contour_area: 15
  # Optional: Alpha value passed to cv2.accumulateWeighted when averaging frames to determine the background (default: shown below)
  # Higher values mean the current frame impacts the average a lot, and a new object will be averaged into the background faster.
  # Low values will cause things like moving shadows to be detected as motion for longer.
  # https://www.geeksforgeeks.org/background-subtraction-in-an-image-using-concept-of-running-average/
-  frame_alpha: 0.2
+  frame_alpha: 0.02
  # Optional: Height of the resized motion frame  (default: 50)
-  # This operates as an efficient blur alternative. Higher values will result in more granular motion detection at the expense
-  # of higher CPU usage. Lower values result in less CPU, but small changes may not register as motion.
+  # Higher values will result in more granular motion detection at the expense of higher CPU usage.
+  # Lower values result in less CPU, but small changes may not register as motion.
  frame_height: 50
  # Optional: motion mask
  # NOTE: see docs for more detailed info on creating masks
@ -289,7 +291,7 @@ motion:
  # Optional: improve contrast (default: shown below)
  # Enables dynamic contrast improvement. This should help improve night detections at the cost of making motion detection more sensitive
  # for daytime.
-  improve_contrast: False
+  improve_contrast: True
  # Optional: Delay when updating camera motion through MQTT from ON -> OFF (default: shown below).
  mqtt_off_delay: 30

--- a/frigate/config.py
+++ b/frigate/config.py
@ -189,15 +189,18 @@ class RecordConfig(FrigateBaseModel):

 class MotionConfig(FrigateBaseModel):
    threshold: int = Field(
-        default=25,
+        default=40,
        title="Motion detection threshold (1-255).",
        ge=1,
        le=255,
    )
-    improve_contrast: bool = Field(default=False, title="Improve Contrast")
-    contour_area: Optional[int] = Field(default=30, title="Contour Area")
+    lightning_threshold: float = Field(
+        default=0.8, title="Lightning detection threshold (0.3-1.0).", ge=0.3, le=1.0
+    )
+    improve_contrast: bool = Field(default=True, title="Improve Contrast")
+    contour_area: Optional[int] = Field(default=15, title="Contour Area")
    delta_alpha: float = Field(default=0.2, title="Delta Alpha")
-    frame_alpha: float = Field(default=0.2, title="Frame Alpha")
+    frame_alpha: float = Field(default=0.02, title="Frame Alpha")
    frame_height: Optional[int] = Field(default=50, title="Frame Height")
    mask: Union[str, List[str]] = Field(
        default="", title="Coordinates polygon for the motion mask."
--- a/frigate/motion/init.py
+++ b/frigate/motion/init.py
@ -0,0 +1,22 @@
+from abc import ABC, abstractmethod
+from typing import Tuple
+
+from frigate.config import MotionConfig
+
+
+class MotionDetector(ABC):
+    @abstractmethod
+    def __init__(
+        self,
+        frame_shape: Tuple[int, int, int],
+        config: MotionConfig,
+        fps: int,
+        improve_contrast,
+        threshold,
+        contour_area,
+    ):
+        pass
+
+    @abstractmethod
+    def detect(self, frame):
+        pass
--- a/frigate/motion/frigate_motion.py
+++ b/frigate/motion/frigate_motion.py
@ -3,16 +3,18 @@ import imutils
 import numpy as np

 from frigate.config import MotionConfig
+from frigate.motion import MotionDetector


-class MotionDetector:
+class FrigateMotionDetector(MotionDetector):
    def __init__(
        self,
        frame_shape,
        config: MotionConfig,
-        improve_contrast_enabled,
-        motion_threshold,
-        motion_contour_area,
+        fps: int,
+        improve_contrast,
+        threshold,
+        contour_area,
    ):
        self.config = config
        self.frame_shape = frame_shape
@ -32,9 +34,9 @@ class MotionDetector:
        )
        self.mask = np.where(resized_mask == [0])
        self.save_images = False
-        self.improve_contrast = improve_contrast_enabled
-        self.threshold = motion_threshold
-        self.contour_area = motion_contour_area
+        self.improve_contrast = improve_contrast
+        self.threshold = threshold
+        self.contour_area = contour_area

    def detect(self, frame):
        motion_boxes = []
@ -130,18 +132,10 @@ class MotionDetector:
                            (0, 0, 255),
                            2,
                        )
-                # print("--------")
-                image_row_1 = cv2.hconcat(
-                    [
-                        cv2.cvtColor(frameDelta, cv2.COLOR_GRAY2BGR),
-                        cv2.cvtColor(avg_delta_image, cv2.COLOR_GRAY2BGR),
-                    ]
+
+                cv2.imwrite(
+                    f"debug/frames/frigate-{self.frame_counter}.jpg", thresh_dilated
                )
-                image_row_2 = cv2.hconcat(
-                    [cv2.cvtColor(thresh, cv2.COLOR_GRAY2BGR), thresh_dilated]
-                )
-                combined_image = cv2.vconcat([image_row_1, image_row_2])
-                cv2.imwrite(f"motion/motion-{self.frame_counter}.jpg", combined_image)

        if len(motion_boxes) > 0:
            self.motion_frame_count += 1
--- a/frigate/motion/improved_motion.py
+++ b/frigate/motion/improved_motion.py
@ -0,0 +1,143 @@
+import cv2
+import imutils
+import numpy as np
+
+from frigate.config import MotionConfig
+from frigate.motion import MotionDetector
+
+
+class ImprovedMotionDetector(MotionDetector):
+    def __init__(
+        self,
+        frame_shape,
+        config: MotionConfig,
+        fps: int,
+        improve_contrast,
+        threshold,
+        contour_area,
+    ):
+        self.config = config
+        self.frame_shape = frame_shape
+        self.resize_factor = frame_shape[0] / config.frame_height
+        self.motion_frame_size = (
+            config.frame_height,
+            config.frame_height * frame_shape[1] // frame_shape[0],
+        )
+        self.avg_frame = np.zeros(self.motion_frame_size, np.float32)
+        self.avg_delta = np.zeros(self.motion_frame_size, np.float32)
+        self.motion_frame_count = 0
+        self.frame_counter = 0
+        resized_mask = cv2.resize(
+            config.mask,
+            dsize=(self.motion_frame_size[1], self.motion_frame_size[0]),
+            interpolation=cv2.INTER_LINEAR,
+        )
+        self.mask = np.where(resized_mask == [0])
+        self.save_images = False
+        self.calibrating = True
+        self.improve_contrast = improve_contrast
+        self.threshold = threshold
+        self.contour_area = contour_area
+
+    def detect(self, frame):
+        motion_boxes = []
+
+        gray = frame[0 : self.frame_shape[0], 0 : self.frame_shape[1]]
+
+        # resize frame
+        resized_frame = cv2.resize(
+            gray,
+            dsize=(self.motion_frame_size[1], self.motion_frame_size[0]),
+            interpolation=cv2.INTER_LINEAR,
+        )
+
+        resized_frame = cv2.GaussianBlur(resized_frame, (3, 3), cv2.BORDER_DEFAULT)
+
+        # Improve contrast
+        if self.improve_contrast.value:
+            resized_frame = cv2.equalizeHist(resized_frame)
+
+        # mask frame
+        resized_frame[self.mask] = [255]
+
+        if self.save_images or self.calibrating:
+            self.frame_counter += 1
+        # compare to average
+        frameDelta = cv2.absdiff(resized_frame, cv2.convertScaleAbs(self.avg_frame))
+
+        # compute the threshold image for the current frame
+        thresh = cv2.threshold(
+            frameDelta, self.threshold.value, 255, cv2.THRESH_BINARY
+        )[1]
+
+        # dilate the thresholded image to fill in holes, then find contours
+        # on thresholded image
+        thresh_dilated = cv2.dilate(thresh, None, iterations=1)
+        cnts = cv2.findContours(
+            thresh_dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
+        )
+        cnts = imutils.grab_contours(cnts)
+
+        # loop over the contours
+        total_contour_area = 0
+        for c in cnts:
+            # if the contour is big enough, count it as motion
+            contour_area = cv2.contourArea(c)
+            total_contour_area += contour_area
+            if contour_area > self.contour_area.value:
+                x, y, w, h = cv2.boundingRect(c)
+                motion_boxes.append(
+                    (
+                        int(x * self.resize_factor),
+                        int(y * self.resize_factor),
+                        int((x + w) * self.resize_factor),
+                        int((y + h) * self.resize_factor),
+                    )
+                )
+
+        pct_motion = total_contour_area / (
+            self.motion_frame_size[0] * self.motion_frame_size[1]
+        )
+
+        # once the motion drops to less than 1% for the first time, assume it's calibrated
+        if pct_motion < 0.01:
+            self.calibrating = False
+
+        # if calibrating or the motion contours cover more than lightning_threshold of the image area (lightning, ir, ptz), recalibrate
+        if self.calibrating or pct_motion > self.config.lightning_threshold:
+            motion_boxes = []
+            self.calibrating = True
+
+        if self.save_images:
+            thresh_dilated = cv2.cvtColor(thresh_dilated, cv2.COLOR_GRAY2BGR)
+            for b in motion_boxes:
+                cv2.rectangle(
+                    thresh_dilated,
+                    (int(b[0] / self.resize_factor), int(b[1] / self.resize_factor)),
+                    (int(b[2] / self.resize_factor), int(b[3] / self.resize_factor)),
+                    (0, 0, 255),
+                    2,
+                )
+            cv2.imwrite(
+                f"debug/frames/improved-{self.frame_counter}.jpg", thresh_dilated
+            )
+
+        if len(motion_boxes) > 0:
+            self.motion_frame_count += 1
+            if self.motion_frame_count >= 10:
+                # only average in the current frame if the difference persists for a bit
+                cv2.accumulateWeighted(
+                    resized_frame,
+                    self.avg_frame,
+                    0.2 if self.calibrating else self.config.frame_alpha,
+                )
+        else:
+            # when no motion, just keep averaging the frames together
+            cv2.accumulateWeighted(
+                resized_frame,
+                self.avg_frame,
+                0.2 if self.calibrating else self.config.frame_alpha,
+            )
+            self.motion_frame_count = 0
+
+        return motion_boxes
--- a/frigate/test/test_config.py
+++ b/frigate/test/test_config.py
@ -758,7 +758,7 @@ class TestConfig(unittest.TestCase):
        assert config == frigate_config.dict(exclude_unset=True)

        runtime_config = frigate_config.runtime_config()
-        assert round(runtime_config.cameras["back"].motion.contour_area) == 30
+        assert round(runtime_config.cameras["back"].motion.contour_area) == 15

    def test_merge_labelmap(self):
        config = {
--- a/frigate/test/test_video.py
+++ b/frigate/test/test_video.py
@ -0,0 +1,178 @@
+import unittest
+
+import cv2
+import numpy as np
+from norfair.drawing.color import Palette
+from norfair.drawing.drawer import Drawer
+
+from frigate.video import (
+    get_cluster_boundary,
+    get_cluster_candidates,
+    get_cluster_region,
+)
+
+
+def draw_box(frame, box, color=(255, 0, 0), thickness=2):
+    cv2.rectangle(
+        frame,
+        (box[0], box[1]),
+        (box[2], box[3]),
+        color,
+        thickness,
+    )
+
+
+def save_clusters_image(name, boxes, candidates, regions=[]):
+    canvas = np.zeros((1000, 2000, 3), np.uint8)
+    for cluster in candidates:
+        color = Palette.choose_color(np.random.rand())
+        for b in cluster:
+            box = boxes[b]
+            draw_box(canvas, box, color, 2)
+            # bottom right
+            text_anchor = (
+                box[2],
+                box[3],
+            )
+            canvas = Drawer.text(
+                canvas,
+                str(b),
+                position=text_anchor,
+                size=None,
+                color=(255, 255, 255),
+                thickness=None,
+            )
+    for r in regions:
+        draw_box(canvas, r, (0, 255, 0), 2)
+    cv2.imwrite(
+        f"debug/frames/{name}.jpg",
+        canvas,
+    )
+
+
+def save_cluster_boundary_image(name, boxes, bounding_boxes):
+    canvas = np.zeros((1000, 2000, 3), np.uint8)
+    color = Palette.choose_color(np.random.rand())
+    for box in boxes:
+        draw_box(canvas, box, color, 2)
+    for bound in bounding_boxes:
+        draw_box(canvas, bound, (0, 255, 0), 2)
+    cv2.imwrite(
+        f"debug/frames/{name}.jpg",
+        canvas,
+    )
+
+
+class TestConfig(unittest.TestCase):
+    def setUp(self):
+        self.frame_shape = (1000, 2000)
+        self.min_region_size = 160
+
+    def test_cluster_candidates(self):
+        boxes = [(100, 100, 200, 200), (202, 150, 252, 200), (900, 900, 950, 950)]
+
+        cluster_candidates = get_cluster_candidates(
+            self.frame_shape, self.min_region_size, boxes
+        )
+
+        # save_clusters_image("cluster_candidates", boxes, cluster_candidates)
+
+        assert len(cluster_candidates) == 2
+
+    def test_cluster_boundary(self):
+        boxes = [(100, 100, 200, 200), (215, 215, 325, 325)]
+        boundary_boxes = [
+            get_cluster_boundary(box, self.min_region_size) for box in boxes
+        ]
+
+        # save_cluster_boundary_image("bound", boxes, boundary_boxes)
+        assert len(boundary_boxes) == 2
+
+    def test_cluster_regions(self):
+        boxes = [(100, 100, 200, 200), (202, 150, 252, 200), (900, 900, 950, 950)]
+
+        cluster_candidates = get_cluster_candidates(
+            self.frame_shape, self.min_region_size, boxes
+        )
+
+        regions = [
+            get_cluster_region(self.frame_shape, self.min_region_size, candidate, boxes)
+            for candidate in cluster_candidates
+        ]
+
+        # save_clusters_image("regions", boxes, cluster_candidates, regions)
+        assert len(regions) == 2
+
+    def test_box_too_small_for_cluster(self):
+        boxes = [(100, 100, 600, 600), (655, 100, 700, 145)]
+
+        cluster_candidates = get_cluster_candidates(
+            self.frame_shape, self.min_region_size, boxes
+        )
+
+        regions = [
+            get_cluster_region(self.frame_shape, self.min_region_size, candidate, boxes)
+            for candidate in cluster_candidates
+        ]
+
+        save_clusters_image("too_small", boxes, cluster_candidates, regions)
+
+        assert len(cluster_candidates) == 2
+        assert len(regions) == 2
+
+    def test_redundant_clusters(self):
+        boxes = [(100, 100, 200, 200), (305, 305, 415, 415)]
+
+        cluster_candidates = get_cluster_candidates(
+            self.frame_shape, self.min_region_size, boxes
+        )
+
+        regions = [
+            get_cluster_region(self.frame_shape, self.min_region_size, candidate, boxes)
+            for candidate in cluster_candidates
+        ]
+
+        # save_clusters_image("redundant", boxes, cluster_candidates, regions)
+
+        assert len(cluster_candidates) == 2
+        assert all([len(c) == 1 for c in cluster_candidates])
+        assert len(regions) == 2
+
+    def test_combine_boxes(self):
+        boxes = [
+            (460, 0, 561, 144),
+            (565, 0, 586, 71),
+        ]
+
+        # boundary_boxes = [get_cluster_boundary(box) for box in boxes]
+        # save_cluster_boundary_image("combine_bound", boxes, boundary_boxes)
+
+        cluster_candidates = get_cluster_candidates(
+            self.frame_shape, self.min_region_size, boxes
+        )
+
+        regions = [
+            get_cluster_region(self.frame_shape, self.min_region_size, candidate, boxes)
+            for candidate in cluster_candidates
+        ]
+
+        # save_clusters_image("combine", boxes, cluster_candidates, regions)
+        assert len(regions) == 1
+
+    def test_dont_combine_boxes(self):
+        boxes = [(460, 0, 532, 129), (586, 0, 606, 46)]
+
+        # boundary_boxes = [get_cluster_boundary(box) for box in boxes]
+        # save_cluster_boundary_image("dont_combine_bound", boxes, boundary_boxes)
+
+        cluster_candidates = get_cluster_candidates(
+            self.frame_shape, self.min_region_size, boxes
+        )
+
+        regions = [
+            get_cluster_region(self.frame_shape, self.min_region_size, candidate, boxes)
+            for candidate in cluster_candidates
+        ]
+
+        # save_clusters_image("dont_combine", boxes, cluster_candidates, regions)
+        assert len(regions) == 2
--- a/frigate/track/norfair_tracker.py
+++ b/frigate/track/norfair_tracker.py
@ -231,16 +231,32 @@ class NorfairTracker(ObjectTracker):
        # update or create new tracks
        active_ids = []
        for t in tracked_objects:
+            estimate = tuple(t.estimate.flatten().astype(int))
+            # keep the estimate within the bounds of the image
+            estimate = (
+                max(0, estimate[0]),
+                max(0, estimate[1]),
+                min(self.detect_config.width - 1, estimate[2]),
+                min(self.detect_config.height - 1, estimate[3]),
+            )
+            obj = {
+                **t.last_detection.data,
+                "estimate": estimate,
+            }
            active_ids.append(t.global_id)
            if t.global_id not in self.track_id_map:
-                self.register(t.global_id, t.last_detection.data)
+                self.register(t.global_id, obj)
            # if there wasn't a detection in this frame, increment disappeared
            elif t.last_detection.data["frame_time"] != frame_time:
                id = self.track_id_map[t.global_id]
                self.disappeared[id] += 1
+                # sometimes the estimate gets way off
+                # only update if the upper left corner is actually upper left
+                if estimate[0] < estimate[2] and estimate[1] < estimate[3]:
+                    self.tracked_objects[id]["estimate"] = obj["estimate"]
            # else update it
            else:
-                self.update(t.global_id, t.last_detection.data)
+                self.update(t.global_id, obj)

        # clear expired tracks
        expired_ids = [k for k in self.track_id_map.keys() if k not in active_ids]
--- a/frigate/video.py
+++ b/frigate/video.py
@ -1,9 +1,9 @@
 import datetime
 import logging
+import math
 import multiprocessing as mp
 import os
 import queue
-import random
 import signal
 import subprocess as sp
 import threading
@ -18,6 +18,7 @@ from frigate.config import CameraConfig, DetectConfig, PixelFormatEnum
 from frigate.const import CACHE_DIR
 from frigate.log import LogPipe
 from frigate.motion import MotionDetector
+from frigate.motion.improved_motion import ImprovedMotionDetector
 from frigate.object_detection import RemoteObjectDetector
 from frigate.track import ObjectTracker
 from frigate.track.norfair_tracker import NorfairTracker
@ -27,7 +28,7 @@ from frigate.util import (
    SharedMemoryFrameManager,
    area,
    calculate_region,
-    clipped,
+    draw_box_with_label,
    intersection,
    intersection_over_union,
    listen,
@ -462,9 +463,10 @@ def track_camera(
    objects_to_track = config.objects.track
    object_filters = config.objects.filters

-    motion_detector = MotionDetector(
+    motion_detector = ImprovedMotionDetector(
        frame_shape,
        config.motion,
+        config.detect.fps,
        improve_contrast_enabled,
        motion_threshold,
        motion_contour_area,
@ -505,6 +507,13 @@ def box_overlaps(b1, b2):
    return True


+def box_inside(b1, b2):
+    # check if b2 is inside b1
+    if b2[0] >= b1[0] and b2[1] >= b1[1] and b2[2] <= b1[2] and b2[3] <= b1[3]:
+        return True
+    return False
+
+
 def reduce_boxes(boxes, iou_threshold=0.0):
    clusters = []

@ -575,6 +584,91 @@ def detect(
    return detections


+def get_cluster_boundary(box, min_region):
+    # compute the max region size for the current box (box is 10% of region)
+    box_width = box[2] - box[0]
+    box_height = box[3] - box[1]
+    max_region_area = abs(box_width * box_height) / 0.1
+    max_region_size = max(min_region, int(math.sqrt(max_region_area)))
+
+    centroid = (box_width / 2 + box[0], box_height / 2 + box[1])
+
+    max_x_dist = int(max_region_size - box_width / 2 * 1.1)
+    max_y_dist = int(max_region_size - box_height / 2 * 1.1)
+
+    return [
+        int(centroid[0] - max_x_dist),
+        int(centroid[1] - max_y_dist),
+        int(centroid[0] + max_x_dist),
+        int(centroid[1] + max_y_dist),
+    ]
+
+
+def get_cluster_candidates(frame_shape, min_region, boxes):
+    # for each box, create a cluster of other boxes using its max region size
+    # only include boxes where the region is an appropriate (except the region could possibly be smaller?)
+    # size in the cluster. in order to be in the cluster, the furthest corner needs to be within the x,y offset
+    # determined by the max_region size minus half the box + 10%
+    # TODO: see if we can do this with numpy
+    cluster_candidates = []
+    used_boxes = []
+    # loop over each box
+    for current_index, b in enumerate(boxes):
+        if current_index in used_boxes:
+            continue
+        cluster = [current_index]
+        used_boxes.append(current_index)
+        cluster_boundary = get_cluster_boundary(b, min_region)
+        # find all other boxes that fit inside the boundary
+        for compare_index, compare_box in enumerate(boxes):
+            if compare_index in used_boxes:
+                continue
+
+            # if the box is not inside the potential cluster area, skip it
+            if not box_inside(cluster_boundary, compare_box):
+                continue
+
+            # get the region if you were to add this box to the cluster
+            potential_cluster = cluster + [compare_index]
+            cluster_region = get_cluster_region(
+                frame_shape, min_region, potential_cluster, boxes
+            )
+            # if region could be smaller and either box would be too small
+            # for the resulting region, dont cluster
+            should_cluster = True
+            if (cluster_region[2] - cluster_region[0]) > min_region:
+                for b in potential_cluster:
+                    box = boxes[b]
+                    # boxes should be more than 5% of the area of the region
+                    if area(box) / area(cluster_region) < 0.05:
+                        should_cluster = False
+                        break
+
+            if should_cluster:
+                cluster.append(compare_index)
+                used_boxes.append(compare_index)
+        cluster_candidates.append(cluster)
+
+    # return the unique clusters only
+    unique = {tuple(sorted(c)) for c in cluster_candidates}
+    return [list(tup) for tup in unique]
+
+
+def get_cluster_region(frame_shape, min_region, cluster, boxes):
+    min_x = frame_shape[1]
+    min_y = frame_shape[0]
+    max_x = 0
+    max_y = 0
+    for b in cluster:
+        min_x = min(boxes[b][0], min_x)
+        min_y = min(boxes[b][1], min_y)
+        max_x = max(boxes[b][2], max_x)
+        max_y = max(boxes[b][3], max_y)
+    return calculate_region(
+        frame_shape, min_x, min_y, max_x, max_y, min_region, multiplier=1.2
+    )
+
+
 def process_frames(
    camera_name: str,
    frame_queue: mp.Queue,
@ -603,6 +697,8 @@ def process_frames(

    startup_scan_counter = 0

+    region_min_size = int(max(model_config.height, model_config.width) / 2)
+
    while not stop_event.is_set():
        if exit_on_empty and frame_queue.empty():
            logger.info("Exiting track_objects...")
@ -654,35 +750,22 @@ def process_frames(

            # get tracked object boxes that aren't stationary
            tracked_object_boxes = [
-                obj["box"]
+                obj["estimate"]
                for obj in object_tracker.tracked_objects.values()
                if obj["id"] not in stationary_object_ids
            ]

-            # combine motion boxes with known locations of existing objects
-            combined_boxes = reduce_boxes(motion_boxes + tracked_object_boxes)
+            combined_boxes = motion_boxes + tracked_object_boxes

-            region_min_size = max(model_config.height, model_config.width)
-            # compute regions
-            regions = [
-                calculate_region(
-                    frame_shape,
-                    a[0],
-                    a[1],
-                    a[2],
-                    a[3],
-                    region_min_size,
-                    multiplier=random.uniform(1.2, 1.5),
+            cluster_candidates = get_cluster_candidates(
+                frame_shape, region_min_size, combined_boxes
            )
-                for a in combined_boxes
-            ]

-            # consolidate regions with heavy overlap
            regions = [
-                calculate_region(
-                    frame_shape, a[0], a[1], a[2], a[3], region_min_size, multiplier=1.0
+                get_cluster_region(
+                    frame_shape, region_min_size, candidate, combined_boxes
                )
-                for a in reduce_boxes(regions, 0.4)
+                for candidate in cluster_candidates
            ]

            # if starting up, get the next startup scan region
@ -733,13 +816,8 @@ def process_frames(
                )

            #########
-            # merge objects, check for clipped objects and look again up to 4 times
+            # merge objects
            #########
-            refining = len(regions) > 0
-            refine_count = 0
-            while refining and refine_count < 4:
-                refining = False
-
            # group by name
            detected_object_groups = defaultdict(lambda: [])
            for detection in detections:
@ -762,46 +840,15 @@ def process_frames(
                confidences = [o[1] for o in group]
                idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

+                # add objects
                for index in idxs:
                    index = index if isinstance(index, np.int32) else index[0]
                    obj = group[index]
-                        if clipped(obj, frame_shape):
-                            box = obj[2]
-                            # calculate a new region that will hopefully get the entire object
-                            region = calculate_region(
-                                frame_shape,
-                                box[0],
-                                box[1],
-                                box[2],
-                                box[3],
-                                region_min_size,
-                            )
-
-                            regions.append(region)
-
-                            selected_objects.extend(
-                                detect(
-                                    detect_config,
-                                    object_detector,
-                                    frame,
-                                    model_config,
-                                    region,
-                                    objects_to_track,
-                                    object_filters,
-                                )
-                            )
-
-                            refining = True
-                        else:
                    selected_objects.append(obj)

-                # set the detections list to only include top, complete objects
-                # and new detections
+            # set the detections list to only include top objects
            detections = selected_objects

-                if refining:
-                    refine_count += 1
-
            ## drop detections that overlap too much
            consolidated_detections = []

@ -848,7 +895,7 @@ def process_frames(
            else:
                object_tracker.update_frame_times(frame_time)

-        # debug tracking by writing frames
+        # debug object tracking
        if False:
            bgr_frame = cv2.cvtColor(
                frame,
@ -858,7 +905,67 @@ def process_frames(
            cv2.imwrite(
                f"debug/frames/track-{'{:.6f}'.format(frame_time)}.jpg", bgr_frame
            )
+        # debug motion boxes, tracked object boxes, and regions by writing frames
+        if False:
+            bgr_frame = cv2.cvtColor(
+                frame,
+                cv2.COLOR_YUV2BGR_I420,
+            )

+            for m_box in motion_boxes:
+                cv2.rectangle(
+                    bgr_frame,
+                    (m_box[0], m_box[1]),
+                    (m_box[2], m_box[3]),
+                    (0, 0, 255),
+                    2,
+                )
+
+            for b in tracked_object_boxes:
+                cv2.rectangle(
+                    bgr_frame,
+                    (b[0], b[1]),
+                    (b[2], b[3]),
+                    (255, 0, 0),
+                    2,
+                )
+
+            for obj in object_tracker.tracked_objects.values():
+                if obj["frame_time"] == frame_time:
+                    thickness = 2
+                    color = model_config.colormap[obj["label"]]
+                else:
+                    thickness = 1
+                    color = (255, 0, 0)
+
+                # draw the bounding boxes on the frame
+                box = obj["box"]
+
+                draw_box_with_label(
+                    bgr_frame,
+                    box[0],
+                    box[1],
+                    box[2],
+                    box[3],
+                    obj["label"],
+                    obj["id"],
+                    thickness=thickness,
+                    color=color,
+                )
+
+            for region in regions:
+                cv2.rectangle(
+                    bgr_frame,
+                    (region[0], region[1]),
+                    (region[2], region[3]),
+                    (0, 255, 0),
+                    2,
+                )
+
+            cv2.imwrite(
+                f"debug/frames/{camera_name}-{'{:.6f}'.format(frame_time)}.jpg",
+                bgr_frame,
+            )
        # add to the queue if not full
        if detected_objects_queue.full():
            frame_manager.delete(f"{camera_name}{frame_time}")