From 8ac4b001a213d02e1c993fcb2ff3696514676c51 Mon Sep 17 00:00:00 2001
From: Nicolas Mowen <nickmowen213@gmail.com>
Date: Thu, 17 Oct 2024 10:02:27 -0600
Subject: [PATCH] Various fixes (#14410)

* Fix access to the camera name of tracked objects in autotracking

* Reorganize tracked object for imports

* Separate out rockchip build

* Formatting

* Use original ffmpeg build

* Fix build

* Update default search type value to "thumbnail"
---
 .github/workflows/ci.yml                    |  22 +
 docker/main/install_deps.sh                 |   5 +-
 frigate/api/defs/events_query_parameters.py |   2 +-
 frigate/object_processing.py                | 456 +-------------------
 frigate/ptz/autotrack.py                    |  26 +-
 frigate/test/test_obects.py                 |   4 +-
 frigate/track/object_attribute.py           |  44 --
 frigate/track/tracked_object.py             | 447 +++++++++++++++++++
 frigate/util/image.py                       |  66 +++
 frigate/video.py                            |   6 +-
 10 files changed, 563 insertions(+), 515 deletions(-)
 delete mode 100644 frigate/track/object_attribute.py
 create mode 100644 frigate/track/tracked_object.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index bbf47a57d..3a5a67041 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -155,6 +155,28 @@ jobs:
             tensorrt.tags=${{ steps.setup.outputs.image-name }}-tensorrt
             *.cache-from=type=registry,ref=${{ steps.setup.outputs.cache-name }}-amd64
             *.cache-to=type=registry,ref=${{ steps.setup.outputs.cache-name }}-amd64,mode=max
+  arm64_extra_builds:
+    runs-on: ubuntu-latest
+    name: ARM Extra Build
+    needs:
+      - arm64_build
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+      - name: Set up QEMU and Buildx
+        id: setup
+        uses: ./.github/actions/setup
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Build and push Rockchip build
+        uses: docker/bake-action@v3
+        with:
+          push: true
+          targets: rk
+          files: docker/rockchip/rk.hcl
+          set: |
+            rk.tags=${{ steps.setup.outputs.image-name }}-rk
+            *.cache-from=type=gha
   combined_extra_builds:
     runs-on: ubuntu-latest
     name: Combined Extra Builds
diff --git a/docker/main/install_deps.sh b/docker/main/install_deps.sh
index 46f2a5357..2d7662053 100755
--- a/docker/main/install_deps.sh
+++ b/docker/main/install_deps.sh
@@ -8,6 +8,7 @@ apt-get -qq install --no-install-recommends -y \
     apt-transport-https \
     gnupg \
     wget \
+    lbzip2 \
     procps vainfo \
     unzip locales tzdata libxml2 xz-utils \
     python3.9 \
@@ -45,7 +46,7 @@ if [[ "${TARGETARCH}" == "amd64" ]]; then
     wget -qO btbn-ffmpeg.tar.xz "https://github.com/NickM-27/FFmpeg-Builds/releases/download/autobuild-2022-07-31-12-37/ffmpeg-n5.1-2-g915ef932a3-linux64-gpl-5.1.tar.xz"
     tar -xf btbn-ffmpeg.tar.xz -C /usr/lib/ffmpeg/5.0 --strip-components 1
     rm -rf btbn-ffmpeg.tar.xz /usr/lib/ffmpeg/5.0/doc /usr/lib/ffmpeg/5.0/bin/ffplay
-    wget -qO btbn-ffmpeg.tar.xz "https://github.com/BtbN/FFmpeg-Builds/releases/download/autobuild-2024-09-30-15-36/ffmpeg-n7.1-linux64-gpl-7.1.tar.xz"
+    wget -qO btbn-ffmpeg.tar.xz "https://github.com/NickM-27/FFmpeg-Builds/releases/download/autobuild-2024-09-19-12-51/ffmpeg-n7.0.2-18-g3e6cec1286-linux64-gpl-7.0.tar.xz"
     tar -xf btbn-ffmpeg.tar.xz -C /usr/lib/ffmpeg/7.0 --strip-components 1
     rm -rf btbn-ffmpeg.tar.xz /usr/lib/ffmpeg/7.0/doc /usr/lib/ffmpeg/7.0/bin/ffplay
 fi
@@ -57,7 +58,7 @@ if [[ "${TARGETARCH}" == "arm64" ]]; then
     wget -qO btbn-ffmpeg.tar.xz "https://github.com/NickM-27/FFmpeg-Builds/releases/download/autobuild-2022-07-31-12-37/ffmpeg-n5.1-2-g915ef932a3-linuxarm64-gpl-5.1.tar.xz"
     tar -xf btbn-ffmpeg.tar.xz -C /usr/lib/ffmpeg/5.0 --strip-components 1
     rm -rf btbn-ffmpeg.tar.xz /usr/lib/ffmpeg/5.0/doc /usr/lib/ffmpeg/5.0/bin/ffplay
-    wget -qO btbn-ffmpeg.tar.xz "https://github.com/BtbN/FFmpeg-Builds/releases/download/autobuild-2024-09-30-15-36/ffmpeg-n7.1-linuxarm64-gpl-7.1.tar.xz"
+    wget -qO btbn-ffmpeg.tar.xz "https://github.com/NickM-27/FFmpeg-Builds/releases/download/autobuild-2024-09-19-12-51/ffmpeg-n7.0.2-18-g3e6cec1286-linuxarm64-gpl-7.0.tar.xz"
     tar -xf btbn-ffmpeg.tar.xz -C /usr/lib/ffmpeg/7.0 --strip-components 1
     rm -rf btbn-ffmpeg.tar.xz /usr/lib/ffmpeg/7.0/doc /usr/lib/ffmpeg/7.0/bin/ffplay
 fi
diff --git a/frigate/api/defs/events_query_parameters.py b/frigate/api/defs/events_query_parameters.py
index c4e40bd4e..f4c98809c 100644
--- a/frigate/api/defs/events_query_parameters.py
+++ b/frigate/api/defs/events_query_parameters.py
@@ -35,7 +35,7 @@ class EventsQueryParams(BaseModel):
 class EventsSearchQueryParams(BaseModel):
     query: Optional[str] = None
     event_id: Optional[str] = None
-    search_type: Optional[str] = "thumbnail,description"
+    search_type: Optional[str] = "thumbnail"
     include_thumbnails: Optional[int] = 1
     limit: Optional[int] = 50
     cameras: Optional[str] = "all"
diff --git a/frigate/object_processing.py b/frigate/object_processing.py
index 6e63562a4..7ba3270f1 100644
--- a/frigate/object_processing.py
+++ b/frigate/object_processing.py
@@ -1,4 +1,3 @@
-import base64
 import datetime
 import json
 import logging
@@ -7,7 +6,6 @@ import queue
 import threading
 from collections import Counter, defaultdict
 from multiprocessing.synchronize import Event as MpEvent
-from statistics import median
 from typing import Callable
 
 import cv2
@@ -18,9 +16,7 @@ from frigate.comms.dispatcher import Dispatcher
 from frigate.comms.events_updater import EventEndSubscriber, EventUpdatePublisher
 from frigate.comms.inter_process import InterProcessRequestor
 from frigate.config import (
-    CameraConfig,
     FrigateConfig,
-    ModelConfig,
     MqttConfig,
     RecordConfig,
     SnapshotsConfig,
@@ -29,466 +25,18 @@ from frigate.config import (
 from frigate.const import CLIPS_DIR, UPDATE_CAMERA_ACTIVITY
 from frigate.events.types import EventStateEnum, EventTypeEnum
 from frigate.ptz.autotrack import PtzAutoTrackerThread
+from frigate.track.tracked_object import TrackedObject
 from frigate.util.image import (
     SharedMemoryFrameManager,
-    area,
-    calculate_region,
     draw_box_with_label,
     draw_timestamp,
+    is_better_thumbnail,
     is_label_printable,
 )
 
 logger = logging.getLogger(__name__)
 
 
-def on_edge(box, frame_shape):
-    if (
-        box[0] == 0
-        or box[1] == 0
-        or box[2] == frame_shape[1] - 1
-        or box[3] == frame_shape[0] - 1
-    ):
-        return True
-
-
-def has_better_attr(current_thumb, new_obj, attr_label) -> bool:
-    max_new_attr = max(
-        [0]
-        + [area(a["box"]) for a in new_obj["attributes"] if a["label"] == attr_label]
-    )
-    max_current_attr = max(
-        [0]
-        + [
-            area(a["box"])
-            for a in current_thumb["attributes"]
-            if a["label"] == attr_label
-        ]
-    )
-
-    # if the thumb has a higher scoring attr
-    return max_new_attr > max_current_attr
-
-
-def is_better_thumbnail(label, current_thumb, new_obj, frame_shape) -> bool:
-    # larger is better
-    # cutoff images are less ideal, but they should also be smaller?
-    # better scores are obviously better too
-
-    # check face on person
-    if label == "person":
-        if has_better_attr(current_thumb, new_obj, "face"):
-            return True
-        # if the current thumb has a face attr, dont update unless it gets better
-        if any([a["label"] == "face" for a in current_thumb["attributes"]]):
-            return False
-
-    # check license_plate on car
-    if label == "car":
-        if has_better_attr(current_thumb, new_obj, "license_plate"):
-            return True
-        # if the current thumb has a license_plate attr, dont update unless it gets better
-        if any([a["label"] == "license_plate" for a in current_thumb["attributes"]]):
-            return False
-
-    # if the new_thumb is on an edge, and the current thumb is not
-    if on_edge(new_obj["box"], frame_shape) and not on_edge(
-        current_thumb["box"], frame_shape
-    ):
-        return False
-
-    # if the score is better by more than 5%
-    if new_obj["score"] > current_thumb["score"] + 0.05:
-        return True
-
-    # if the area is 10% larger
-    if new_obj["area"] > current_thumb["area"] * 1.1:
-        return True
-
-    return False
-
-
-class TrackedObject:
-    def __init__(
-        self,
-        model_config: ModelConfig,
-        camera_config: CameraConfig,
-        frame_cache,
-        obj_data: dict[str, any],
-    ):
-        # set the score history then remove as it is not part of object state
-        self.score_history = obj_data["score_history"]
-        del obj_data["score_history"]
-
-        self.obj_data = obj_data
-        self.colormap = model_config.colormap
-        self.logos = model_config.all_attribute_logos
-        self.camera_config = camera_config
-        self.frame_cache = frame_cache
-        self.zone_presence: dict[str, int] = {}
-        self.zone_loitering: dict[str, int] = {}
-        self.current_zones = []
-        self.entered_zones = []
-        self.attributes = defaultdict(float)
-        self.false_positive = True
-        self.has_clip = False
-        self.has_snapshot = False
-        self.top_score = self.computed_score = 0.0
-        self.thumbnail_data = None
-        self.last_updated = 0
-        self.last_published = 0
-        self.frame = None
-        self.active = True
-        self.pending_loitering = False
-        self.previous = self.to_dict()
-
-    def _is_false_positive(self):
-        # once a true positive, always a true positive
-        if not self.false_positive:
-            return False
-
-        threshold = self.camera_config.objects.filters[self.obj_data["label"]].threshold
-        return self.computed_score < threshold
-
-    def compute_score(self):
-        """get median of scores for object."""
-        return median(self.score_history)
-
-    def update(self, current_frame_time: float, obj_data, has_valid_frame: bool):
-        thumb_update = False
-        significant_change = False
-        autotracker_update = False
-        # if the object is not in the current frame, add a 0.0 to the score history
-        if obj_data["frame_time"] != current_frame_time:
-            self.score_history.append(0.0)
-        else:
-            self.score_history.append(obj_data["score"])
-
-        # only keep the last 10 scores
-        if len(self.score_history) > 10:
-            self.score_history = self.score_history[-10:]
-
-        # calculate if this is a false positive
-        self.computed_score = self.compute_score()
-        if self.computed_score > self.top_score:
-            self.top_score = self.computed_score
-        self.false_positive = self._is_false_positive()
-        self.active = self.is_active()
-
-        if not self.false_positive and has_valid_frame:
-            # determine if this frame is a better thumbnail
-            if self.thumbnail_data is None or is_better_thumbnail(
-                self.obj_data["label"],
-                self.thumbnail_data,
-                obj_data,
-                self.camera_config.frame_shape,
-            ):
-                self.thumbnail_data = {
-                    "frame_time": current_frame_time,
-                    "box": obj_data["box"],
-                    "area": obj_data["area"],
-                    "region": obj_data["region"],
-                    "score": obj_data["score"],
-                    "attributes": obj_data["attributes"],
-                }
-                thumb_update = True
-
-        # check zones
-        current_zones = []
-        bottom_center = (obj_data["centroid"][0], obj_data["box"][3])
-        in_loitering_zone = False
-
-        # check each zone
-        for name, zone in self.camera_config.zones.items():
-            # if the zone is not for this object type, skip
-            if len(zone.objects) > 0 and obj_data["label"] not in zone.objects:
-                continue
-            contour = zone.contour
-            zone_score = self.zone_presence.get(name, 0) + 1
-            # check if the object is in the zone
-            if cv2.pointPolygonTest(contour, bottom_center, False) >= 0:
-                # if the object passed the filters once, dont apply again
-                if name in self.current_zones or not zone_filtered(self, zone.filters):
-                    # an object is only considered present in a zone if it has a zone inertia of 3+
-                    if zone_score >= zone.inertia:
-                        # if the zone has loitering time, update loitering status
-                        if zone.loitering_time > 0:
-                            in_loitering_zone = True
-
-                        loitering_score = self.zone_loitering.get(name, 0) + 1
-
-                        # loitering time is configured as seconds, convert to count of frames
-                        if loitering_score >= (
-                            self.camera_config.zones[name].loitering_time
-                            * self.camera_config.detect.fps
-                        ):
-                            current_zones.append(name)
-
-                            if name not in self.entered_zones:
-                                self.entered_zones.append(name)
-                        else:
-                            self.zone_loitering[name] = loitering_score
-                    else:
-                        self.zone_presence[name] = zone_score
-            else:
-                # once an object has a zone inertia of 3+ it is not checked anymore
-                if 0 < zone_score < zone.inertia:
-                    self.zone_presence[name] = zone_score - 1
-
-        # update loitering status
-        self.pending_loitering = in_loitering_zone
-
-        # maintain attributes
-        for attr in obj_data["attributes"]:
-            if self.attributes[attr["label"]] < attr["score"]:
-                self.attributes[attr["label"]] = attr["score"]
-
-        # populate the sub_label for object with highest scoring logo
-        if self.obj_data["label"] in ["car", "package", "person"]:
-            recognized_logos = {
-                k: self.attributes[k] for k in self.logos if k in self.attributes
-            }
-            if len(recognized_logos) > 0:
-                max_logo = max(recognized_logos, key=recognized_logos.get)
-
-                # don't overwrite sub label if it is already set
-                if (
-                    self.obj_data.get("sub_label") is None
-                    or self.obj_data["sub_label"][0] == max_logo
-                ):
-                    self.obj_data["sub_label"] = (max_logo, recognized_logos[max_logo])
-
-        # check for significant change
-        if not self.false_positive:
-            # if the zones changed, signal an update
-            if set(self.current_zones) != set(current_zones):
-                significant_change = True
-
-            # if the position changed, signal an update
-            if self.obj_data["position_changes"] != obj_data["position_changes"]:
-                significant_change = True
-
-            if self.obj_data["attributes"] != obj_data["attributes"]:
-                significant_change = True
-
-            # if the state changed between stationary and active
-            if self.previous["active"] != self.active:
-                significant_change = True
-
-            # update at least once per minute
-            if self.obj_data["frame_time"] - self.previous["frame_time"] > 60:
-                significant_change = True
-
-            # update autotrack at most 3 objects per second
-            if self.obj_data["frame_time"] - self.previous["frame_time"] >= (1 / 3):
-                autotracker_update = True
-
-        self.obj_data.update(obj_data)
-        self.current_zones = current_zones
-        return (thumb_update, significant_change, autotracker_update)
-
-    def to_dict(self, include_thumbnail: bool = False):
-        event = {
-            "id": self.obj_data["id"],
-            "camera": self.camera_config.name,
-            "frame_time": self.obj_data["frame_time"],
-            "snapshot": self.thumbnail_data,
-            "label": self.obj_data["label"],
-            "sub_label": self.obj_data.get("sub_label"),
-            "top_score": self.top_score,
-            "false_positive": self.false_positive,
-            "start_time": self.obj_data["start_time"],
-            "end_time": self.obj_data.get("end_time", None),
-            "score": self.obj_data["score"],
-            "box": self.obj_data["box"],
-            "area": self.obj_data["area"],
-            "ratio": self.obj_data["ratio"],
-            "region": self.obj_data["region"],
-            "active": self.active,
-            "stationary": not self.active,
-            "motionless_count": self.obj_data["motionless_count"],
-            "position_changes": self.obj_data["position_changes"],
-            "current_zones": self.current_zones.copy(),
-            "entered_zones": self.entered_zones.copy(),
-            "has_clip": self.has_clip,
-            "has_snapshot": self.has_snapshot,
-            "attributes": self.attributes,
-            "current_attributes": self.obj_data["attributes"],
-            "pending_loitering": self.pending_loitering,
-        }
-
-        if include_thumbnail:
-            event["thumbnail"] = base64.b64encode(self.get_thumbnail()).decode("utf-8")
-
-        return event
-
-    def is_active(self):
-        return not self.is_stationary()
-
-    def is_stationary(self):
-        return (
-            self.obj_data["motionless_count"]
-            > self.camera_config.detect.stationary.threshold
-        )
-
-    def get_thumbnail(self):
-        if (
-            self.thumbnail_data is None
-            or self.thumbnail_data["frame_time"] not in self.frame_cache
-        ):
-            ret, jpg = cv2.imencode(".jpg", np.zeros((175, 175, 3), np.uint8))
-
-        jpg_bytes = self.get_jpg_bytes(
-            timestamp=False, bounding_box=False, crop=True, height=175
-        )
-
-        if jpg_bytes:
-            return jpg_bytes
-        else:
-            ret, jpg = cv2.imencode(".jpg", np.zeros((175, 175, 3), np.uint8))
-            return jpg.tobytes()
-
-    def get_clean_png(self):
-        if self.thumbnail_data is None:
-            return None
-
-        try:
-            best_frame = cv2.cvtColor(
-                self.frame_cache[self.thumbnail_data["frame_time"]],
-                cv2.COLOR_YUV2BGR_I420,
-            )
-        except KeyError:
-            logger.warning(
-                f"Unable to create clean png because frame {self.thumbnail_data['frame_time']} is not in the cache"
-            )
-            return None
-
-        ret, png = cv2.imencode(".png", best_frame)
-        if ret:
-            return png.tobytes()
-        else:
-            return None
-
-    def get_jpg_bytes(
-        self, timestamp=False, bounding_box=False, crop=False, height=None, quality=70
-    ):
-        if self.thumbnail_data is None:
-            return None
-
-        try:
-            best_frame = cv2.cvtColor(
-                self.frame_cache[self.thumbnail_data["frame_time"]],
-                cv2.COLOR_YUV2BGR_I420,
-            )
-        except KeyError:
-            logger.warning(
-                f"Unable to create jpg because frame {self.thumbnail_data['frame_time']} is not in the cache"
-            )
-            return None
-
-        if bounding_box:
-            thickness = 2
-            color = self.colormap[self.obj_data["label"]]
-
-            # draw the bounding boxes on the frame
-            box = self.thumbnail_data["box"]
-            draw_box_with_label(
-                best_frame,
-                box[0],
-                box[1],
-                box[2],
-                box[3],
-                self.obj_data["label"],
-                f"{int(self.thumbnail_data['score']*100)}% {int(self.thumbnail_data['area'])}",
-                thickness=thickness,
-                color=color,
-            )
-
-            # draw any attributes
-            for attribute in self.thumbnail_data["attributes"]:
-                box = attribute["box"]
-                draw_box_with_label(
-                    best_frame,
-                    box[0],
-                    box[1],
-                    box[2],
-                    box[3],
-                    attribute["label"],
-                    f"{attribute['score']:.0%}",
-                    thickness=thickness,
-                    color=color,
-                )
-
-        if crop:
-            box = self.thumbnail_data["box"]
-            box_size = 300
-            region = calculate_region(
-                best_frame.shape,
-                box[0],
-                box[1],
-                box[2],
-                box[3],
-                box_size,
-                multiplier=1.1,
-            )
-            best_frame = best_frame[region[1] : region[3], region[0] : region[2]]
-
-        if height:
-            width = int(height * best_frame.shape[1] / best_frame.shape[0])
-            best_frame = cv2.resize(
-                best_frame, dsize=(width, height), interpolation=cv2.INTER_AREA
-            )
-        if timestamp:
-            color = self.camera_config.timestamp_style.color
-            draw_timestamp(
-                best_frame,
-                self.thumbnail_data["frame_time"],
-                self.camera_config.timestamp_style.format,
-                font_effect=self.camera_config.timestamp_style.effect,
-                font_thickness=self.camera_config.timestamp_style.thickness,
-                font_color=(color.blue, color.green, color.red),
-                position=self.camera_config.timestamp_style.position,
-            )
-
-        ret, jpg = cv2.imencode(
-            ".jpg", best_frame, [int(cv2.IMWRITE_JPEG_QUALITY), quality]
-        )
-        if ret:
-            return jpg.tobytes()
-        else:
-            return None
-
-
-def zone_filtered(obj: TrackedObject, object_config):
-    object_name = obj.obj_data["label"]
-
-    if object_name in object_config:
-        obj_settings = object_config[object_name]
-
-        # if the min area is larger than the
-        # detected object, don't add it to detected objects
-        if obj_settings.min_area > obj.obj_data["area"]:
-            return True
-
-        # if the detected object is larger than the
-        # max area, don't add it to detected objects
-        if obj_settings.max_area < obj.obj_data["area"]:
-            return True
-
-        # if the score is lower than the threshold, skip
-        if obj_settings.threshold > obj.computed_score:
-            return True
-
-        # if the object is not proportionally wide enough
-        if obj_settings.min_ratio > obj.obj_data["ratio"]:
-            return True
-
-        # if the object is proportionally too wide
-        if obj_settings.max_ratio < obj.obj_data["ratio"]:
-            return True
-
-    return False
-
-
 # Maintains the state of a camera
 class CameraState:
     def __init__(
diff --git a/frigate/ptz/autotrack.py b/frigate/ptz/autotrack.py
index fd9933bcb..e9226f267 100644
--- a/frigate/ptz/autotrack.py
+++ b/frigate/ptz/autotrack.py
@@ -32,6 +32,7 @@ from frigate.const import (
     CONFIG_DIR,
 )
 from frigate.ptz.onvif import OnvifController
+from frigate.track.tracked_object import TrackedObject
 from frigate.util.builtin import update_yaml_file
 from frigate.util.image import SharedMemoryFrameManager, intersection_over_union
 
@@ -214,7 +215,7 @@ class PtzAutoTracker:
             ):
                 self._autotracker_setup(camera_config, camera)
 
-    def _autotracker_setup(self, camera_config, camera):
+    def _autotracker_setup(self, camera_config: CameraConfig, camera: str):
         logger.debug(f"{camera}: Autotracker init")
 
         self.object_types[camera] = camera_config.onvif.autotracking.track
@@ -852,7 +853,7 @@ class PtzAutoTracker:
             logger.debug(f"{camera}: Valid velocity ")
             return True, velocities.flatten()
 
-    def _get_distance_threshold(self, camera, obj):
+    def _get_distance_threshold(self, camera: str, obj: TrackedObject):
         # Returns true if Euclidean distance from object to center of frame is
         # less than 10% of the of the larger dimension (width or height) of the frame,
         # multiplied by a scaling factor for object size.
@@ -888,7 +889,9 @@ class PtzAutoTracker:
 
         return distance_threshold
 
-    def _should_zoom_in(self, camera, obj, box, predicted_time, debug_zooming=False):
+    def _should_zoom_in(
+        self, camera: str, obj: TrackedObject, box, predicted_time, debug_zooming=False
+    ):
         # returns True if we should zoom in, False if we should zoom out, None to do nothing
         camera_config = self.config.cameras[camera]
         camera_width = camera_config.frame_shape[1]
@@ -1019,7 +1022,7 @@ class PtzAutoTracker:
         # Don't zoom at all
         return None
 
-    def _autotrack_move_ptz(self, camera, obj):
+    def _autotrack_move_ptz(self, camera: str, obj: TrackedObject):
         camera_config = self.config.cameras[camera]
         camera_width = camera_config.frame_shape[1]
         camera_height = camera_config.frame_shape[0]
@@ -1090,7 +1093,12 @@ class PtzAutoTracker:
                 self._enqueue_move(camera, obj.obj_data["frame_time"], 0, 0, zoom)
 
     def _get_zoom_amount(
-        self, camera, obj, predicted_box, predicted_movement_time, debug_zoom=True
+        self,
+        camera: str,
+        obj: TrackedObject,
+        predicted_box,
+        predicted_movement_time,
+        debug_zoom=True,
     ):
         camera_config = self.config.cameras[camera]
 
@@ -1186,13 +1194,13 @@ class PtzAutoTracker:
 
         return zoom
 
-    def is_autotracking(self, camera):
+    def is_autotracking(self, camera: str):
         return self.tracked_object[camera] is not None
 
-    def autotracked_object_region(self, camera):
+    def autotracked_object_region(self, camera: str):
         return self.tracked_object[camera]["region"]
 
-    def autotrack_object(self, camera, obj):
+    def autotrack_object(self, camera: str, obj: TrackedObject):
         camera_config = self.config.cameras[camera]
 
         if camera_config.onvif.autotracking.enabled:
@@ -1208,7 +1216,7 @@ class PtzAutoTracker:
             if (
                 # new object
                 self.tracked_object[camera] is None
-                and obj.camera == camera
+                and obj.camera_config.name == camera
                 and obj.obj_data["label"] in self.object_types[camera]
                 and set(obj.entered_zones) & set(self.required_zones[camera])
                 and not obj.previous["false_positive"]
diff --git a/frigate/test/test_obects.py b/frigate/test/test_obects.py
index f1c039ef8..8fe831980 100644
--- a/frigate/test/test_obects.py
+++ b/frigate/test/test_obects.py
@@ -1,11 +1,11 @@
 import unittest
 
-from frigate.track.object_attribute import ObjectAttribute
+from frigate.track.tracked_object import TrackedObjectAttribute
 
 
 class TestAttribute(unittest.TestCase):
     def test_overlapping_object_selection(self) -> None:
-        attribute = ObjectAttribute(
+        attribute = TrackedObjectAttribute(
             (
                 "amazon",
                 0.80078125,
diff --git a/frigate/track/object_attribute.py b/frigate/track/object_attribute.py
deleted file mode 100644
index 54433c5f3..000000000
--- a/frigate/track/object_attribute.py
+++ /dev/null
@@ -1,44 +0,0 @@
-"""Object attribute."""
-
-from frigate.util.object import area, box_inside
-
-
-class ObjectAttribute:
-    def __init__(self, raw_data: tuple) -> None:
-        self.label = raw_data[0]
-        self.score = raw_data[1]
-        self.box = raw_data[2]
-        self.area = raw_data[3]
-        self.ratio = raw_data[4]
-        self.region = raw_data[5]
-
-    def get_tracking_data(self) -> dict[str, any]:
-        """Return data saved to the object."""
-        return {
-            "label": self.label,
-            "score": self.score,
-            "box": self.box,
-        }
-
-    def find_best_object(self, objects: list[dict[str, any]]) -> str:
-        """Find the best attribute for each object and return its ID."""
-        best_object_area = None
-        best_object_id = None
-
-        for obj in objects:
-            if not box_inside(obj["box"], self.box):
-                continue
-
-            object_area = area(obj["box"])
-
-            # if multiple objects have the same attribute then they
-            # are overlapping, it is most likely that the smaller object
-            # is the one with the attribute
-            if best_object_area is None:
-                best_object_area = object_area
-                best_object_id = obj["id"]
-            elif object_area < best_object_area:
-                best_object_area = object_area
-                best_object_id = obj["id"]
-
-        return best_object_id
diff --git a/frigate/track/tracked_object.py b/frigate/track/tracked_object.py
new file mode 100644
index 000000000..a4b4e8426
--- /dev/null
+++ b/frigate/track/tracked_object.py
@@ -0,0 +1,447 @@
+"""Tracked object and tracked object attribute."""
+
+import base64
+import logging
+from collections import defaultdict
+from statistics import median
+
+import cv2
+import numpy as np
+
+from frigate.config import (
+    CameraConfig,
+    ModelConfig,
+)
+from frigate.util.image import (
+    area,
+    calculate_region,
+    draw_box_with_label,
+    draw_timestamp,
+    is_better_thumbnail,
+)
+from frigate.util.object import box_inside
+
+logger = logging.getLogger(__name__)
+
+
+class TrackedObject:
+    def __init__(
+        self,
+        model_config: ModelConfig,
+        camera_config: CameraConfig,
+        frame_cache,
+        obj_data: dict[str, any],
+    ):
+        # set the score history then remove as it is not part of object state
+        self.score_history = obj_data["score_history"]
+        del obj_data["score_history"]  # mutates the dict passed by the caller
+
+        self.obj_data = obj_data
+        self.colormap = model_config.colormap  # label -> color for drawn boxes
+        self.logos = model_config.all_attribute_logos  # candidates for sub_label
+        self.camera_config = camera_config
+        self.frame_cache = frame_cache  # looked up by frame_time
+        self.zone_presence: dict[str, int] = {}  # zone name -> presence count
+        self.zone_loitering: dict[str, int] = {}  # zone name -> loitering count
+        self.current_zones = []
+        self.entered_zones = []
+        self.attributes = defaultdict(float)  # attribute label -> best score seen
+        self.false_positive = True  # cleared by update() once the score passes
+        self.has_clip = False
+        self.has_snapshot = False
+        self.top_score = self.computed_score = 0.0
+        self.thumbnail_data = None  # set by update() when a better frame is found
+        self.last_updated = 0
+        self.last_published = 0
+        self.frame = None
+        self.active = True
+        self.pending_loitering = False
+        self.previous = self.to_dict()  # snapshot that update() compares against
+
+    def _is_false_positive(self):
+        """Return True while the computed score is below the label's threshold."""
+        # once a true positive, always a true positive
+        if self.false_positive:
+            label_filter = self.camera_config.objects.filters[self.obj_data["label"]]
+            return self.computed_score < label_filter.threshold
+        return False
+
+    def compute_score(self):
+        """Return the median of the scores currently held in score_history."""
+        return median(self.score_history)
+
+    def update(self, current_frame_time: float, obj_data, has_valid_frame: bool):
+        thumb_update = False  # set when thumbnail_data is replaced below
+        significant_change = False  # set when the tracked state changed notably
+        autotracker_update = False  # set when >= 1/3 s passed since self.previous
+        # if the object is not in the current frame, add a 0.0 to the score history
+        if obj_data["frame_time"] != current_frame_time:
+            self.score_history.append(0.0)
+        else:
+            self.score_history.append(obj_data["score"])
+
+        # only keep the last 10 scores
+        if len(self.score_history) > 10:
+            self.score_history = self.score_history[-10:]
+
+        # calculate if this is a false positive
+        self.computed_score = self.compute_score()
+        if self.computed_score > self.top_score:
+            self.top_score = self.computed_score
+        self.false_positive = self._is_false_positive()
+        self.active = self.is_active()  # reads self.obj_data, refreshed only at the end
+
+        if not self.false_positive and has_valid_frame:
+            # determine if this frame is a better thumbnail
+            if self.thumbnail_data is None or is_better_thumbnail(
+                self.obj_data["label"],
+                self.thumbnail_data,
+                obj_data,
+                self.camera_config.frame_shape,
+            ):
+                self.thumbnail_data = {
+                    "frame_time": current_frame_time,
+                    "box": obj_data["box"],
+                    "area": obj_data["area"],
+                    "region": obj_data["region"],
+                    "score": obj_data["score"],
+                    "attributes": obj_data["attributes"],
+                }
+                thumb_update = True
+
+        # check zones
+        current_zones = []
+        bottom_center = (obj_data["centroid"][0], obj_data["box"][3])
+        in_loitering_zone = False
+
+        # check each zone
+        for name, zone in self.camera_config.zones.items():
+            # if the zone is not for this object type, skip
+            if len(zone.objects) > 0 and obj_data["label"] not in zone.objects:
+                continue
+            contour = zone.contour
+            zone_score = self.zone_presence.get(name, 0) + 1
+            # check if the object is in the zone
+            if cv2.pointPolygonTest(contour, bottom_center, False) >= 0:
+                # if the object passed the filters once, dont apply again
+                if name in self.current_zones or not zone_filtered(self, zone.filters):
+                    # the object only counts as present once its score reaches zone.inertia
+                    if zone_score >= zone.inertia:
+                        # if the zone has loitering time, update loitering status
+                        if zone.loitering_time > 0:
+                            in_loitering_zone = True
+
+                        loitering_score = self.zone_loitering.get(name, 0) + 1
+
+                        # loitering time is configured as seconds, convert to count of frames
+                        if loitering_score >= (
+                            self.camera_config.zones[name].loitering_time
+                            * self.camera_config.detect.fps
+                        ):
+                            current_zones.append(name)
+
+                            if name not in self.entered_zones:
+                                self.entered_zones.append(name)
+                        else:
+                            self.zone_loitering[name] = loitering_score
+                    else:
+                        self.zone_presence[name] = zone_score
+            else:
+                # once the score has reached zone.inertia it is not adjusted anymore
+                if 0 < zone_score < zone.inertia:
+                    self.zone_presence[name] = zone_score - 1  # NOTE(review): equals the stored count
+
+        # update loitering status
+        self.pending_loitering = in_loitering_zone
+
+        # maintain attributes
+        for attr in obj_data["attributes"]:
+            if self.attributes[attr["label"]] < attr["score"]:
+                self.attributes[attr["label"]] = attr["score"]
+
+        # populate the sub_label for object with highest scoring logo
+        if self.obj_data["label"] in ["car", "package", "person"]:
+            recognized_logos = {
+                k: self.attributes[k] for k in self.logos if k in self.attributes
+            }
+            if len(recognized_logos) > 0:
+                max_logo = max(recognized_logos, key=recognized_logos.get)
+
+                # only set the sub label when unset, or refresh the score of the same logo
+                if (
+                    self.obj_data.get("sub_label") is None
+                    or self.obj_data["sub_label"][0] == max_logo
+                ):
+                    self.obj_data["sub_label"] = (max_logo, recognized_logos[max_logo])
+
+        # check for significant change
+        if not self.false_positive:
+            # if the zones changed, signal an update
+            if set(self.current_zones) != set(current_zones):
+                significant_change = True
+
+            # if the position changed, signal an update
+            if self.obj_data["position_changes"] != obj_data["position_changes"]:
+                significant_change = True
+
+            if self.obj_data["attributes"] != obj_data["attributes"]:
+                significant_change = True
+
+            # if the state changed between stationary and active
+            if self.previous["active"] != self.active:
+                significant_change = True
+
+            # update at least once per minute (NOTE(review): uses the stored frame_time, not the incoming one)
+            if self.obj_data["frame_time"] - self.previous["frame_time"] > 60:
+                significant_change = True
+
+            # update autotrack at most 3 objects per second
+            if self.obj_data["frame_time"] - self.previous["frame_time"] >= (1 / 3):
+                autotracker_update = True
+
+        self.obj_data.update(obj_data)
+        self.current_zones = current_zones
+        return (thumb_update, significant_change, autotracker_update)
+
+    def to_dict(self, include_thumbnail: bool = False):
+        event = {
+            "id": self.obj_data["id"],
+            "camera": self.camera_config.name,
+            "frame_time": self.obj_data["frame_time"],
+            "snapshot": self.thumbnail_data,  # same dict object, or None
+            "label": self.obj_data["label"],
+            "sub_label": self.obj_data.get("sub_label"),
+            "top_score": self.top_score,
+            "false_positive": self.false_positive,
+            "start_time": self.obj_data["start_time"],
+            "end_time": self.obj_data.get("end_time", None),
+            "score": self.obj_data["score"],
+            "box": self.obj_data["box"],
+            "area": self.obj_data["area"],
+            "ratio": self.obj_data["ratio"],
+            "region": self.obj_data["region"],
+            "active": self.active,
+            "stationary": not self.active,
+            "motionless_count": self.obj_data["motionless_count"],
+            "position_changes": self.obj_data["position_changes"],
+            "current_zones": self.current_zones.copy(),
+            "entered_zones": self.entered_zones.copy(),
+            "has_clip": self.has_clip,
+            "has_snapshot": self.has_snapshot,
+            "attributes": self.attributes,  # NOTE(review): live defaultdict, not a copy
+            "current_attributes": self.obj_data["attributes"],
+            "pending_loitering": self.pending_loitering,
+        }
+
+        if include_thumbnail:  # adds the jpg from get_thumbnail() as base64 text
+            event["thumbnail"] = base64.b64encode(self.get_thumbnail()).decode("utf-8")
+
+        return event
+
+    def is_active(self):
+        return not self.is_stationary()  # active is defined as not stationary
+
+    def is_stationary(self):
+        """Return True once motionless_count exceeds the stationary threshold."""
+        # strictly greater: a count equal to the threshold is still active
+        limit = self.camera_config.detect.stationary.threshold
+        return self.obj_data["motionless_count"] > limit
+
+    def get_thumbnail(self):
+        """Return the cropped thumbnail jpg bytes, or a black 175x175 jpg."""
+        jpg_bytes = None
+        # only build the jpg when the best frame is still in the frame cache
+        if (
+            self.thumbnail_data is not None
+            and self.thumbnail_data["frame_time"] in self.frame_cache
+        ):
+            jpg_bytes = self.get_jpg_bytes(
+                timestamp=False, bounding_box=False, crop=True, height=175
+            )
+
+        if jpg_bytes:
+            return jpg_bytes
+        ret, jpg = cv2.imencode(".jpg", np.zeros((175, 175, 3), np.uint8))
+        return jpg.tobytes()
+
+    def get_clean_png(self):
+        """Return the best frame as png bytes with nothing drawn on it.
+
+        Returns None when there is no thumbnail data, the frame is no longer
+        in the frame cache, or the png encoding fails.
+        """
+        if self.thumbnail_data is None:
+            return None
+
+        try:
+            yuv_frame = self.frame_cache[self.thumbnail_data["frame_time"]]
+            best_frame = cv2.cvtColor(yuv_frame, cv2.COLOR_YUV2BGR_I420)
+        except KeyError:
+            logger.warning(
+                f"Unable to create clean png because frame {self.thumbnail_data['frame_time']} is not in the cache"
+            )
+            return None
+
+        encoded, png = cv2.imencode(".png", best_frame)
+        return png.tobytes() if encoded else None
+
+    def get_jpg_bytes(
+        self, timestamp=False, bounding_box=False, crop=False, height=None, quality=70
+    ):
+        if self.thumbnail_data is None:  # no best frame has been selected yet
+            return None
+
+        try:
+            best_frame = cv2.cvtColor(
+                self.frame_cache[self.thumbnail_data["frame_time"]],
+                cv2.COLOR_YUV2BGR_I420,
+            )
+        except KeyError:
+            logger.warning(
+                f"Unable to create jpg because frame {self.thumbnail_data['frame_time']} is not in the cache"
+            )
+            return None
+
+        if bounding_box:
+            thickness = 2
+            color = self.colormap[self.obj_data["label"]]
+
+            # draw the bounding boxes on the frame
+            box = self.thumbnail_data["box"]
+            draw_box_with_label(
+                best_frame,
+                box[0],
+                box[1],
+                box[2],
+                box[3],
+                self.obj_data["label"],
+                f"{int(self.thumbnail_data['score']*100)}% {int(self.thumbnail_data['area'])}",
+                thickness=thickness,
+                color=color,
+            )
+
+            # draw any attributes, using the same color as the object box
+            for attribute in self.thumbnail_data["attributes"]:
+                box = attribute["box"]
+                draw_box_with_label(
+                    best_frame,
+                    box[0],
+                    box[1],
+                    box[2],
+                    box[3],
+                    attribute["label"],
+                    f"{attribute['score']:.0%}",
+                    thickness=thickness,
+                    color=color,
+                )
+
+        if crop:  # crop after drawing, so boxes drawn above are kept
+            box = self.thumbnail_data["box"]
+            box_size = 300  # presumably the minimum region size; confirm in calculate_region
+            region = calculate_region(
+                best_frame.shape,
+                box[0],
+                box[1],
+                box[2],
+                box[3],
+                box_size,
+                multiplier=1.1,
+            )
+            best_frame = best_frame[region[1] : region[3], region[0] : region[2]]
+
+        if height:  # resize to the requested height, keeping the aspect ratio
+            width = int(height * best_frame.shape[1] / best_frame.shape[0])
+            best_frame = cv2.resize(
+                best_frame, dsize=(width, height), interpolation=cv2.INTER_AREA
+            )
+        if timestamp:  # drawn last, onto the already cropped and resized frame
+            color = self.camera_config.timestamp_style.color
+            draw_timestamp(
+                best_frame,
+                self.thumbnail_data["frame_time"],
+                self.camera_config.timestamp_style.format,
+                font_effect=self.camera_config.timestamp_style.effect,
+                font_thickness=self.camera_config.timestamp_style.thickness,
+                font_color=(color.blue, color.green, color.red),
+                position=self.camera_config.timestamp_style.position,
+            )
+
+        ret, jpg = cv2.imencode(
+            ".jpg", best_frame, [int(cv2.IMWRITE_JPEG_QUALITY), quality]
+        )
+        if ret:
+            return jpg.tobytes()
+        else:  # encoding failed
+            return None
+
+
+def zone_filtered(obj: TrackedObject, object_config):
+    """Return True when the filters for the object's label reject the object.
+
+    object_config maps labels to filter settings. Labels without an entry are
+    never filtered. Otherwise the object is rejected when its area or ratio
+    falls outside the configured min/max, or when its computed score is below
+    the configured threshold.
+    """
+    label = obj.obj_data["label"]
+
+    if label not in object_config:
+        return False
+
+    limits = object_config[label]
+    data = obj.obj_data
+
+    # the checks keep their original order and stop at the first failure
+    return bool(
+        # smaller than the min area
+        limits.min_area > data["area"]
+        # larger than the max area
+        or limits.max_area < data["area"]
+        # computed score lower than the threshold
+        or limits.threshold > obj.computed_score
+        # not proportionally wide enough
+        or limits.min_ratio > data["ratio"]
+        # proportionally too wide
+        or limits.max_ratio < data["ratio"]
+    )
+
+
+class TrackedObjectAttribute:
+    """A detected attribute that can be matched to a tracked object."""
+
+    def __init__(self, raw_data: tuple) -> None:
+        # raw_data is positional: label, score, box, area, ratio, region
+        self.label = raw_data[0]
+        self.score = raw_data[1]
+        self.box = raw_data[2]
+        self.area = raw_data[3]
+        self.ratio = raw_data[4]
+        self.region = raw_data[5]
+
+    def get_tracking_data(self) -> dict[str, any]:
+        """Return data saved to the object."""
+        return {
+            "label": self.label,
+            "score": self.score,
+            "box": self.box,
+        }
+
+    def find_best_object(self, objects: list[dict[str, any]]) -> str:
+        """Return the ID of the smallest object passing box_inside, or None."""
+        best_object_area = None
+        best_object_id = None
+
+        for obj in objects:
+            if not box_inside(obj["box"], self.box):
+                continue
+
+            object_area = area(obj["box"])
+
+            # if multiple objects contain the same attribute then they
+            # are overlapping, it is most likely that the smaller object
+            # is the one with the attribute
+            if best_object_area is None or object_area < best_object_area:
+                best_object_area = object_area
+                best_object_id = obj["id"]
+
+        return best_object_id
diff --git a/frigate/util/image.py b/frigate/util/image.py
index 41024a599..484737f71 100644
--- a/frigate/util/image.py
+++ b/frigate/util/image.py
@@ -36,6 +36,72 @@ def transliterate_to_latin(text: str) -> str:
     return unidecode(text)
 
 
+def on_edge(box, frame_shape) -> bool:
+    """Return True if a box coordinate is 0 or the last index of the frame."""
+    return (
+        box[0] == 0
+        or box[1] == 0
+        or box[2] == frame_shape[1] - 1
+        or box[3] == frame_shape[0] - 1
+    )
+
+
+def has_better_attr(current_thumb, new_obj, attr_label) -> bool:
+    max_new_attr = max(
+        [0]  # 0 when the new object has no attribute with this label
+        + [area(a["box"]) for a in new_obj["attributes"] if a["label"] == attr_label]
+    )
+    max_current_attr = max(
+        [0]  # 0 when the current thumbnail has no attribute with this label
+        + [
+            area(a["box"])
+            for a in current_thumb["attributes"]
+            if a["label"] == attr_label
+        ]
+    )
+
+    # better means a larger box area for this attribute, not a higher score
+    return max_new_attr > max_current_attr
+
+
+def is_better_thumbnail(label, current_thumb, new_obj, frame_shape) -> bool:
+    """Return True if new_obj should replace current_thumb as the thumbnail.
+
+    A person's face and a car's license_plate take priority; after that a box
+    on the frame edge is avoided and higher scores / larger areas win.
+    """
+    # the attribute that matters most for this label, if there is one
+    if label == "person":
+        key_attr = "face"
+    elif label == "car":
+        key_attr = "license_plate"
+    else:
+        key_attr = None
+
+    if key_attr is not None:
+        if has_better_attr(current_thumb, new_obj, key_attr):
+            return True
+
+        # if the current thumb has the attr, dont update unless it gets better
+        if any(a["label"] == key_attr for a in current_thumb["attributes"]):
+            return False
+
+    # if the new_thumb is on an edge, and the current thumb is not
+    new_on_edge = on_edge(new_obj["box"], frame_shape)
+    if new_on_edge and not on_edge(current_thumb["box"], frame_shape):
+        return False
+
+    # the score must be better by more than 0.05
+    if new_obj["score"] > current_thumb["score"] + 0.05:
+        return True
+
+    # or the area must be more than 10% larger
+    if new_obj["area"] > current_thumb["area"] * 1.1:
+        return True
+
+    return False
+
+
 def draw_timestamp(
     frame,
     timestamp,
diff --git a/frigate/video.py b/frigate/video.py
index 0f051b6b2..c0341446a 100755
--- a/frigate/video.py
+++ b/frigate/video.py
@@ -27,7 +27,7 @@ from frigate.object_detection import RemoteObjectDetector
 from frigate.ptz.autotrack import ptz_moving_at_frame_time
 from frigate.track import ObjectTracker
 from frigate.track.norfair_tracker import NorfairTracker
-from frigate.track.object_attribute import ObjectAttribute
+from frigate.track.tracked_object import TrackedObjectAttribute
 from frigate.util.builtin import EventsPerSecond, get_tomorrow_at_time
 from frigate.util.image import (
     FrameManager,
@@ -734,10 +734,10 @@ def process_frames(
                 object_tracker.update_frame_times(frame_time)
 
         # group the attribute detections based on what label they apply to
-        attribute_detections: dict[str, list[ObjectAttribute]] = {}
+        attribute_detections: dict[str, list[TrackedObjectAttribute]] = {}
         for label, attribute_labels in model_config.attributes_map.items():
             attribute_detections[label] = [
-                ObjectAttribute(d)
+                TrackedObjectAttribute(d)
                 for d in consolidated_detections
                 if d[0] in attribute_labels
             ]