From 8ac4b001a213d02e1c993fcb2ff3696514676c51 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Thu, 17 Oct 2024 10:02:27 -0600 Subject: [PATCH] Various fixes (#14410) * Fix access * Reorganize tracked object for imports * Separate out rockchip build * Formatting * Use original ffmpeg build * Fix build * Update default search type value --- .github/workflows/ci.yml | 22 + docker/main/install_deps.sh | 5 +- frigate/api/defs/events_query_parameters.py | 2 +- frigate/object_processing.py | 456 +------------------- frigate/ptz/autotrack.py | 26 +- frigate/test/test_obects.py | 4 +- frigate/track/object_attribute.py | 44 -- frigate/track/tracked_object.py | 447 +++++++++++++++++++ frigate/util/image.py | 66 +++ frigate/video.py | 6 +- 10 files changed, 563 insertions(+), 515 deletions(-) delete mode 100644 frigate/track/object_attribute.py create mode 100644 frigate/track/tracked_object.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bbf47a57d..3a5a67041 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -155,6 +155,28 @@ jobs: tensorrt.tags=${{ steps.setup.outputs.image-name }}-tensorrt *.cache-from=type=registry,ref=${{ steps.setup.outputs.cache-name }}-amd64 *.cache-to=type=registry,ref=${{ steps.setup.outputs.cache-name }}-amd64,mode=max + arm64_extra_builds: + runs-on: ubuntu-latest + name: ARM Extra Build + needs: + - arm64_build + steps: + - name: Check out code + uses: actions/checkout@v4 + - name: Set up QEMU and Buildx + id: setup + uses: ./.github/actions/setup + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Build and push Rockchip build + uses: docker/bake-action@v3 + with: + push: true + targets: rk + files: docker/rockchip/rk.hcl + set: | + rk.tags=${{ steps.setup.outputs.image-name }}-rk + *.cache-from=type=gha combined_extra_builds: runs-on: ubuntu-latest name: Combined Extra Builds diff --git a/docker/main/install_deps.sh b/docker/main/install_deps.sh index 46f2a5357..2d7662053 100755 --- a/docker/main/install_deps.sh +++ b/docker/main/install_deps.sh @@ -8,6 +8,7 @@ apt-get -qq install --no-install-recommends -y \ apt-transport-https \ gnupg \ wget \ + lbzip2 \ procps vainfo \ unzip locales tzdata libxml2 xz-utils \ python3.9 \ @@ -45,7 +46,7 @@ if [[ "${TARGETARCH}" == "amd64" ]]; then wget -qO btbn-ffmpeg.tar.xz "https://github.com/NickM-27/FFmpeg-Builds/releases/download/autobuild-2022-07-31-12-37/ffmpeg-n5.1-2-g915ef932a3-linux64-gpl-5.1.tar.xz" tar -xf btbn-ffmpeg.tar.xz -C /usr/lib/ffmpeg/5.0 --strip-components 1 rm -rf btbn-ffmpeg.tar.xz /usr/lib/ffmpeg/5.0/doc /usr/lib/ffmpeg/5.0/bin/ffplay - wget -qO btbn-ffmpeg.tar.xz "https://github.com/BtbN/FFmpeg-Builds/releases/download/autobuild-2024-09-30-15-36/ffmpeg-n7.1-linux64-gpl-7.1.tar.xz" + wget -qO btbn-ffmpeg.tar.xz "https://github.com/NickM-27/FFmpeg-Builds/releases/download/autobuild-2024-09-19-12-51/ffmpeg-n7.0.2-18-g3e6cec1286-linux64-gpl-7.0.tar.xz" tar -xf btbn-ffmpeg.tar.xz -C /usr/lib/ffmpeg/7.0 --strip-components 1 rm -rf btbn-ffmpeg.tar.xz /usr/lib/ffmpeg/7.0/doc /usr/lib/ffmpeg/7.0/bin/ffplay fi @@ -57,7 +58,7 @@ if [[ "${TARGETARCH}" == "arm64" ]]; then wget -qO btbn-ffmpeg.tar.xz "https://github.com/NickM-27/FFmpeg-Builds/releases/download/autobuild-2022-07-31-12-37/ffmpeg-n5.1-2-g915ef932a3-linuxarm64-gpl-5.1.tar.xz" tar -xf btbn-ffmpeg.tar.xz -C /usr/lib/ffmpeg/5.0 --strip-components 1 rm -rf btbn-ffmpeg.tar.xz /usr/lib/ffmpeg/5.0/doc /usr/lib/ffmpeg/5.0/bin/ffplay - wget -qO btbn-ffmpeg.tar.xz "https://github.com/BtbN/FFmpeg-Builds/releases/download/autobuild-2024-09-30-15-36/ffmpeg-n7.1-linuxarm64-gpl-7.1.tar.xz" + wget -qO btbn-ffmpeg.tar.xz "https://github.com/NickM-27/FFmpeg-Builds/releases/download/autobuild-2024-09-19-12-51/ffmpeg-n7.0.2-18-g3e6cec1286-linuxarm64-gpl-7.0.tar.xz" tar -xf btbn-ffmpeg.tar.xz -C /usr/lib/ffmpeg/7.0 --strip-components 1 rm -rf btbn-ffmpeg.tar.xz /usr/lib/ffmpeg/7.0/doc /usr/lib/ffmpeg/7.0/bin/ffplay fi diff --git a/frigate/api/defs/events_query_parameters.py b/frigate/api/defs/events_query_parameters.py index c4e40bd4e..f4c98809c 100644 --- a/frigate/api/defs/events_query_parameters.py +++ b/frigate/api/defs/events_query_parameters.py @@ -35,7 +35,7 @@ class EventsQueryParams(BaseModel): class EventsSearchQueryParams(BaseModel): query: Optional[str] = None event_id: Optional[str] = None - search_type: Optional[str] = "thumbnail,description" + search_type: Optional[str] = "thumbnail" include_thumbnails: Optional[int] = 1 limit: Optional[int] = 50 cameras: Optional[str] = "all" diff --git a/frigate/object_processing.py b/frigate/object_processing.py index 6e63562a4..7ba3270f1 100644 --- a/frigate/object_processing.py +++ b/frigate/object_processing.py @@ -1,4 +1,3 @@ -import base64 import datetime import json import logging @@ -7,7 +6,6 @@ import queue import threading from collections import Counter, defaultdict from multiprocessing.synchronize import Event as MpEvent -from statistics import median from typing import Callable import cv2 @@ -18,9 +16,7 @@ from frigate.comms.dispatcher import Dispatcher from frigate.comms.events_updater import EventEndSubscriber, EventUpdatePublisher from frigate.comms.inter_process import InterProcessRequestor from frigate.config import ( - CameraConfig, FrigateConfig, - ModelConfig, MqttConfig, RecordConfig, SnapshotsConfig, @@ -29,466 +25,18 @@ from frigate.config import ( from frigate.const import CLIPS_DIR, UPDATE_CAMERA_ACTIVITY from frigate.events.types import EventStateEnum, EventTypeEnum from frigate.ptz.autotrack import PtzAutoTrackerThread +from frigate.track.tracked_object import TrackedObject from frigate.util.image import ( SharedMemoryFrameManager, - area, - calculate_region, draw_box_with_label, draw_timestamp, + is_better_thumbnail, is_label_printable, ) logger = logging.getLogger(__name__) -def on_edge(box, frame_shape): - if ( - box[0] == 0 - or box[1] == 0 - or box[2] == frame_shape[1] - 1 - or box[3] == frame_shape[0] - 1 - ): - return True - - -def has_better_attr(current_thumb, new_obj, attr_label) -> bool: - max_new_attr = max( - [0] - + [area(a["box"]) for a in new_obj["attributes"] if a["label"] == attr_label] - ) - max_current_attr = max( - [0] - + [ - area(a["box"]) - for a in current_thumb["attributes"] - if a["label"] == attr_label - ] - ) - - # if the thumb has a higher scoring attr - return max_new_attr > max_current_attr - - -def is_better_thumbnail(label, current_thumb, new_obj, frame_shape) -> bool: - # larger is better - # cutoff images are less ideal, but they should also be smaller? - # better scores are obviously better too - - # check face on person - if label == "person": - if has_better_attr(current_thumb, new_obj, "face"): - return True - # if the current thumb has a face attr, dont update unless it gets better - if any([a["label"] == "face" for a in current_thumb["attributes"]]): - return False - - # check license_plate on car - if label == "car": - if has_better_attr(current_thumb, new_obj, "license_plate"): - return True - # if the current thumb has a license_plate attr, dont update unless it gets better - if any([a["label"] == "license_plate" for a in current_thumb["attributes"]]): - return False - - # if the new_thumb is on an edge, and the current thumb is not - if on_edge(new_obj["box"], frame_shape) and not on_edge( - current_thumb["box"], frame_shape - ): - return False - - # if the score is better by more than 5% - if new_obj["score"] > current_thumb["score"] + 0.05: - return True - - # if the area is 10% larger - if new_obj["area"] > current_thumb["area"] * 1.1: - return True - - return False - - -class TrackedObject: - def __init__( - self, - model_config: ModelConfig, - camera_config: CameraConfig, - frame_cache, - obj_data: dict[str, any], - ): - # set the score history then remove as it is not part of object state - self.score_history = obj_data["score_history"] - del obj_data["score_history"] - - self.obj_data = obj_data - self.colormap = model_config.colormap - self.logos = model_config.all_attribute_logos - self.camera_config = camera_config - self.frame_cache = frame_cache - self.zone_presence: dict[str, int] = {} - self.zone_loitering: dict[str, int] = {} - self.current_zones = [] - self.entered_zones = [] - self.attributes = defaultdict(float) - self.false_positive = True - self.has_clip = False - self.has_snapshot = False - self.top_score = self.computed_score = 0.0 - self.thumbnail_data = None - self.last_updated = 0 - self.last_published = 0 - self.frame = None - self.active = True - self.pending_loitering = False - self.previous = self.to_dict() - - def _is_false_positive(self): - # once a true positive, always a true positive - if not self.false_positive: - return False - - threshold = self.camera_config.objects.filters[self.obj_data["label"]].threshold - return self.computed_score < threshold - - def compute_score(self): - """get median of scores for object.""" - return median(self.score_history) - - def update(self, current_frame_time: float, obj_data, has_valid_frame: bool): - thumb_update = False - significant_change = False - autotracker_update = False - # if the object is not in the current frame, add a 0.0 to the score history - if obj_data["frame_time"] != current_frame_time: - self.score_history.append(0.0) - else: - self.score_history.append(obj_data["score"]) - - # only keep the last 10 scores - if len(self.score_history) > 10: - self.score_history = self.score_history[-10:] - - # calculate if this is a false positive - self.computed_score = self.compute_score() - if self.computed_score > self.top_score: - self.top_score = self.computed_score - self.false_positive = self._is_false_positive() - self.active = self.is_active() - - if not self.false_positive and has_valid_frame: - # determine if this frame is a better thumbnail - if self.thumbnail_data is None or is_better_thumbnail( - self.obj_data["label"], - self.thumbnail_data, - obj_data, - self.camera_config.frame_shape, - ): - self.thumbnail_data = { - "frame_time": current_frame_time, - "box": obj_data["box"], - "area": obj_data["area"], - "region": obj_data["region"], - "score": obj_data["score"], - "attributes": obj_data["attributes"], - } - thumb_update = True - - # check zones - current_zones = [] - bottom_center = (obj_data["centroid"][0], obj_data["box"][3]) - in_loitering_zone = False - - # check each zone - for name, zone in self.camera_config.zones.items(): - # if the zone is not for this object type, skip - if len(zone.objects) > 0 and obj_data["label"] not in zone.objects: - continue - contour = zone.contour - zone_score = self.zone_presence.get(name, 0) + 1 - # check if the object is in the zone - if cv2.pointPolygonTest(contour, bottom_center, False) >= 0: - # if the object passed the filters once, dont apply again - if name in self.current_zones or not zone_filtered(self, zone.filters): - # an object is only considered present in a zone if it has a zone inertia of 3+ - if zone_score >= zone.inertia: - # if the zone has loitering time, update loitering status - if zone.loitering_time > 0: - in_loitering_zone = True - - loitering_score = self.zone_loitering.get(name, 0) + 1 - - # loitering time is configured as seconds, convert to count of frames - if loitering_score >= ( - self.camera_config.zones[name].loitering_time - * self.camera_config.detect.fps - ): - current_zones.append(name) - - if name not in self.entered_zones: - self.entered_zones.append(name) - else: - self.zone_loitering[name] = loitering_score - else: - self.zone_presence[name] = zone_score - else: - # once an object has a zone inertia of 3+ it is not checked anymore - if 0 < zone_score < zone.inertia: - self.zone_presence[name] = zone_score - 1 - - # update loitering status - self.pending_loitering = in_loitering_zone - - # maintain attributes - for attr in obj_data["attributes"]: - if self.attributes[attr["label"]] < attr["score"]: - self.attributes[attr["label"]] = attr["score"] - - # populate the sub_label for object with highest scoring logo - if self.obj_data["label"] in ["car", "package", "person"]: - recognized_logos = { - k: self.attributes[k] for k in self.logos if k in self.attributes - } - if len(recognized_logos) > 0: - max_logo = max(recognized_logos, key=recognized_logos.get) - - # don't overwrite sub label if it is already set - if ( - self.obj_data.get("sub_label") is None - or self.obj_data["sub_label"][0] == max_logo - ): - self.obj_data["sub_label"] = (max_logo, recognized_logos[max_logo]) - - # check for significant change - if not self.false_positive: - # if the zones changed, signal an update - if set(self.current_zones) != set(current_zones): - significant_change = True - - # if the position changed, signal an update - if self.obj_data["position_changes"] != obj_data["position_changes"]: - significant_change = True - - if self.obj_data["attributes"] != obj_data["attributes"]: - significant_change = True - - # if the state changed between stationary and active - if self.previous["active"] != self.active: - significant_change = True - - # update at least once per minute - if self.obj_data["frame_time"] - self.previous["frame_time"] > 60: - significant_change = True - - # update autotrack at most 3 objects per second - if self.obj_data["frame_time"] - self.previous["frame_time"] >= (1 / 3): - autotracker_update = True - - self.obj_data.update(obj_data) - self.current_zones = current_zones - return (thumb_update, significant_change, autotracker_update) - - def to_dict(self, include_thumbnail: bool = False): - event = { - "id": self.obj_data["id"], - "camera": self.camera_config.name, - "frame_time": self.obj_data["frame_time"], - "snapshot": self.thumbnail_data, - "label": self.obj_data["label"], - "sub_label": self.obj_data.get("sub_label"), - "top_score": self.top_score, - "false_positive": self.false_positive, - "start_time": self.obj_data["start_time"], - "end_time": self.obj_data.get("end_time", None), - "score": self.obj_data["score"], - "box": self.obj_data["box"], - "area": self.obj_data["area"], - "ratio": self.obj_data["ratio"], - "region": self.obj_data["region"], - "active": self.active, - "stationary": not self.active, - "motionless_count": self.obj_data["motionless_count"], - "position_changes": self.obj_data["position_changes"], - "current_zones": self.current_zones.copy(), - "entered_zones": self.entered_zones.copy(), - "has_clip": self.has_clip, - "has_snapshot": self.has_snapshot, - "attributes": self.attributes, - "current_attributes": self.obj_data["attributes"], - "pending_loitering": self.pending_loitering, - } - - if include_thumbnail: - event["thumbnail"] = base64.b64encode(self.get_thumbnail()).decode("utf-8") - - return event - - def is_active(self): - return not self.is_stationary() - - def is_stationary(self): - return ( - self.obj_data["motionless_count"] - > self.camera_config.detect.stationary.threshold - ) - - def get_thumbnail(self): - if ( - self.thumbnail_data is None - or self.thumbnail_data["frame_time"] not in self.frame_cache - ): - ret, jpg = cv2.imencode(".jpg", np.zeros((175, 175, 3), np.uint8)) - - jpg_bytes = self.get_jpg_bytes( - timestamp=False, bounding_box=False, crop=True, height=175 - ) - - if jpg_bytes: - return jpg_bytes - else: - ret, jpg = cv2.imencode(".jpg", np.zeros((175, 175, 3), np.uint8)) - return jpg.tobytes() - - def get_clean_png(self): - if self.thumbnail_data is None: - return None - - try: - best_frame = cv2.cvtColor( - self.frame_cache[self.thumbnail_data["frame_time"]], - cv2.COLOR_YUV2BGR_I420, - ) - except KeyError: - logger.warning( - f"Unable to create clean png because frame {self.thumbnail_data['frame_time']} is not in the cache" - ) - return None - - ret, png = cv2.imencode(".png", best_frame) - if ret: - return png.tobytes() - else: - return None - - def get_jpg_bytes( - self, timestamp=False, bounding_box=False, crop=False, height=None, quality=70 - ): - if self.thumbnail_data is None: - return None - - try: - best_frame = cv2.cvtColor( - self.frame_cache[self.thumbnail_data["frame_time"]], - cv2.COLOR_YUV2BGR_I420, - ) - except KeyError: - logger.warning( - f"Unable to create jpg because frame {self.thumbnail_data['frame_time']} is not in the cache" - ) - return None - - if bounding_box: - thickness = 2 - color = self.colormap[self.obj_data["label"]] - - # draw the bounding boxes on the frame - box = self.thumbnail_data["box"] - draw_box_with_label( - best_frame, - box[0], - box[1], - box[2], - box[3], - self.obj_data["label"], - f"{int(self.thumbnail_data['score']*100)}% {int(self.thumbnail_data['area'])}", - thickness=thickness, - color=color, - ) - - # draw any attributes - for attribute in self.thumbnail_data["attributes"]: - box = attribute["box"] - draw_box_with_label( - best_frame, - box[0], - box[1], - box[2], - box[3], - attribute["label"], - f"{attribute['score']:.0%}", - thickness=thickness, - color=color, - ) - - if crop: - box = self.thumbnail_data["box"] - box_size = 300 - region = calculate_region( - best_frame.shape, - box[0], - box[1], - box[2], - box[3], - box_size, - multiplier=1.1, - ) - best_frame = best_frame[region[1] : region[3], region[0] : region[2]] - - if height: - width = int(height * best_frame.shape[1] / best_frame.shape[0]) - best_frame = cv2.resize( - best_frame, dsize=(width, height), interpolation=cv2.INTER_AREA - ) - if timestamp: - color = self.camera_config.timestamp_style.color - draw_timestamp( - best_frame, - self.thumbnail_data["frame_time"], - self.camera_config.timestamp_style.format, - font_effect=self.camera_config.timestamp_style.effect, - font_thickness=self.camera_config.timestamp_style.thickness, - font_color=(color.blue, color.green, color.red), - position=self.camera_config.timestamp_style.position, - ) - - ret, jpg = cv2.imencode( - ".jpg", best_frame, [int(cv2.IMWRITE_JPEG_QUALITY), quality] - ) - if ret: - return jpg.tobytes() - else: - return None - - -def zone_filtered(obj: TrackedObject, object_config): - object_name = obj.obj_data["label"] - - if object_name in object_config: - obj_settings = object_config[object_name] - - # if the min area is larger than the - # detected object, don't add it to detected objects - if obj_settings.min_area > obj.obj_data["area"]: - return True - - # if the detected object is larger than the - # max area, don't add it to detected objects - if obj_settings.max_area < obj.obj_data["area"]: - return True - - # if the score is lower than the threshold, skip - if obj_settings.threshold > obj.computed_score: - return True - - # if the object is not proportionally wide enough - if obj_settings.min_ratio > obj.obj_data["ratio"]: - return True - - # if the object is proportionally too wide - if obj_settings.max_ratio < obj.obj_data["ratio"]: - return True - - return False - - # Maintains the state of a camera class CameraState: def __init__( diff --git a/frigate/ptz/autotrack.py b/frigate/ptz/autotrack.py index fd9933bcb..e9226f267 100644 --- a/frigate/ptz/autotrack.py +++ b/frigate/ptz/autotrack.py @@ -32,6 +32,7 @@ from frigate.const import ( CONFIG_DIR, ) from frigate.ptz.onvif import OnvifController +from frigate.track.tracked_object import TrackedObject from frigate.util.builtin import update_yaml_file from frigate.util.image import SharedMemoryFrameManager, intersection_over_union @@ -214,7 +215,7 @@ class PtzAutoTracker: ): self._autotracker_setup(camera_config, camera) - def _autotracker_setup(self, camera_config, camera): + def _autotracker_setup(self, camera_config: CameraConfig, camera: str): logger.debug(f"{camera}: Autotracker init") self.object_types[camera] = camera_config.onvif.autotracking.track @@ -852,7 +853,7 @@ class PtzAutoTracker: logger.debug(f"{camera}: Valid velocity ") return True, velocities.flatten() - def _get_distance_threshold(self, camera, obj): + def _get_distance_threshold(self, camera: str, obj: TrackedObject): # Returns true if Euclidean distance from object to center of frame is # less than 10% of the of the larger dimension (width or height) of the frame, # multiplied by a scaling factor for object size. @@ -888,7 +889,9 @@ class PtzAutoTracker: return distance_threshold - def _should_zoom_in(self, camera, obj, box, predicted_time, debug_zooming=False): + def _should_zoom_in( + self, camera: str, obj: TrackedObject, box, predicted_time, debug_zooming=False + ): # returns True if we should zoom in, False if we should zoom out, None to do nothing camera_config = self.config.cameras[camera] camera_width = camera_config.frame_shape[1] @@ -1019,7 +1022,7 @@ class PtzAutoTracker: # Don't zoom at all return None - def _autotrack_move_ptz(self, camera, obj): + def _autotrack_move_ptz(self, camera: str, obj: TrackedObject): camera_config = self.config.cameras[camera] camera_width = camera_config.frame_shape[1] camera_height = camera_config.frame_shape[0] @@ -1090,7 +1093,12 @@ class PtzAutoTracker: self._enqueue_move(camera, obj.obj_data["frame_time"], 0, 0, zoom) def _get_zoom_amount( - self, camera, obj, predicted_box, predicted_movement_time, debug_zoom=True + self, + camera: str, + obj: TrackedObject, + predicted_box, + predicted_movement_time, + debug_zoom=True, ): camera_config = self.config.cameras[camera] @@ -1186,13 +1194,13 @@ class PtzAutoTracker: return zoom - def is_autotracking(self, camera): + def is_autotracking(self, camera: str): return self.tracked_object[camera] is not None - def autotracked_object_region(self, camera): + def autotracked_object_region(self, camera: str): return self.tracked_object[camera]["region"] - def autotrack_object(self, camera, obj): + def autotrack_object(self, camera: str, obj: TrackedObject): camera_config = self.config.cameras[camera] if camera_config.onvif.autotracking.enabled: @@ -1208,7 +1216,7 @@ class PtzAutoTracker: if ( # new object self.tracked_object[camera] is None - and obj.camera == camera + and obj.camera_config.name == camera and obj.obj_data["label"] in self.object_types[camera] and set(obj.entered_zones) & set(self.required_zones[camera]) and not obj.previous["false_positive"] diff --git a/frigate/test/test_obects.py b/frigate/test/test_obects.py index f1c039ef8..8fe831980 100644 --- a/frigate/test/test_obects.py +++ b/frigate/test/test_obects.py @@ -1,11 +1,11 @@ import unittest -from frigate.track.object_attribute import ObjectAttribute +from frigate.track.tracked_object import TrackedObjectAttribute class TestAttribute(unittest.TestCase): def test_overlapping_object_selection(self) -> None: - attribute = ObjectAttribute( + attribute = TrackedObjectAttribute( ( "amazon", 0.80078125, diff --git a/frigate/track/object_attribute.py b/frigate/track/object_attribute.py deleted file mode 100644 index 54433c5f3..000000000 --- a/frigate/track/object_attribute.py +++ /dev/null @@ -1,44 +0,0 @@ -"""Object attribute.""" - -from frigate.util.object import area, box_inside - - -class ObjectAttribute: - def __init__(self, raw_data: tuple) -> None: - self.label = raw_data[0] - self.score = raw_data[1] - self.box = raw_data[2] - self.area = raw_data[3] - self.ratio = raw_data[4] - self.region = raw_data[5] - - def get_tracking_data(self) -> dict[str, any]: - """Return data saved to the object.""" - return { - "label": self.label, - "score": self.score, - "box": self.box, - } - - def find_best_object(self, objects: list[dict[str, any]]) -> str: - """Find the best attribute for each object and return its ID.""" - best_object_area = None - best_object_id = None - - for obj in objects: - if not box_inside(obj["box"], self.box): - continue - - object_area = area(obj["box"]) - - # if multiple objects have the same attribute then they - # are overlapping, it is most likely that the smaller object - # is the one with the attribute - if best_object_area is None: - best_object_area = object_area - best_object_id = obj["id"] - elif object_area < best_object_area: - best_object_area = object_area - best_object_id = obj["id"] - - return best_object_id diff --git a/frigate/track/tracked_object.py b/frigate/track/tracked_object.py new file mode 100644 index 000000000..a4b4e8426 --- /dev/null +++ b/frigate/track/tracked_object.py @@ -0,0 +1,447 @@ +"""Object attribute.""" + +import base64 +import logging +from collections import defaultdict +from statistics import median + +import cv2 +import numpy as np + +from frigate.config import ( + CameraConfig, + ModelConfig, +) +from frigate.util.image import ( + area, + calculate_region, + draw_box_with_label, + draw_timestamp, + is_better_thumbnail, +) +from frigate.util.object import box_inside + +logger = logging.getLogger(__name__) + + +class TrackedObject: + def __init__( + self, + model_config: ModelConfig, + camera_config: CameraConfig, + frame_cache, + obj_data: dict[str, any], + ): + # set the score history then remove as it is not part of object state + self.score_history = obj_data["score_history"] + del obj_data["score_history"] + + self.obj_data = obj_data + self.colormap = model_config.colormap + self.logos = model_config.all_attribute_logos + self.camera_config = camera_config + self.frame_cache = frame_cache + self.zone_presence: dict[str, int] = {} + self.zone_loitering: dict[str, int] = {} + self.current_zones = [] + self.entered_zones = [] + self.attributes = defaultdict(float) + self.false_positive = True + self.has_clip = False + self.has_snapshot = False + self.top_score = self.computed_score = 0.0 + self.thumbnail_data = None + self.last_updated = 0 + self.last_published = 0 + self.frame = None + self.active = True + self.pending_loitering = False + self.previous = self.to_dict() + + def _is_false_positive(self): + # once a true positive, always a true positive + if not self.false_positive: + return False + + threshold = self.camera_config.objects.filters[self.obj_data["label"]].threshold + return self.computed_score < threshold + + def compute_score(self): + """get median of scores for object.""" + return median(self.score_history) + + def update(self, current_frame_time: float, obj_data, has_valid_frame: bool): + thumb_update = False + significant_change = False + autotracker_update = False + # if the object is not in the current frame, add a 0.0 to the score history + if obj_data["frame_time"] != current_frame_time: + self.score_history.append(0.0) + else: + self.score_history.append(obj_data["score"]) + + # only keep the last 10 scores + if len(self.score_history) > 10: + self.score_history = self.score_history[-10:] + + # calculate if this is a false positive + self.computed_score = self.compute_score() + if self.computed_score > self.top_score: + self.top_score = self.computed_score + self.false_positive = self._is_false_positive() + self.active = self.is_active() + + if not self.false_positive and has_valid_frame: + # determine if this frame is a better thumbnail + if self.thumbnail_data is None or is_better_thumbnail( + self.obj_data["label"], + self.thumbnail_data, + obj_data, + self.camera_config.frame_shape, + ): + self.thumbnail_data = { + "frame_time": current_frame_time, + "box": obj_data["box"], + "area": obj_data["area"], + "region": obj_data["region"], + "score": obj_data["score"], + "attributes": obj_data["attributes"], + } + thumb_update = True + + # check zones + current_zones = [] + bottom_center = (obj_data["centroid"][0], obj_data["box"][3]) + in_loitering_zone = False + + # check each zone + for name, zone in self.camera_config.zones.items(): + # if the zone is not for this object type, skip + if len(zone.objects) > 0 and obj_data["label"] not in zone.objects: + continue + contour = zone.contour + zone_score = self.zone_presence.get(name, 0) + 1 + # check if the object is in the zone + if cv2.pointPolygonTest(contour, bottom_center, False) >= 0: + # if the object passed the filters once, dont apply again + if name in self.current_zones or not zone_filtered(self, zone.filters): + # an object is only considered present in a zone if it has a zone inertia of 3+ + if zone_score >= zone.inertia: + # if the zone has loitering time, update loitering status + if zone.loitering_time > 0: + in_loitering_zone = True + + loitering_score = self.zone_loitering.get(name, 0) + 1 + + # loitering time is configured as seconds, convert to count of frames + if loitering_score >= ( + self.camera_config.zones[name].loitering_time + * self.camera_config.detect.fps + ): + current_zones.append(name) + + if name not in self.entered_zones: + self.entered_zones.append(name) + else: + self.zone_loitering[name] = loitering_score + else: + self.zone_presence[name] = zone_score + else: + # once an object has a zone inertia of 3+ it is not checked anymore + if 0 < zone_score < zone.inertia: + self.zone_presence[name] = zone_score - 1 + + # update loitering status + self.pending_loitering = in_loitering_zone + + # maintain attributes + for attr in obj_data["attributes"]: + if self.attributes[attr["label"]] < attr["score"]: + self.attributes[attr["label"]] = attr["score"] + + # populate the sub_label for object with highest scoring logo + if self.obj_data["label"] in ["car", "package", "person"]: + recognized_logos = { + k: self.attributes[k] for k in self.logos if k in self.attributes + } + if len(recognized_logos) > 0: + max_logo = max(recognized_logos, key=recognized_logos.get) + + # don't overwrite sub label if it is already set + if ( + self.obj_data.get("sub_label") is None + or self.obj_data["sub_label"][0] == max_logo + ): + self.obj_data["sub_label"] = (max_logo, recognized_logos[max_logo]) + + # check for significant change + if not self.false_positive: + # if the zones changed, signal an update + if set(self.current_zones) != set(current_zones): + significant_change = True + + # if the position changed, signal an update + if self.obj_data["position_changes"] != obj_data["position_changes"]: + significant_change = True + + if self.obj_data["attributes"] != obj_data["attributes"]: + significant_change = True + + # if the state changed between stationary and active + if self.previous["active"] != self.active: + significant_change = True + + # update at least once per minute + if self.obj_data["frame_time"] - self.previous["frame_time"] > 60: + significant_change = True + + # update autotrack at most 3 objects per second + if self.obj_data["frame_time"] - self.previous["frame_time"] >= (1 / 3): + autotracker_update = True + + self.obj_data.update(obj_data) + self.current_zones = current_zones + return (thumb_update, significant_change, autotracker_update) + + def to_dict(self, include_thumbnail: bool = False): + event = { + "id": self.obj_data["id"], + "camera": self.camera_config.name, + "frame_time": self.obj_data["frame_time"], + "snapshot": self.thumbnail_data, + "label": self.obj_data["label"], + "sub_label": self.obj_data.get("sub_label"), + "top_score": self.top_score, + "false_positive": self.false_positive, + "start_time": self.obj_data["start_time"], + "end_time": self.obj_data.get("end_time", None), + "score": self.obj_data["score"], + "box": self.obj_data["box"], + "area": self.obj_data["area"], + "ratio": self.obj_data["ratio"], + "region": self.obj_data["region"], + "active": self.active, + "stationary": not self.active, + "motionless_count": self.obj_data["motionless_count"], + "position_changes": self.obj_data["position_changes"], + "current_zones": self.current_zones.copy(), + "entered_zones": self.entered_zones.copy(), + "has_clip": self.has_clip, + "has_snapshot": self.has_snapshot, + "attributes": self.attributes, + "current_attributes": self.obj_data["attributes"], + "pending_loitering": self.pending_loitering, + } + + if include_thumbnail: + event["thumbnail"] = base64.b64encode(self.get_thumbnail()).decode("utf-8") + + return event + + def is_active(self): + return not self.is_stationary() + + def is_stationary(self): + return ( + self.obj_data["motionless_count"] + > self.camera_config.detect.stationary.threshold + ) + + def get_thumbnail(self): + if ( + self.thumbnail_data is None + or self.thumbnail_data["frame_time"] not in self.frame_cache + ): + ret, jpg = cv2.imencode(".jpg", np.zeros((175, 175, 3), np.uint8)) + + jpg_bytes = self.get_jpg_bytes( + timestamp=False, bounding_box=False, crop=True, height=175 + ) + + if jpg_bytes: + return jpg_bytes + else: + ret, jpg = cv2.imencode(".jpg", np.zeros((175, 175, 3), np.uint8)) + return jpg.tobytes() + + def get_clean_png(self): + if self.thumbnail_data is None: + return None + + try: + best_frame = cv2.cvtColor( + self.frame_cache[self.thumbnail_data["frame_time"]], + cv2.COLOR_YUV2BGR_I420, + ) + except KeyError: + logger.warning( + f"Unable to create clean png because frame {self.thumbnail_data['frame_time']} is not in the cache" + ) + return None + + ret, png = cv2.imencode(".png", best_frame) + if ret: + return png.tobytes() + else: + return None + + def get_jpg_bytes( + self, timestamp=False, bounding_box=False, crop=False, height=None, quality=70 + ): + if self.thumbnail_data is None: + return None + + try: + best_frame = cv2.cvtColor( + self.frame_cache[self.thumbnail_data["frame_time"]], + cv2.COLOR_YUV2BGR_I420, + ) + except KeyError: + logger.warning( + f"Unable to create jpg because frame {self.thumbnail_data['frame_time']} is not in the cache" + ) + return None + + if bounding_box: + thickness = 2 + color = self.colormap[self.obj_data["label"]] + + # draw the bounding boxes on the frame + box = self.thumbnail_data["box"] + draw_box_with_label( + best_frame, + box[0], + box[1], + box[2], + box[3], + self.obj_data["label"], + f"{int(self.thumbnail_data['score']*100)}% {int(self.thumbnail_data['area'])}", + thickness=thickness, + color=color, + ) + + # draw any attributes + for attribute in self.thumbnail_data["attributes"]: + box = attribute["box"] + draw_box_with_label( + best_frame, + box[0], + box[1], + box[2], + box[3], + attribute["label"], + f"{attribute['score']:.0%}", + thickness=thickness, + color=color, + ) + + if crop: + box = self.thumbnail_data["box"] + box_size = 300 + region = calculate_region( + best_frame.shape, + box[0], + box[1], + box[2], + box[3], + box_size, + multiplier=1.1, + ) + best_frame = best_frame[region[1] : region[3], region[0] : region[2]] + + if height: + width = int(height * best_frame.shape[1] / best_frame.shape[0]) + best_frame = cv2.resize( + best_frame, dsize=(width, height), interpolation=cv2.INTER_AREA + ) + if timestamp: + color = self.camera_config.timestamp_style.color + draw_timestamp( + best_frame, + self.thumbnail_data["frame_time"], + self.camera_config.timestamp_style.format, + font_effect=self.camera_config.timestamp_style.effect, + font_thickness=self.camera_config.timestamp_style.thickness, + font_color=(color.blue, color.green, color.red), + position=self.camera_config.timestamp_style.position, + ) + + ret, jpg = cv2.imencode( + ".jpg", best_frame, [int(cv2.IMWRITE_JPEG_QUALITY), quality] + ) + if ret: + return jpg.tobytes() + else: + return None + + +def zone_filtered(obj: TrackedObject, object_config): + object_name = obj.obj_data["label"] + + if object_name in object_config: + obj_settings = object_config[object_name] + + # if the min area is larger than the + # detected object, don't add it to detected objects + if obj_settings.min_area > obj.obj_data["area"]: + return True + + # if the detected object is larger than the + # max area, don't add it to detected objects + if obj_settings.max_area < obj.obj_data["area"]: + return True + + # if the score is lower than the threshold, skip + if obj_settings.threshold > obj.computed_score: + return True + + # if the object is not proportionally wide enough + if obj_settings.min_ratio > obj.obj_data["ratio"]: + return True + + # if the object is proportionally too wide + if obj_settings.max_ratio < obj.obj_data["ratio"]: + return True + + return False + + +class TrackedObjectAttribute: + def __init__(self, raw_data: tuple) -> None: + self.label = raw_data[0] + self.score = raw_data[1] + self.box = raw_data[2] + self.area = raw_data[3] + self.ratio = raw_data[4] + self.region = raw_data[5] + + def get_tracking_data(self) -> dict[str, any]: + """Return data saved to the object.""" + return { + "label": self.label, + "score": self.score, + "box": self.box, + } + + def find_best_object(self, objects: list[dict[str, any]]) -> str: + """Find the best attribute for each object and return its ID.""" + best_object_area = None + best_object_id = None + + for obj in objects: + if not box_inside(obj["box"], self.box): + continue + + object_area = area(obj["box"]) + + # if multiple objects have the same attribute then they + # are overlapping, it is most likely that the smaller object + # is the one with the attribute + if best_object_area is None: + best_object_area = object_area + best_object_id = obj["id"] + elif object_area < best_object_area: + best_object_area = object_area + best_object_id = obj["id"] + + return best_object_id diff --git a/frigate/util/image.py b/frigate/util/image.py index 41024a599..484737f71 100644 --- a/frigate/util/image.py +++ b/frigate/util/image.py @@ -36,6 +36,72 @@ def transliterate_to_latin(text: str) -> str: return unidecode(text) +def on_edge(box, frame_shape): + if ( + box[0] == 0 + or box[1] == 0 + or box[2] == frame_shape[1] - 1 + or box[3] == frame_shape[0] - 1 + ): + return True + + +def has_better_attr(current_thumb, new_obj, attr_label) -> bool: + max_new_attr = max( + [0] + + [area(a["box"]) for a in new_obj["attributes"] if a["label"] == attr_label] + ) + max_current_attr = max( + [0] + + [ + area(a["box"]) + for a in current_thumb["attributes"] + if a["label"] == attr_label + ] + ) + + # if the thumb has a higher scoring attr + return max_new_attr > max_current_attr + + +def is_better_thumbnail(label, current_thumb, new_obj, frame_shape) -> bool: + # larger is better + # cutoff images are less ideal, but they should also be smaller? + # better scores are obviously better too + + # check face on person + if label == "person": + if has_better_attr(current_thumb, new_obj, "face"): + return True + # if the current thumb has a face attr, dont update unless it gets better + if any([a["label"] == "face" for a in current_thumb["attributes"]]): + return False + + # check license_plate on car + if label == "car": + if has_better_attr(current_thumb, new_obj, "license_plate"): + return True + # if the current thumb has a license_plate attr, dont update unless it gets better + if any([a["label"] == "license_plate" for a in current_thumb["attributes"]]): + return False + + # if the new_thumb is on an edge, and the current thumb is not + if on_edge(new_obj["box"], frame_shape) and not on_edge( + current_thumb["box"], frame_shape + ): + return False + + # if the score is better by more than 5% + if new_obj["score"] > current_thumb["score"] + 0.05: + return True + + # if the area is 10% larger + if new_obj["area"] > current_thumb["area"] * 1.1: + return True + + return False + + def draw_timestamp( frame, timestamp, diff --git a/frigate/video.py b/frigate/video.py index 0f051b6b2..c0341446a 100755 --- a/frigate/video.py +++ b/frigate/video.py @@ -27,7 +27,7 @@ from frigate.object_detection import RemoteObjectDetector from frigate.ptz.autotrack import ptz_moving_at_frame_time from frigate.track import ObjectTracker from frigate.track.norfair_tracker import NorfairTracker -from frigate.track.object_attribute import ObjectAttribute +from frigate.track.tracked_object import TrackedObjectAttribute from frigate.util.builtin import EventsPerSecond, get_tomorrow_at_time from frigate.util.image import ( FrameManager, @@ -734,10 +734,10 @@ def process_frames( object_tracker.update_frame_times(frame_time) # group the attribute detections based on what label they apply to - attribute_detections: dict[str, list[ObjectAttribute]] = {} + attribute_detections: dict[str, list[TrackedObjectAttribute]] = {} for label, attribute_labels in model_config.attributes_map.items(): attribute_detections[label] = [ - ObjectAttribute(d) + TrackedObjectAttribute(d) for d in consolidated_detections if d[0] in attribute_labels ]