refactor and reduce false positives

2025-06-04 01:16:52 +02:00 · 2020-09-07 12:17:42 -05:00 · 2020-09-07 12:17:42 -05:00 · acb75fa02d
commit acb75fa02d
parent ea4ecae27c
10 changed files with 539 additions and 230 deletions
--- a/3
+++ b/3
@ -31,6 +31,7 @@ RUN apt -qq update && apt -qq install --no-install-recommends -y \
        PyYAML \
        matplotlib \
        pyarrow \
+        click \
    && echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" > /etc/apt/sources.list.d/coral-edgetpu.list \
    && wget -q -O - https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - \
    && apt -qq update \
@ -46,7 +47,7 @@ RUN apt -qq update && apt -qq install --no-install-recommends -y \

 # get model and labels
 RUN wget -q https://github.com/google-coral/edgetpu/raw/master/test_data/ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite -O /edgetpu_model.tflite --trust-server-names
-RUN wget -q https://dl.google.com/coral/canned_models/coco_labels.txt -O /labelmap.txt --trust-server-names
+COPY labelmap.txt /labelmap.txt
 RUN wget -q https://github.com/google-coral/edgetpu/raw/master/test_data/ssd_mobilenet_v2_coco_quant_postprocess.tflite -O /cpu_model.tflite 


--- a/README.md
+++ b/README.md
@ -344,3 +344,15 @@ cameras:
 - Additional logging is available in the docker container - You can view the logs by running `docker logs -t frigate`
 - Object configuration - Tracked objects types, sizes and thresholds can be defined globally and/or on a per camera basis. The global and camera object configuration is *merged*. For example, if you defined tracking person, car, and truck globally but modified your backyard camera to only track person, the global config would merge making the effective list for the backyard camera still contain person, car and truck. If you want precise object tracking per camera, best practice to put a minimal list of objects at the global level and expand objects on a per camera basis. Object threshold and area configuration will be used first from the camera object config (if defined) and then from the global config.  See the [example config](config/config.example.yml) for more information.

+## Troubleshooting
+
+### "ffmpeg didnt return a frame. something is wrong"
+Turn on logging for the camera by overriding the global_args and setting the log level to `info`:
+```yaml
+ffmpeg:
+  global_args:
+    - -hide_banner
+    - -loglevel
+    - info
+```
+
--- a/config/config.example.yml
+++ b/config/config.example.yml
@ -53,9 +53,10 @@ mqtt:
 # unless overridden at the camera levels.
 # Keys must be valid labels. By default, the model uses coco (https://dl.google.com/coral/canned_models/coco_labels.txt).
 # All labels from the model are reported over MQTT. These values are used to filter out false positives.
-# min_area (optional): minimum width*height of the bounding box for the detected person
-# max_area (optional): maximum width*height of the bounding box for the detected person
-# threshold (optional): The minimum decimal percentage (50% hit = 0.5) for the confidence from tensorflow
+# min_area (optional): minimum width*height of the bounding box for the detected object
+# max_area (optional): maximum width*height of the bounding box for the detected object
+# min_score (optional): minimum score for the object
+# threshold (optional): The minimum decimal percentage for the tracked object's computed score to be considered a true positive
 ####################
 objects:
  track:
@ -66,7 +67,8 @@ objects:
    person:
      min_area: 5000
      max_area: 100000
-      threshold: 0.8
+      min_score: 0.5
+      threshold: 0.85

 zones:
  #################
@ -184,4 +186,5 @@ cameras:
        person:
          min_area: 5000
          max_area: 100000
-          threshold: 0.8
+          min_score: 0.5
+          threshold: 0.85
--- a/detect_objects.py
+++ b/detect_objects.py
@ -120,7 +120,7 @@ class CameraWatchdog(threading.Thread):
                    process = mp.Process(target=track_camera, args=(name, self.config[name], GLOBAL_OBJECT_CONFIG, camera_process['frame_queue'],
                        camera_process['frame_shape'], self.tflite_process.detection_queue, self.tracked_objects_queue, 
                        camera_process['process_fps'], camera_process['detection_fps'],
-                        camera_process['read_start'], camera_process['detection_frame']))
+                        camera_process['read_start'], camera_process['detection_frame'], self.stop_event))
                    process.daemon = True
                    camera_process['process'] = process
                    process.start()
@ -135,7 +135,7 @@ class CameraWatchdog(threading.Thread):
                    camera_capture.start()
                    camera_process['ffmpeg_process'] = ffmpeg_process
                    camera_process['capture_thread'] = camera_capture
-                elif now - camera_process['capture_thread'].current_frame > 5:
+                elif now - camera_process['capture_thread'].current_frame.value > 5:
                    print(f"No frames received from {name} in 5 seconds. Exiting ffmpeg...")
                    ffmpeg_process = camera_process['ffmpeg_process']
                    ffmpeg_process.terminate()
@ -181,6 +181,7 @@ def main():
            'show_timestamp': config.get('snapshots', {}).get('show_timestamp', True),
            'draw_zones': config.get('snapshots', {}).get('draw_zones', False)
        }
+        config['zones'] = {}

    # Queue for cameras to push tracked objects to
    tracked_objects_queue = mp.Queue()
@ -259,7 +260,23 @@ def main():
            'capture_thread': camera_capture
        }

-        camera_process = mp.Process(target=track_camera, args=(name, config, GLOBAL_OBJECT_CONFIG, frame_queue, frame_shape,
+        # merge global object config into camera object config
+        camera_objects_config = config.get('objects', {})
+        # get objects to track for camera
+        objects_to_track = camera_objects_config.get('track', GLOBAL_OBJECT_CONFIG.get('track', ['person']))
+        # merge object filters
+        global_object_filters = GLOBAL_OBJECT_CONFIG.get('filters', {})
+        camera_object_filters = camera_objects_config.get('filters', {})
+        objects_with_config = set().union(global_object_filters.keys(), camera_object_filters.keys())
+        object_filters = {}
+        for obj in objects_with_config:
+            object_filters[obj] = {**global_object_filters.get(obj, {}), **camera_object_filters.get(obj, {})}
+        config['objects'] = {
+            'track': objects_to_track,
+            'filters': object_filters
+        }
+
+        camera_process = mp.Process(target=track_camera, args=(name, config, frame_queue, frame_shape,
            tflite_process.detection_queue, tracked_objects_queue, camera_processes[name]['process_fps'], 
            camera_processes[name]['detection_fps'], 
            camera_processes[name]['read_start'], camera_processes[name]['detection_frame'], stop_event))
@ -340,7 +357,7 @@ def main():
                'pid': camera_stats['process'].pid,
                'ffmpeg_pid': camera_stats['ffmpeg_process'].pid,
                'frame_info': {
-                    'read': capture_thread.current_frame,
+                    'read': capture_thread.current_frame.value,
                    'detect': camera_stats['detection_frame'].value,
                    'process': object_processor.camera_data[name]['current_frame_time']
                }
--- a/frigate/object_processing.py
+++ b/frigate/object_processing.py
@ -13,6 +13,8 @@ import pyarrow.plasma as plasma
 import matplotlib.pyplot as plt
 from frigate.util import draw_box_with_label, PlasmaFrameManager
 from frigate.edgetpu import load_labels
+from typing import Callable, Dict
+from statistics import mean, median

 PATH_TO_LABELS = '/labelmap.txt'

@ -23,11 +25,6 @@ COLOR_MAP = {}
 for key, val in LABELS.items():
    COLOR_MAP[val] = tuple(int(round(255 * c)) for c in cmap(key)[:3])

-def filter_false_positives(event):
-    if len(event['history']) < 2:
-        return True
-    return False
-
 def zone_filtered(obj, object_config):
    object_name = obj['label']
    object_filters = object_config.get('filters', {})
@ -46,11 +43,186 @@ def zone_filtered(obj, object_config):
            return True

        # if the score is lower than the threshold, skip
-        if obj_settings.get('threshold', 0) > obj['score']:
+        if obj_settings.get('threshold', 0) > obj['computed_score']:
            return True
        
    return False

+# Maintains the state of a camera
+class CameraState():
+    def __init__(self, name, config, frame_manager):
+        self.name = name
+        self.config = config
+        self.frame_manager = frame_manager
+
+        self.best_objects = {}
+        self.object_status = defaultdict(lambda: 'OFF')
+        self.tracked_objects = {}
+        self.zone_objects = defaultdict(lambda: [])
+        self.current_frame = np.zeros((720,1280,3), np.uint8)
+        self.current_frame_time = 0.0
+        self.previous_frame_id = None
+        self.callbacks = defaultdict(lambda: [])
+
+    def false_positive(self, obj):
+        threshold = self.config['objects'].get('filters', {}).get(obj['label'], {}).get('threshold', 0.85)
+        if obj['computed_score'] < threshold:
+            return True
+        return False
+
+    def compute_score(self, obj):
+        scores = obj['score_history'][:]
+        # pad with zeros if you dont have at least 3 scores
+        if len(scores) < 3:
+            scores += [0.0]*(3 - len(scores))
+        return median(scores)
+
+    def on(self, event_type: str, callback: Callable[[Dict], None]):
+        self.callbacks[event_type].append(callback)
+
+    def update(self, frame_time, tracked_objects):
+        self.current_frame_time = frame_time
+        # get the new frame and delete the old frame
+        frame_id = f"{self.name}{frame_time}"
+        self.current_frame = self.frame_manager.get(frame_id)
+        if not self.previous_frame_id is None:
+            self.frame_manager.delete(self.previous_frame_id)
+        self.previous_frame_id = frame_id
+
+        current_ids = tracked_objects.keys()
+        previous_ids = self.tracked_objects.keys()
+        removed_ids = list(set(previous_ids).difference(current_ids))
+        new_ids = list(set(current_ids).difference(previous_ids))
+        updated_ids = list(set(current_ids).intersection(previous_ids))
+
+        for id in new_ids:
+            self.tracked_objects[id] = tracked_objects[id]
+            self.tracked_objects[id]['zones'] = []
+
+            # start the score history
+            self.tracked_objects[id]['score_history'] = [self.tracked_objects[id]['score']]
+
+            # calculate if this is a false positive
+            self.tracked_objects[id]['computed_score'] = self.compute_score(self.tracked_objects[id])
+            self.tracked_objects[id]['false_positive'] = self.false_positive(self.tracked_objects[id])
+
+            # call event handlers
+            for c in self.callbacks['start']:
+                c(self.name, tracked_objects[id])
+        
+        for id in updated_ids:
+            self.tracked_objects[id].update(tracked_objects[id])
+
+            # if the object is not in the current frame, add a 0.0 to the score history
+            if self.tracked_objects[id]['frame_time'] != self.current_frame_time:
+                self.tracked_objects[id]['score_history'].append(0.0)
+            else:
+                self.tracked_objects[id]['score_history'].append(self.tracked_objects[id]['score'])
+            # only keep the last 10 scores
+            if len(self.tracked_objects[id]['score_history']) > 10:
+                self.tracked_objects[id]['score_history'] = self.tracked_objects[id]['score_history'][-10:]
+
+            # calculate if this is a false positive
+            self.tracked_objects[id]['computed_score'] = self.compute_score(self.tracked_objects[id])
+            self.tracked_objects[id]['false_positive'] = self.false_positive(self.tracked_objects[id])
+
+            # call event handlers
+            for c in self.callbacks['update']:
+                c(self.name, self.tracked_objects[id])
+        
+        for id in removed_ids:
+            # publish events to mqtt
+            self.tracked_objects[id]['end_time'] = frame_time
+            for c in self.callbacks['end']:
+                c(self.name, self.tracked_objects[id])
+            del self.tracked_objects[id]
+
+        # check to see if the objects are in any zones
+        for obj in self.tracked_objects.values():
+            current_zones = []
+            bottom_center = (obj['centroid'][0], obj['box'][3])
+            # check each zone
+            for name, zone in self.config['zones'].items():
+                contour = zone['contour']
+                # check if the object is in the zone and not filtered
+                if (cv2.pointPolygonTest(contour, bottom_center, False) >= 0 
+                    and not zone_filtered(obj, zone.get('filters', {}))):
+                    current_zones.append(name)
+            obj['zones'] = current_zones
+        
+        # draw on the frame
+        if not self.current_frame is None:
+            # draw the bounding boxes on the frame
+            for obj in self.tracked_objects.values():
+                thickness = 2
+                color = COLOR_MAP[obj['label']]
+                
+                if obj['frame_time'] != frame_time:
+                    thickness = 1
+                    color = (255,0,0)
+
+                # draw the bounding boxes on the frame
+                box = obj['box']
+                draw_box_with_label(self.current_frame, box[0], box[1], box[2], box[3], obj['label'], f"{int(obj['score']*100)}% {int(obj['area'])}", thickness=thickness, color=color)
+                # draw the regions on the frame
+                region = obj['region']
+                cv2.rectangle(self.current_frame, (region[0], region[1]), (region[2], region[3]), (0,255,0), 1)
+            
+            if self.config['snapshots']['show_timestamp']:
+                time_to_show = datetime.datetime.fromtimestamp(frame_time).strftime("%m/%d/%Y %H:%M:%S")
+                cv2.putText(self.current_frame, time_to_show, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, fontScale=.8, color=(255, 255, 255), thickness=2)
+
+            if self.config['snapshots']['draw_zones']:
+                for name, zone in self.config['zones'].items():
+                    thickness = 8 if any([name in obj['zones'] for obj in self.tracked_objects.values()]) else 2
+                    cv2.drawContours(self.current_frame, [zone['contour']], -1, zone['color'], thickness)
+
+        # maintain best objects
+        for obj in self.tracked_objects.values():
+            object_type = obj['label']
+            # if the object wasn't seen on the current frame, skip it
+            if obj['frame_time'] != self.current_frame_time or obj['false_positive']:
+                continue
+            if object_type in self.best_objects:
+                current_best = self.best_objects[object_type]
+                now = datetime.datetime.now().timestamp()
+                # if the object is a higher score than the current best score 
+                # or the current object is more than 1 minute old, use the new object
+                if obj['score'] > current_best['score'] or (now - current_best['frame_time']) > 60:
+                    obj['frame'] = np.copy(self.current_frame)
+                    self.best_objects[object_type] = obj
+                    for c in self.callbacks['snapshot']:
+                        c(self.name, self.best_objects[object_type])
+            else:
+                obj['frame'] = np.copy(self.current_frame)
+                self.best_objects[object_type] = obj
+                for c in self.callbacks['snapshot']:
+                    c(self.name, self.best_objects[object_type])
+        
+        # update overall camera state for each object type
+        obj_counter = Counter()
+        for obj in self.tracked_objects.values():
+            if not obj['false_positive']:
+                obj_counter[obj['label']] += 1
+                
+        # report on detected objects
+        for obj_name, count in obj_counter.items():
+            new_status = 'ON' if count > 0 else 'OFF'
+            if new_status != self.object_status[obj_name]:
+                self.object_status[obj_name] = new_status
+                for c in self.callbacks['object_status']:
+                    c(self.name, obj_name, new_status)
+
+        # expire any objects that are ON and no longer detected
+        expired_objects = [obj_name for obj_name, status in self.object_status.items() if status == 'ON' and not obj_name in obj_counter]
+        for obj_name in expired_objects:
+            self.object_status[obj_name] = 'OFF'
+            for c in self.callbacks['object_status']:
+                c(self.name, obj_name, 'OFF')
+            for c in self.callbacks['snapshot']:
+                c(self.name, self.best_objects[object_type])
+
+
 class TrackedObjectProcessor(threading.Thread):
    def __init__(self, camera_config, zone_config, client, topic_prefix, tracked_objects_queue, event_queue, stop_event):
        threading.Thread.__init__(self)
@ -61,6 +233,40 @@ class TrackedObjectProcessor(threading.Thread):
        self.tracked_objects_queue = tracked_objects_queue
        self.event_queue = event_queue
        self.stop_event = stop_event
+        self.camera_states: Dict[str, CameraState] = {}
+        self.plasma_client = PlasmaFrameManager(self.stop_event)
+
+        def start(camera, obj):
+            # publish events to mqtt
+            self.client.publish(f"{self.topic_prefix}/{camera}/events/start", json.dumps({x: obj[x] for x in obj if x not in ['frame']}), retain=False)
+            self.event_queue.put(('start', camera, obj))
+
+        def update(camera, obj):
+            pass
+
+        def end(camera, obj):
+            self.client.publish(f"{self.topic_prefix}/{camera}/events/end", json.dumps({x: obj[x] for x in obj if x not in ['frame']}), retain=False)
+            self.event_queue.put(('end', camera, obj))
+        
+        def snapshot(camera, obj):
+            best_frame = cv2.cvtColor(obj['frame'], cv2.COLOR_RGB2BGR)
+            ret, jpg = cv2.imencode('.jpg', best_frame)
+            if ret:
+                jpg_bytes = jpg.tobytes()
+                self.client.publish(f"{self.topic_prefix}/{camera}/{obj['label']}/snapshot", jpg_bytes, retain=True)
+        
+        def object_status(camera, object_name, status):
+            self.client.publish(f"{self.topic_prefix}/{camera}/{object_name}", status, retain=False)
+
+        for camera in self.camera_config.keys():
+            camera_state = CameraState(camera, self.camera_config[camera], self.plasma_client)
+            camera_state.on('start', start)
+            camera_state.on('update', update)
+            camera_state.on('end', end)
+            camera_state.on('snapshot', snapshot)
+            camera_state.on('object_status', object_status)
+            self.camera_states[camera] = camera_state
+
        self.camera_data = defaultdict(lambda: {
            'best_objects': {},
            'object_status': defaultdict(lambda: defaultdict(lambda: 'OFF')),
@ -69,38 +275,43 @@ class TrackedObjectProcessor(threading.Thread):
            'current_frame_time': 0.0,
            'object_id': None
        })
-        self.zone_data = defaultdict(lambda: {
-            'object_status': defaultdict(lambda: defaultdict(lambda: 'OFF')),
-            'contours': {}
-        })
-
-        # create zone contours
-        for name, config in zone_config.items():
-            for camera, camera_zone_config in config.items():
-                coordinates = camera_zone_config['coordinates']
-                if isinstance(coordinates, list):
-                    self.zone_data[name]['contours'][camera] =  np.array([[int(p.split(',')[0]), int(p.split(',')[1])] for p in coordinates])
-                elif isinstance(coordinates, str):
-                    points = coordinates.split(',')
-                    self.zone_data[name]['contours'][camera] =  np.array([[int(points[i]), int(points[i+1])] for i in range(0, len(points), 2)])
-                else:
-                    print(f"Unable to parse zone coordinates for {name} - {camera}")
+        # {
+        #   'zone_name': {
+        #       'person': ['camera_1', 'camera_2']
+        #   }
+        # }
+        self.zone_data = defaultdict(lambda: defaultdict(lambda: set()))

        # set colors for zones
-        colors = plt.cm.get_cmap('tab10', len(self.zone_data.keys()))
-        for i, zone in enumerate(self.zone_data.values()):
-            zone['color'] = tuple(int(round(255 * c)) for c in colors(i)[:3])
+        zone_colors = {}
+        colors = plt.cm.get_cmap('tab10', len(self.zone_config.keys()))
+        for i, zone in enumerate(self.zone_config.keys()):
+            zone_colors[zone] = tuple(int(round(255 * c)) for c in colors(i)[:3])

-        self.plasma_client = PlasmaFrameManager(self.stop_event)
+        # create zone contours
+        for zone_name, config in zone_config.items():
+            for camera, camera_zone_config in config.items():
+                camera_zone = {}
+                camera_zone['color'] = zone_colors[zone_name]
+                coordinates = camera_zone_config['coordinates']
+                if isinstance(coordinates, list):
+                    camera_zone['contour'] =  np.array([[int(p.split(',')[0]), int(p.split(',')[1])] for p in coordinates])
+                elif isinstance(coordinates, str):
+                    points = coordinates.split(',')
+                    camera_zone['contour'] =  np.array([[int(points[i]), int(points[i+1])] for i in range(0, len(points), 2)])
+                else:
+                    print(f"Unable to parse zone coordinates for {zone_name} - {camera}")
+                self.camera_config[camera]['zones'][zone_name] = camera_zone
        
    def get_best(self, camera, label):
-        if label in self.camera_data[camera]['best_objects']:
-            return self.camera_data[camera]['best_objects'][label]['frame']
+        best_objects = self.camera_states[camera].best_objects
+        if label in best_objects:
+            return best_objects[label]['frame']
        else:
            return None
    
    def get_current_frame(self, camera):
-        return self.camera_data[camera]['current_frame']
+        return self.camera_states[camera].current_frame

    def run(self):
        while True:
@ -113,165 +324,27 @@ class TrackedObjectProcessor(threading.Thread):
            except queue.Empty:
                continue

-            camera_config = self.camera_config[camera]
-            best_objects = self.camera_data[camera]['best_objects']
-            current_object_status = self.camera_data[camera]['object_status']
-            tracked_objects = self.camera_data[camera]['tracked_objects']
+            camera_state = self.camera_states[camera]

-            current_ids = current_tracked_objects.keys()
-            previous_ids = tracked_objects.keys()
-            removed_ids = list(set(previous_ids).difference(current_ids))
-            new_ids = list(set(current_ids).difference(previous_ids))
-            updated_ids = list(set(current_ids).intersection(previous_ids))
+            camera_state.update(frame_time, current_tracked_objects)

-            for id in new_ids:
-                # only register the object here if we are sure it isnt a false positive
-                if not filter_false_positives(current_tracked_objects[id]):
-                    tracked_objects[id] = current_tracked_objects[id]
-                    # publish events to mqtt
-                    self.client.publish(f"{self.topic_prefix}/{camera}/events/start", json.dumps(tracked_objects[id]), retain=False)
-                    self.event_queue.put(('start', camera, tracked_objects[id]))
-            
-            for id in updated_ids:
-                tracked_objects[id] = current_tracked_objects[id]
-            
-            for id in removed_ids:
-                # publish events to mqtt
-                tracked_objects[id]['end_time'] = frame_time
-                self.client.publish(f"{self.topic_prefix}/{camera}/events/end", json.dumps(tracked_objects[id]), retain=False)
-                self.event_queue.put(('end', camera, tracked_objects[id]))
-                del tracked_objects[id]
-
-            self.camera_data[camera]['current_frame_time'] = frame_time
-
-            # build a dict of objects in each zone for current camera
-            current_objects_in_zones = defaultdict(lambda: [])
-            for obj in tracked_objects.values():
-                bottom_center = (obj['centroid'][0], obj['box'][3])
-                # check each zone
-                for name, zone in self.zone_data.items():
-                    current_contour = zone['contours'].get(camera, None)
-                    # if the current camera does not have a contour for this zone, skip
-                    if current_contour is None:
-                        continue
-                    # check if the object is in the zone and not filtered
-                    if (cv2.pointPolygonTest(current_contour, bottom_center, False) >= 0 
-                        and not zone_filtered(obj, self.zone_config[name][camera])):
-                        current_objects_in_zones[name].append(obj['label'])
-
-            ###
-            # Draw tracked objects on the frame
-            ###
-            current_frame = self.plasma_client.get(f"{camera}{frame_time}")
-
-            if not current_frame is plasma.ObjectNotAvailable:
-                # draw the bounding boxes on the frame
-                for obj in tracked_objects.values():
-                    thickness = 2
-                    color = COLOR_MAP[obj['label']]
-                    
-                    if obj['frame_time'] != frame_time:
-                        thickness = 1
-                        color = (255,0,0)
-
-                    # draw the bounding boxes on the frame
-                    box = obj['box']
-                    draw_box_with_label(current_frame, box[0], box[1], box[2], box[3], obj['label'], f"{int(obj['score']*100)}% {int(obj['area'])}", thickness=thickness, color=color)
-                    # draw the regions on the frame
-                    region = obj['region']
-                    cv2.rectangle(current_frame, (region[0], region[1]), (region[2], region[3]), (0,255,0), 1)
-                
-                if camera_config['snapshots']['show_timestamp']:
-                    time_to_show = datetime.datetime.fromtimestamp(frame_time).strftime("%m/%d/%Y %H:%M:%S")
-                    cv2.putText(current_frame, time_to_show, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, fontScale=.8, color=(255, 255, 255), thickness=2)
-
-                if camera_config['snapshots']['draw_zones']:
-                    for name, zone in self.zone_data.items():
-                        thickness = 2 if len(current_objects_in_zones[name]) == 0 else 8
-                        if camera in zone['contours']:
-                            cv2.drawContours(current_frame, [zone['contours'][camera]], -1, zone['color'], thickness)
-
-                ###
-                # Set the current frame
-                ###
-                self.camera_data[camera]['current_frame'] = current_frame
-
-                # delete the previous frame from the plasma store and update the object id
-                if not self.camera_data[camera]['object_id'] is None:
-                    self.plasma_client.delete(self.camera_data[camera]['object_id'])
-                self.camera_data[camera]['object_id'] = f"{camera}{frame_time}"
-            
-            ###
-            # Maintain the highest scoring recent object and frame for each label
-            ###
-            for obj in tracked_objects.values():
-                # if the object wasn't seen on the current frame, skip it
-                if obj['frame_time'] != frame_time:
-                    continue
-                if obj['label'] in best_objects:
-                    now = datetime.datetime.now().timestamp()
-                    # if the object is a higher score than the current best score 
-                    # or the current object is more than 1 minute old, use the new object
-                    if obj['score'] > best_objects[obj['label']]['score'] or (now - best_objects[obj['label']]['frame_time']) > 60:
-                        obj['frame'] = np.copy(self.camera_data[camera]['current_frame'])
-                        best_objects[obj['label']] = obj
-                        # send updated snapshot over mqtt
-                        best_frame = cv2.cvtColor(obj['frame'], cv2.COLOR_RGB2BGR)
-                        ret, jpg = cv2.imencode('.jpg', best_frame)
-                        if ret:
-                            jpg_bytes = jpg.tobytes()
-                            self.client.publish(f"{self.topic_prefix}/{camera}/{obj['label']}/snapshot", jpg_bytes, retain=True)
-                else:
-                    obj['frame'] = np.copy(self.camera_data[camera]['current_frame'])
-                    best_objects[obj['label']] = obj
-
-            ###
-            # Report over MQTT
-            ###
-
-            # get the zones that are relevant for this camera
-            relevant_zones = [zone for zone, config in self.zone_config.items() if camera in config]
-            for zone in relevant_zones:
-                # create the set of labels in the current frame and previously reported
-                labels_for_zone = set(current_objects_in_zones[zone] + list(self.zone_data[zone]['object_status'][camera].keys()))
-                # for each label
-                for label in labels_for_zone:
-                    # compute the current 'ON' vs 'OFF' status by checking if any camera sees the object in the zone
-                    previous_state = any([c[label] == 'ON' for c in self.zone_data[zone]['object_status'].values()])
-                    self.zone_data[zone]['object_status'][camera][label] = 'ON' if label in current_objects_in_zones[zone] else 'OFF'
-                    new_state = any([c[label] == 'ON' for c in self.zone_data[zone]['object_status'].values()])
+            # update zone status for each label
+            for zone in camera_state.config['zones'].keys():
+                # get labels for current camera and all labels in current zone
+                labels_for_camera = set([obj['label'] for obj in camera_state.tracked_objects.values() if zone in obj['zones']])
+                labels_to_check = labels_for_camera | set(self.zone_data[zone].keys())
+                # for each label in zone
+                for label in labels_to_check:
+                    camera_list = self.zone_data[zone][label]
+                    # remove or add the camera to the list for the current label
+                    previous_state = len(camera_list) > 0
+                    if label in labels_for_camera:
+                        camera_list.add(camera_state.name)
+                    elif camera_state.name in camera_list:
+                        camera_list.remove(camera_state.name)
+                    new_state = len(camera_list) > 0
                    # if the value is changing, send over MQTT
                    if previous_state == False and new_state == True:
                        self.client.publish(f"{self.topic_prefix}/{zone}/{label}", 'ON', retain=False)
                    elif previous_state == True and new_state == False:
                        self.client.publish(f"{self.topic_prefix}/{zone}/{label}", 'OFF', retain=False)
-
-            # count  by type
-            obj_counter = Counter()
-            for obj in tracked_objects.values():
-                obj_counter[obj['label']] += 1
-                    
-            # report on detected objects
-            for obj_name, count in obj_counter.items():
-                new_status = 'ON' if count > 0 else 'OFF'
-                if new_status != current_object_status[obj_name]:
-                    current_object_status[obj_name] = new_status
-                    self.client.publish(f"{self.topic_prefix}/{camera}/{obj_name}", new_status, retain=False)
-                    # send the best snapshot over mqtt
-                    best_frame = cv2.cvtColor(best_objects[obj_name]['frame'], cv2.COLOR_RGB2BGR)
-                    ret, jpg = cv2.imencode('.jpg', best_frame)
-                    if ret:
-                        jpg_bytes = jpg.tobytes()
-                        self.client.publish(f"{self.topic_prefix}/{camera}/{obj_name}/snapshot", jpg_bytes, retain=True)
-
-            # expire any objects that are ON and no longer detected
-            expired_objects = [obj_name for obj_name, status in current_object_status.items() if status == 'ON' and not obj_name in obj_counter]
-            for obj_name in expired_objects:
-                current_object_status[obj_name] = 'OFF'
-                self.client.publish(f"{self.topic_prefix}/{camera}/{obj_name}", 'OFF', retain=False)
-                # send updated snapshot over mqtt
-                best_frame = cv2.cvtColor(best_objects[obj_name]['frame'], cv2.COLOR_RGB2BGR)
-                ret, jpg = cv2.imencode('.jpg', best_frame)
-                if ret:
-                    jpg_bytes = jpg.tobytes()
-                    self.client.publish(f"{self.topic_prefix}/{camera}/{obj_name}/snapshot", jpg_bytes, retain=True)
--- a/frigate/objects.py
+++ b/frigate/objects.py
@ -24,7 +24,6 @@ class ObjectTracker():
        obj['id'] = id
        obj['start_time'] = obj['frame_time']
        obj['top_score'] = obj['score']
-        self.add_history(obj)
        self.tracked_objects[id] = obj
        self.disappeared[id] = 0

@ -35,26 +34,9 @@ class ObjectTracker():
    def update(self, id, new_obj):
        self.disappeared[id] = 0
        self.tracked_objects[id].update(new_obj)
-        self.add_history(self.tracked_objects[id])
        if self.tracked_objects[id]['score'] > self.tracked_objects[id]['top_score']:
            self.tracked_objects[id]['top_score'] = self.tracked_objects[id]['score']

-    def add_history(self, obj):
-        entry = {
-            'score': obj['score'],
-            'box': obj['box'],
-            'region': obj['region'],
-            'centroid': obj['centroid'],
-            'frame_time': obj['frame_time']
-        }
-        if 'history' in obj:
-            obj['history'].append(entry)
-            # only maintain the last 20 in history
-            if len(obj['history']) > 20:
-                obj['history'] = obj['history'][-20:]
-        else:
-            obj['history'] = [entry]
-
    def match_and_update(self, frame_time, new_objects):
        # group by name
        new_object_groups = defaultdict(lambda: [])
--- a/frigate/util.py
+++ b/frigate/util.py
@ -44,6 +44,9 @@ def draw_box_with_label(frame, x_min, y_min, x_max, y_max, label, info, thicknes
 def calculate_region(frame_shape, xmin, ymin, xmax, ymax, multiplier=2):    
    # size is larger than longest edge
    size = int(max(xmax-xmin, ymax-ymin)*multiplier)
+    # dont go any smaller than 300
+    if size < 300:
+        size = 300
    # if the size is too big to fit in the frame
    if size > min(frame_shape[0], frame_shape[1]):
        size = min(frame_shape[0], frame_shape[1])
--- a/frigate/video.py
+++ b/frigate/video.py
@ -73,8 +73,8 @@ def filtered(obj, objects_to_track, object_filters, mask=None):
        if obj_settings.get('max_area', 24000000) < obj[3]:
            return True

-        # if the score is lower than the threshold, skip
-        if obj_settings.get('threshold', 0) > obj[1]:
+        # if the score is lower than the min_score, skip
+        if obj_settings.get('min_score', 0) > obj[1]:
            return True
    
        # compute the coordinates of the object and make sure
@ -83,7 +83,7 @@ def filtered(obj, objects_to_track, object_filters, mask=None):
        x_location = min(int((obj[2][2]-obj[2][0])/2.0)+obj[2][0], len(mask[0])-1)

        # if the object is in a masked location, don't add it to detected objects
-        if mask != None and mask[y_location][x_location] == [0]:
+        if (not mask is None) and (mask[y_location][x_location][0] == 0):
            return True
        
    return False
@ -118,7 +118,7 @@ def start_or_restart_ffmpeg(ffmpeg_cmd, frame_size, ffmpeg_process=None):

 def capture_frames(ffmpeg_process, camera_name, frame_shape, frame_manager: FrameManager, 
    frame_queue, take_frame: int, fps:EventsPerSecond, skipped_fps: EventsPerSecond, 
-    stop_event: mp.Event, detection_frame: mp.Value):
+    stop_event: mp.Event, detection_frame: mp.Value, current_frame: mp.Value):

    frame_num = 0
    last_frame = 0
@ -130,7 +130,7 @@ def capture_frames(ffmpeg_process, camera_name, frame_shape, frame_manager: Fram
            break

        frame_bytes = ffmpeg_process.stdout.read(frame_size)
-        current_frame = datetime.datetime.now().timestamp()
+        current_frame.value = datetime.datetime.now().timestamp()

        if len(frame_bytes) == 0:
            print(f"{camera_name}: ffmpeg didnt return a frame. something is wrong.")
@ -154,14 +154,14 @@ def capture_frames(ffmpeg_process, camera_name, frame_shape, frame_manager: Fram
            continue

        # put the frame in the frame manager
-        frame_manager.put(f"{camera_name}{current_frame}",
+        frame_manager.put(f"{camera_name}{current_frame.value}",
                np
                    .frombuffer(frame_bytes, np.uint8)
                    .reshape(frame_shape)
            )
        # add to the queue
-        frame_queue.put(current_frame)
-        last_frame = current_frame
+        frame_queue.put(current_frame.value)
+        last_frame = current_frame.value

 class CameraCapture(threading.Thread):
    def __init__(self, name, ffmpeg_process, frame_shape, frame_queue, take_frame, fps, detection_frame, stop_event):
@ -175,7 +175,7 @@ class CameraCapture(threading.Thread):
        self.skipped_fps = EventsPerSecond()
        self.plasma_client = PlasmaFrameManager(stop_event)
        self.ffmpeg_process = ffmpeg_process
-        self.current_frame = 0
+        self.current_frame = mp.Value('d', 0.0)
        self.last_frame = 0
        self.detection_frame = detection_frame
        self.stop_event = stop_event
@ -183,9 +183,9 @@ class CameraCapture(threading.Thread):
    def run(self):
        self.skipped_fps.start()
        capture_frames(self.ffmpeg_process, self.name, self.frame_shape, self.plasma_client, self.frame_queue, self.take_frame,
-            self.fps, self.skipped_fps, self.stop_event, self.detection_frame)
+            self.fps, self.skipped_fps, self.stop_event, self.detection_frame, self.current_frame)

-def track_camera(name, config, global_objects_config, frame_queue, frame_shape, detection_queue, detected_objects_queue, fps, detection_fps, read_start, detection_frame, stop_event):
+def track_camera(name, config, frame_queue, frame_shape, detection_queue, detected_objects_queue, fps, detection_fps, read_start, detection_frame, stop_event):
    print(f"Starting process for {name}: {os.getpid()}")
    listen()

@ -193,15 +193,8 @@ def track_camera(name, config, global_objects_config, frame_queue, frame_shape,

    # Merge the tracked object config with the global config
    camera_objects_config = config.get('objects', {})
-    # combine tracked objects lists
-    objects_to_track = set().union(global_objects_config.get('track', ['person', 'car', 'truck']), camera_objects_config.get('track', []))
-    # merge object filters
-    global_object_filters = global_objects_config.get('filters', {})
-    camera_object_filters = camera_objects_config.get('filters', {})
-    objects_with_config = set().union(global_object_filters.keys(), camera_object_filters.keys())
-    object_filters = {}
-    for obj in objects_with_config:
-        object_filters[obj] = {**global_object_filters.get(obj, {}), **camera_object_filters.get(obj, {})}
+    objects_to_track = camera_objects_config.get('track', [])
+    object_filters = camera_objects_config.get('filters', {})

    # load in the mask for object detection
    if 'mask' in config:
--- a/labelmap.txt
+++ b/labelmap.txt
@ -0,0 +1,80 @@
+0  person
+1  bicycle
+2  car
+3  motorcycle
+4  airplane
+5  bus
+6  train
+7  car
+8  boat
+9  traffic light
+10  fire hydrant
+12  stop sign
+13  parking meter
+14  bench
+15  bird
+16  cat
+17  dog
+18  horse
+19  sheep
+20  cow
+21  elephant
+22  bear
+23  zebra
+24  giraffe
+26  backpack
+27  umbrella
+30  handbag
+31  tie
+32  suitcase
+33  frisbee
+34  skis
+35  snowboard
+36  sports ball
+37  kite
+38  baseball bat
+39  baseball glove
+40  skateboard
+41  surfboard
+42  tennis racket
+43  bottle
+45  wine glass
+46  cup
+47  fork
+48  knife
+49  spoon
+50  bowl
+51  banana
+52  apple
+53  sandwich
+54  orange
+55  broccoli
+56  carrot
+57  hot dog
+58  pizza
+59  donut
+60  cake
+61  chair
+62  couch
+63  potted plant
+64  bed
+66  dining table
+69  toilet
+71  tv
+72  laptop
+73  mouse
+74  remote
+75  keyboard
+76  cell phone
+77  microwave
+78  oven
+79  toaster
+80  sink
+81  refrigerator
+83  book
+84  clock
+85  vase
+86  scissors
+87  teddy bear
+88  hair drier
+89  toothbrush
--- a/process_clip.py
+++ b/process_clip.py
@ -0,0 +1,145 @@
+import sys
+import click
+import os
+import datetime
+from unittest import TestCase, main
+from frigate.video import process_frames, start_or_restart_ffmpeg, capture_frames, get_frame_shape
+from frigate.util import DictFrameManager, EventsPerSecond, draw_box_with_label
+from frigate.motion import MotionDetector
+from frigate.edgetpu import LocalObjectDetector
+from frigate.objects import ObjectTracker
+import multiprocessing as mp
+import numpy as np
+import cv2
+from frigate.object_processing import COLOR_MAP, CameraState
+
+class ProcessClip():
+    """Run one video clip through the detection pipeline outside of a live camera.
+
+    The clip is decoded with ffmpeg via capture_frames into a DictFrameManager,
+    handed to frigate.video.process_frames, and the resulting tracked objects
+    are replayed through a CameraState to report whether anything was detected.
+    """
+
+    def __init__(self, clip_path, frame_shape, config):
+        """Create the frame manager, queues and camera state for a clip.
+
+        clip_path: path of the video file to decode.
+        frame_shape: frame dimensions; the product of the first three entries
+            is used as the size in bytes of one raw frame.
+        config: camera config dict passed to CameraState.
+        """
+        self.clip_path = clip_path
+        self.frame_shape = frame_shape
+        self.camera_name = 'camera'
+        self.frame_manager = DictFrameManager()
+        self.frame_queue = mp.Queue()
+        self.detected_objects_queue = mp.Queue()
+        self.camera_state = CameraState(self.camera_name, config, self.frame_manager)
+
+    def load_frames(self):
+        """Decode the clip with ffmpeg and feed the raw frames to capture_frames."""
+        fps = EventsPerSecond()
+        skipped_fps = EventsPerSecond()
+        stop_event = mp.Event()
+        # detection frame timestamp is initialised far in the future
+        # NOTE(review): presumably so capture_frames never treats a frame as stale - confirm
+        detection_frame = mp.Value('d', datetime.datetime.now().timestamp()+100000)
+        current_frame = mp.Value('d', 0.0)
+        # build the argument list directly (instead of splitting a string on spaces)
+        # so that a clip path containing spaces stays a single argument
+        ffmpeg_cmd = ['ffmpeg', '-hide_banner', '-loglevel', 'panic', '-i', self.clip_path,
+            '-f', 'rawvideo', '-pix_fmt', 'rgb24', 'pipe:']
+        frame_size = self.frame_shape[0]*self.frame_shape[1]*self.frame_shape[2]
+        ffmpeg_process = start_or_restart_ffmpeg(ffmpeg_cmd, frame_size)
+        capture_frames(ffmpeg_process, self.camera_name, self.frame_shape, self.frame_manager, self.frame_queue, 1, fps, skipped_fps, stop_event, detection_frame, current_frame)
+        ffmpeg_process.wait()
+        ffmpeg_process.communicate()
+
+    def process_frames(self, objects_to_track=None, object_filters=None):
+        """Build the detectors and tracker and run process_frames over the queued frames.
+
+        objects_to_track: list of labels to track; defaults to ['person'].
+        object_filters: dict of per-label filter settings; defaults to {}.
+        """
+        # create the defaults per call rather than sharing mutable default arguments
+        if objects_to_track is None:
+            objects_to_track = ['person']
+        if object_filters is None:
+            object_filters = {}
+
+        # mask of all 255s (frigate.video.filtered only drops objects where the mask is 0)
+        mask = np.zeros((self.frame_shape[0], self.frame_shape[1], 1), np.uint8)
+        mask[:] = 255
+        motion_detector = MotionDetector(self.frame_shape, mask)
+
+        object_detector = LocalObjectDetector(labels='/labelmap.txt')
+        object_tracker = ObjectTracker(10)
+        process_fps = EventsPerSecond()
+        current_frame = mp.Value('d', 0.0)
+        stop_event = mp.Event()
+
+        # inside a method body this name resolves to the module level import,
+        # not to this method
+        process_frames(self.camera_name, self.frame_queue, self.frame_shape, self.frame_manager, motion_detector, object_detector, object_tracker, self.detected_objects_queue, 
+            process_fps, current_frame, objects_to_track, object_filters, mask, stop_event, exit_on_empty=True)
+
+    def objects_found(self, debug_path=None):
+        """Replay the detected objects through the camera state and summarize them.
+
+        debug_path: optional directory; when given, an annotated jpg is written
+            there for every entry taken off the detected objects queue.
+
+        Returns a dict with 'object_detected' (True if any 'new' or 'update'
+        event carried an object not flagged as false_positive) and 'top_score'
+        (the highest computed_score seen in those events).
+        """
+        obj_detected = False
+        top_computed_score = 0.0
+        def handle_event(name, obj):
+            nonlocal obj_detected
+            nonlocal top_computed_score
+            if obj['computed_score'] > top_computed_score:
+                top_computed_score = obj['computed_score']
+            if not obj['false_positive']:
+                obj_detected = True
+        self.camera_state.on('new', handle_event)
+        self.camera_state.on('update', handle_event)
+
+        while(not self.detected_objects_queue.empty()):
+            camera_name, frame_time, current_tracked_objects = self.detected_objects_queue.get()
+            if not debug_path is None:
+                self.save_debug_frame(debug_path, frame_time, current_tracked_objects.values())
+
+            self.camera_state.update(frame_time, current_tracked_objects)
+
+        return {
+            'object_detected': obj_detected,
+            'top_score': top_computed_score
+        }
+
+    def save_debug_frame(self, debug_path, frame_time, tracked_objects):
+        """Write the frame at frame_time to debug_path with boxes and regions drawn on it.
+
+        debug_path: directory the jpg is written to.
+        frame_time: timestamp used to look the frame up in the frame manager.
+        tracked_objects: iterable of tracked object dicts to draw.
+        """
+        current_frame = self.frame_manager.get(f"{self.camera_name}{frame_time}")
+        # draw the bounding boxes on the frame
+        for obj in tracked_objects:
+            if obj['frame_time'] != frame_time:
+                # the object's frame_time differs from this frame: thin box
+                thickness = 1
+                color = (255,0,0)
+            else:
+                thickness = 2
+                color = (255,255,0)
+
+            # draw the bounding boxes on the frame
+            box = obj['box']
+            draw_box_with_label(current_frame, box[0], box[1], box[2], box[3], obj['label'], f"{int(obj['score']*100)}% {int(obj['area'])}", thickness=thickness, color=color)
+            # draw the regions on the frame
+            region = obj['region']
+            draw_box_with_label(current_frame, region[0], region[1], region[2], region[3], 'region', "", thickness=1, color=(0,255,0))
+
+        # frames are held as RGB, cv2.imwrite expects BGR
+        cv2.imwrite(f"{os.path.join(debug_path, os.path.basename(self.clip_path))}.{int(frame_time*1000000)}.jpg", cv2.cvtColor(current_frame, cv2.COLOR_RGB2BGR))
+
+@click.command()
+@click.option("-p", "--path", required=True, help="Path to clip or directory to test.")
+@click.option("-l", "--label", default='person', help="Label name to detect.")
+@click.option("-t", "--threshold", default=0.85, help="Threshold value for objects.")
+@click.option("--debug-path", default=None, help="Path to output frames for debugging.")
+def process(path, label, threshold, debug_path):
+    """Run a clip, or every clip in a directory, through detection and print a summary."""
+    clips = []
+    if os.path.isdir(path):
+        # sorted for a stable order; anything that is not a regular file
+        # (e.g. a subdirectory) is skipped instead of being handed to ffmpeg
+        candidates = [os.path.join(path, file) for file in sorted(os.listdir(path))]
+        clips = [c for c in candidates if os.path.isfile(c)]
+    elif os.path.isfile(path):
+        clips.append(path)
+
+    # nothing to process: report it rather than dividing by zero in the summary
+    if not clips:
+        print(f"No clips found at {path}.")
+        return
+
+    config = {
+        'snapshots': {
+            'show_timestamp': False, 
+            'draw_zones': False
+        },
+        'zones': {},
+        'objects': {
+            'track': [label],
+            'filters': {
+                # key the threshold on the requested label so that it also
+                # applies when --label is something other than 'person'
+                label: {
+                    'threshold': threshold
+                }
+            }
+        }
+    }
+
+    results = []
+    for c in clips:
+        frame_shape = get_frame_shape(c)
+        process_clip = ProcessClip(c, frame_shape, config)
+        process_clip.load_frames()
+        process_clip.process_frames(objects_to_track=config['objects']['track'])
+
+        results.append((c, process_clip.objects_found(debug_path)))
+
+    for result in results:
+        print(f"{result[0]}: {result[1]}")
+
+    positive_count = sum(1 for result in results if result[1]['object_detected'])
+    print(f"Objects were detected in {positive_count}/{len(results)}({positive_count/len(results)*100:.2f}%) clip(s).")
+
+if __name__ == '__main__':
+    process()