From acb75fa02d783acfd3f091fff93aaa974e0bd196 Mon Sep 17 00:00:00 2001 From: Blake Blackshear Date: Mon, 7 Sep 2020 12:17:42 -0500 Subject: [PATCH] refactor and reduce false positives --- Dockerfile | 3 +- README.md | 14 +- config/config.example.yml | 13 +- detect_objects.py | 25 +- frigate/object_processing.py | 431 ++++++++++++++++++++--------------- frigate/objects.py | 18 -- frigate/util.py | 3 + frigate/video.py | 37 ++- labelmap.txt | 80 +++++++ process_clip.py | 145 ++++++++++++ 10 files changed, 539 insertions(+), 230 deletions(-) create mode 100644 labelmap.txt create mode 100644 process_clip.py diff --git a/Dockerfile b/Dockerfile index b06a29085..a67c5eabc 100755 --- a/Dockerfile +++ b/Dockerfile @@ -31,6 +31,7 @@ RUN apt -qq update && apt -qq install --no-install-recommends -y \ PyYAML \ matplotlib \ pyarrow \ + click \ && echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" > /etc/apt/sources.list.d/coral-edgetpu.list \ && wget -q -O - https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - \ && apt -qq update \ @@ -46,7 +47,7 @@ RUN apt -qq update && apt -qq install --no-install-recommends -y \ # get model and labels RUN wget -q https://github.com/google-coral/edgetpu/raw/master/test_data/ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite -O /edgetpu_model.tflite --trust-server-names -RUN wget -q https://dl.google.com/coral/canned_models/coco_labels.txt -O /labelmap.txt --trust-server-names +COPY labelmap.txt /labelmap.txt RUN wget -q https://github.com/google-coral/edgetpu/raw/master/test_data/ssd_mobilenet_v2_coco_quant_postprocess.tflite -O /cpu_model.tflite diff --git a/README.md b/README.md index d3f37a539..4252b2508 100644 --- a/README.md +++ b/README.md @@ -342,5 +342,17 @@ cameras: width: 1920 ``` - Additional logging is available in the docker container - You can view the logs by running `docker logs -t frigate` -- Object configuration - Tracked objects types, sizes and thresholds can be defined globally and/or on a per camera basis. The global and camera object configuration is *merged*. For example, if you defined tracking person, car, and truck globally but modified your backyard camera to only track person, the global config would merge making the effective list for the backyard camera still contain person, car and truck. If you want precise object tracking per camera, best practice to put a minimal list of objects at the global level and expand objects on a per camera basis. Object threshold and area configuration will be used first from the camera object config (if defined) and then from the global config. See the [example config](config/config.example.yml) for more information. +- Object configuration - Tracked objects types, sizes and thresholds can be defined globally and/or on a per camera basis. The global and camera object configuration is *merged*. For example, if you defined tracking person, car, and truck globally but modified your backyard camera to only track person, the global config would merge making the effective list for the backyard camera still contain person, car and truck. If you want precise object tracking per camera, best practice to put a minimal list of objects at the global level and expand objects on a per camera basis. Object threshold and area configuration will be used first from the camera object config (if defined) and then from the global config. See the [example config](config/config.example.yml) for more information. + +## Troubleshooting + +### "ffmpeg didnt return a frame. 
something is wrong" +Turn on logging for the camera by overriding the global_args and setting the log level to `info`: +```yaml +ffmpeg: + global_args: + - -hide_banner + - -loglevel + - panic +``` diff --git a/config/config.example.yml b/config/config.example.yml index 7f08d594f..180418e19 100644 --- a/config/config.example.yml +++ b/config/config.example.yml @@ -53,9 +53,10 @@ mqtt: # unless overridden at the camera levels. # Keys must be valid labels. By default, the model uses coco (https://dl.google.com/coral/canned_models/coco_labels.txt). # All labels from the model are reported over MQTT. These values are used to filter out false positives. -# min_area (optional): minimum width*height of the bounding box for the detected person -# max_area (optional): maximum width*height of the bounding box for the detected person -# threshold (optional): The minimum decimal percentage (50% hit = 0.5) for the confidence from tensorflow +# min_area (optional): minimum width*height of the bounding box for the detected object +# max_area (optional): maximum width*height of the bounding box for the detected object +# min_score (optional): minimum score for the object +# threshold (optional): The minimum decimal percentage for tracked object's computed score to considered a true positive #################### objects: track: @@ -66,7 +67,8 @@ objects: person: min_area: 5000 max_area: 100000 - threshold: 0.8 + min_score: 0.5 + threshold: 0.85 zones: ################# @@ -184,4 +186,5 @@ cameras: person: min_area: 5000 max_area: 100000 - threshold: 0.8 + min_score: 0.5 + threshold: 0.85 diff --git a/detect_objects.py b/detect_objects.py index 4a652a7e8..ab0bcc238 100644 --- a/detect_objects.py +++ b/detect_objects.py @@ -120,7 +120,7 @@ class CameraWatchdog(threading.Thread): process = mp.Process(target=track_camera, args=(name, self.config[name], GLOBAL_OBJECT_CONFIG, camera_process['frame_queue'], camera_process['frame_shape'], self.tflite_process.detection_queue, self.tracked_objects_queue, camera_process['process_fps'], camera_process['detection_fps'], - camera_process['read_start'], camera_process['detection_frame'])) + camera_process['read_start'], camera_process['detection_frame'], self.stop_event)) process.daemon = True camera_process['process'] = process process.start() @@ -135,7 +135,7 @@ class CameraWatchdog(threading.Thread): camera_capture.start() camera_process['ffmpeg_process'] = ffmpeg_process camera_process['capture_thread'] = camera_capture - elif now - camera_process['capture_thread'].current_frame > 5: + elif now - camera_process['capture_thread'].current_frame.value > 5: print(f"No frames received from {name} in 5 seconds. 
Exiting ffmpeg...") ffmpeg_process = camera_process['ffmpeg_process'] ffmpeg_process.terminate() @@ -181,6 +181,7 @@ def main(): 'show_timestamp': config.get('snapshots', {}).get('show_timestamp', True), 'draw_zones': config.get('snapshots', {}).get('draw_zones', False) } + config['zones'] = {} # Queue for cameras to push tracked objects to tracked_objects_queue = mp.Queue() @@ -259,7 +260,23 @@ def main(): 'capture_thread': camera_capture } - camera_process = mp.Process(target=track_camera, args=(name, config, GLOBAL_OBJECT_CONFIG, frame_queue, frame_shape, + # merge global object config into camera object config + camera_objects_config = config.get('objects', {}) + # get objects to track for camera + objects_to_track = camera_objects_config.get('track', GLOBAL_OBJECT_CONFIG.get('track', ['person'])) + # merge object filters + global_object_filters = GLOBAL_OBJECT_CONFIG.get('filters', {}) + camera_object_filters = camera_objects_config.get('filters', {}) + objects_with_config = set().union(global_object_filters.keys(), camera_object_filters.keys()) + object_filters = {} + for obj in objects_with_config: + object_filters[obj] = {**global_object_filters.get(obj, {}), **camera_object_filters.get(obj, {})} + config['objects'] = { + 'track': objects_to_track, + 'filters': object_filters + } + + camera_process = mp.Process(target=track_camera, args=(name, config, frame_queue, frame_shape, tflite_process.detection_queue, tracked_objects_queue, camera_processes[name]['process_fps'], camera_processes[name]['detection_fps'], camera_processes[name]['read_start'], camera_processes[name]['detection_frame'], stop_event)) @@ -340,7 +357,7 @@ def main(): 'pid': camera_stats['process'].pid, 'ffmpeg_pid': camera_stats['ffmpeg_process'].pid, 'frame_info': { - 'read': capture_thread.current_frame, + 'read': capture_thread.current_frame.value, 'detect': camera_stats['detection_frame'].value, 'process': object_processor.camera_data[name]['current_frame_time'] } diff --git a/frigate/object_processing.py b/frigate/object_processing.py index 99bce25ee..cb423829a 100644 --- a/frigate/object_processing.py +++ b/frigate/object_processing.py @@ -13,6 +13,8 @@ import pyarrow.plasma as plasma import matplotlib.pyplot as plt from frigate.util import draw_box_with_label, PlasmaFrameManager from frigate.edgetpu import load_labels +from typing import Callable, Dict +from statistics import mean, median PATH_TO_LABELS = '/labelmap.txt' @@ -23,11 +25,6 @@ COLOR_MAP = {} for key, val in LABELS.items(): COLOR_MAP[val] = tuple(int(round(255 * c)) for c in cmap(key)[:3]) -def filter_false_positives(event): - if len(event['history']) < 2: - return True - return False - def zone_filtered(obj, object_config): object_name = obj['label'] object_filters = object_config.get('filters', {}) @@ -46,11 +43,186 @@ def zone_filtered(obj, object_config): return True # if the score is lower than the threshold, skip - if obj_settings.get('threshold', 0) > obj['score']: + if obj_settings.get('threshold', 0) > obj['computed_score']: return True return False +# Maintains the state of a camera +class CameraState(): + def __init__(self, name, config, frame_manager): + self.name = name + self.config = config + self.frame_manager = frame_manager + + self.best_objects = {} + self.object_status = defaultdict(lambda: 'OFF') + self.tracked_objects = {} + self.zone_objects = defaultdict(lambda: []) + self.current_frame = np.zeros((720,1280,3), np.uint8) + self.current_frame_time = 0.0 + self.previous_frame_id = None + self.callbacks = defaultdict(lambda: 
[]) + + def false_positive(self, obj): + threshold = self.config['objects'].get('filters', {}).get(obj['label'], {}).get('threshold', 0.85) + if obj['computed_score'] < threshold: + return True + return False + + def compute_score(self, obj): + scores = obj['score_history'][:] + # pad with zeros if you dont have at least 3 scores + if len(scores) < 3: + scores += [0.0]*(3 - len(scores)) + return median(scores) + + def on(self, event_type: str, callback: Callable[[Dict], None]): + self.callbacks[event_type].append(callback) + + def update(self, frame_time, tracked_objects): + self.current_frame_time = frame_time + # get the new frame and delete the old frame + frame_id = f"{self.name}{frame_time}" + self.current_frame = self.frame_manager.get(frame_id) + if not self.previous_frame_id is None: + self.frame_manager.delete(self.previous_frame_id) + self.previous_frame_id = frame_id + + current_ids = tracked_objects.keys() + previous_ids = self.tracked_objects.keys() + removed_ids = list(set(previous_ids).difference(current_ids)) + new_ids = list(set(current_ids).difference(previous_ids)) + updated_ids = list(set(current_ids).intersection(previous_ids)) + + for id in new_ids: + self.tracked_objects[id] = tracked_objects[id] + self.tracked_objects[id]['zones'] = [] + + # start the score history + self.tracked_objects[id]['score_history'] = [self.tracked_objects[id]['score']] + + # calculate if this is a false positive + self.tracked_objects[id]['computed_score'] = self.compute_score(self.tracked_objects[id]) + self.tracked_objects[id]['false_positive'] = self.false_positive(self.tracked_objects[id]) + + # call event handlers + for c in self.callbacks['start']: + c(self.name, tracked_objects[id]) + + for id in updated_ids: + self.tracked_objects[id].update(tracked_objects[id]) + + # if the object is not in the current frame, add a 0.0 to the score history + if self.tracked_objects[id]['frame_time'] != self.current_frame_time: + self.tracked_objects[id]['score_history'].append(0.0) + else: + self.tracked_objects[id]['score_history'].append(self.tracked_objects[id]['score']) + # only keep the last 10 scores + if len(self.tracked_objects[id]['score_history']) > 10: + self.tracked_objects[id]['score_history'] = self.tracked_objects[id]['score_history'][-10:] + + # calculate if this is a false positive + self.tracked_objects[id]['computed_score'] = self.compute_score(self.tracked_objects[id]) + self.tracked_objects[id]['false_positive'] = self.false_positive(self.tracked_objects[id]) + + # call event handlers + for c in self.callbacks['update']: + c(self.name, self.tracked_objects[id]) + + for id in removed_ids: + # publish events to mqtt + self.tracked_objects[id]['end_time'] = frame_time + for c in self.callbacks['end']: + c(self.name, self.tracked_objects[id]) + del self.tracked_objects[id] + + # check to see if the objects are in any zones + for obj in self.tracked_objects.values(): + current_zones = [] + bottom_center = (obj['centroid'][0], obj['box'][3]) + # check each zone + for name, zone in self.config['zones'].items(): + contour = zone['contour'] + # check if the object is in the zone and not filtered + if (cv2.pointPolygonTest(contour, bottom_center, False) >= 0 + and not zone_filtered(obj, zone.get('filters', {}))): + current_zones.append(name) + obj['zones'] = current_zones + + # draw on the frame + if not self.current_frame is None: + # draw the bounding boxes on the frame + for obj in self.tracked_objects.values(): + thickness = 2 + color = COLOR_MAP[obj['label']] + + if 
obj['frame_time'] != frame_time: + thickness = 1 + color = (255,0,0) + + # draw the bounding boxes on the frame + box = obj['box'] + draw_box_with_label(self.current_frame, box[0], box[1], box[2], box[3], obj['label'], f"{int(obj['score']*100)}% {int(obj['area'])}", thickness=thickness, color=color) + # draw the regions on the frame + region = obj['region'] + cv2.rectangle(self.current_frame, (region[0], region[1]), (region[2], region[3]), (0,255,0), 1) + + if self.config['snapshots']['show_timestamp']: + time_to_show = datetime.datetime.fromtimestamp(frame_time).strftime("%m/%d/%Y %H:%M:%S") + cv2.putText(self.current_frame, time_to_show, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, fontScale=.8, color=(255, 255, 255), thickness=2) + + if self.config['snapshots']['draw_zones']: + for name, zone in self.config['zones'].items(): + thickness = 8 if any([name in obj['zones'] for obj in self.tracked_objects.values()]) else 2 + cv2.drawContours(self.current_frame, [zone['contour']], -1, zone['color'], thickness) + + # maintain best objects + for obj in self.tracked_objects.values(): + object_type = obj['label'] + # if the object wasn't seen on the current frame, skip it + if obj['frame_time'] != self.current_frame_time or obj['false_positive']: + continue + if object_type in self.best_objects: + current_best = self.best_objects[object_type] + now = datetime.datetime.now().timestamp() + # if the object is a higher score than the current best score + # or the current object is more than 1 minute old, use the new object + if obj['score'] > current_best['score'] or (now - current_best['frame_time']) > 60: + obj['frame'] = np.copy(self.current_frame) + self.best_objects[object_type] = obj + for c in self.callbacks['snapshot']: + c(self.name, self.best_objects[object_type]) + else: + obj['frame'] = np.copy(self.current_frame) + self.best_objects[object_type] = obj + for c in self.callbacks['snapshot']: + c(self.name, self.best_objects[object_type]) + + # update overall camera state for each object type + obj_counter = Counter() + for obj in self.tracked_objects.values(): + if not obj['false_positive']: + obj_counter[obj['label']] += 1 + + # report on detected objects + for obj_name, count in obj_counter.items(): + new_status = 'ON' if count > 0 else 'OFF' + if new_status != self.object_status[obj_name]: + self.object_status[obj_name] = new_status + for c in self.callbacks['object_status']: + c(self.name, obj_name, new_status) + + # expire any objects that are ON and no longer detected + expired_objects = [obj_name for obj_name, status in self.object_status.items() if status == 'ON' and not obj_name in obj_counter] + for obj_name in expired_objects: + self.object_status[obj_name] = 'OFF' + for c in self.callbacks['object_status']: + c(self.name, obj_name, 'OFF') + for c in self.callbacks['snapshot']: + c(self.name, self.best_objects[object_type]) + + class TrackedObjectProcessor(threading.Thread): def __init__(self, camera_config, zone_config, client, topic_prefix, tracked_objects_queue, event_queue, stop_event): threading.Thread.__init__(self) @@ -61,6 +233,40 @@ class TrackedObjectProcessor(threading.Thread): self.tracked_objects_queue = tracked_objects_queue self.event_queue = event_queue self.stop_event = stop_event + self.camera_states: Dict[str, CameraState] = {} + self.plasma_client = PlasmaFrameManager(self.stop_event) + + def start(camera, obj): + # publish events to mqtt + self.client.publish(f"{self.topic_prefix}/{camera}/events/start", json.dumps({x: obj[x] for x in obj if x not in ['frame']}), 
retain=False) + self.event_queue.put(('start', camera, obj)) + + def update(camera, obj): + pass + + def end(camera, obj): + self.client.publish(f"{self.topic_prefix}/{camera}/events/end", json.dumps({x: obj[x] for x in obj if x not in ['frame']}), retain=False) + self.event_queue.put(('end', camera, obj)) + + def snapshot(camera, obj): + best_frame = cv2.cvtColor(obj['frame'], cv2.COLOR_RGB2BGR) + ret, jpg = cv2.imencode('.jpg', best_frame) + if ret: + jpg_bytes = jpg.tobytes() + self.client.publish(f"{self.topic_prefix}/{camera}/{obj['label']}/snapshot", jpg_bytes, retain=True) + + def object_status(camera, object_name, status): + self.client.publish(f"{self.topic_prefix}/{camera}/{object_name}", status, retain=False) + + for camera in self.camera_config.keys(): + camera_state = CameraState(camera, self.camera_config[camera], self.plasma_client) + camera_state.on('start', start) + camera_state.on('update', update) + camera_state.on('end', end) + camera_state.on('snapshot', snapshot) + camera_state.on('object_status', object_status) + self.camera_states[camera] = camera_state + self.camera_data = defaultdict(lambda: { 'best_objects': {}, 'object_status': defaultdict(lambda: defaultdict(lambda: 'OFF')), @@ -69,38 +275,43 @@ class TrackedObjectProcessor(threading.Thread): 'current_frame_time': 0.0, 'object_id': None }) - self.zone_data = defaultdict(lambda: { - 'object_status': defaultdict(lambda: defaultdict(lambda: 'OFF')), - 'contours': {} - }) + # { + # 'zone_name': { + # 'person': ['camera_1', 'camera_2'] + # } + # } + self.zone_data = defaultdict(lambda: defaultdict(lambda: set())) + + # set colors for zones + zone_colors = {} + colors = plt.cm.get_cmap('tab10', len(self.zone_config.keys())) + for i, zone in enumerate(self.zone_config.keys()): + zone_colors[zone] = tuple(int(round(255 * c)) for c in colors(i)[:3]) # create zone contours - for name, config in zone_config.items(): + for zone_name, config in zone_config.items(): for camera, camera_zone_config in config.items(): + camera_zone = {} + camera_zone['color'] = zone_colors[zone_name] coordinates = camera_zone_config['coordinates'] if isinstance(coordinates, list): - self.zone_data[name]['contours'][camera] = np.array([[int(p.split(',')[0]), int(p.split(',')[1])] for p in coordinates]) + camera_zone['contour'] = np.array([[int(p.split(',')[0]), int(p.split(',')[1])] for p in coordinates]) elif isinstance(coordinates, str): points = coordinates.split(',') - self.zone_data[name]['contours'][camera] = np.array([[int(points[i]), int(points[i+1])] for i in range(0, len(points), 2)]) + camera_zone['contour'] = np.array([[int(points[i]), int(points[i+1])] for i in range(0, len(points), 2)]) else: - print(f"Unable to parse zone coordinates for {name} - {camera}") - - # set colors for zones - colors = plt.cm.get_cmap('tab10', len(self.zone_data.keys())) - for i, zone in enumerate(self.zone_data.values()): - zone['color'] = tuple(int(round(255 * c)) for c in colors(i)[:3]) - - self.plasma_client = PlasmaFrameManager(self.stop_event) + print(f"Unable to parse zone coordinates for {zone_name} - {camera}") + self.camera_config[camera]['zones'][zone_name] = camera_zone def get_best(self, camera, label): - if label in self.camera_data[camera]['best_objects']: - return self.camera_data[camera]['best_objects'][label]['frame'] + best_objects = self.camera_states[camera].best_objects + if label in best_objects: + return best_objects[label]['frame'] else: return None def get_current_frame(self, camera): - return 
self.camera_data[camera]['current_frame'] + return self.camera_states[camera].current_frame def run(self): while True: @@ -113,165 +324,27 @@ class TrackedObjectProcessor(threading.Thread): except queue.Empty: continue - camera_config = self.camera_config[camera] - best_objects = self.camera_data[camera]['best_objects'] - current_object_status = self.camera_data[camera]['object_status'] - tracked_objects = self.camera_data[camera]['tracked_objects'] + camera_state = self.camera_states[camera] - current_ids = current_tracked_objects.keys() - previous_ids = tracked_objects.keys() - removed_ids = list(set(previous_ids).difference(current_ids)) - new_ids = list(set(current_ids).difference(previous_ids)) - updated_ids = list(set(current_ids).intersection(previous_ids)) + camera_state.update(frame_time, current_tracked_objects) - for id in new_ids: - # only register the object here if we are sure it isnt a false positive - if not filter_false_positives(current_tracked_objects[id]): - tracked_objects[id] = current_tracked_objects[id] - # publish events to mqtt - self.client.publish(f"{self.topic_prefix}/{camera}/events/start", json.dumps(tracked_objects[id]), retain=False) - self.event_queue.put(('start', camera, tracked_objects[id])) - - for id in updated_ids: - tracked_objects[id] = current_tracked_objects[id] - - for id in removed_ids: - # publish events to mqtt - tracked_objects[id]['end_time'] = frame_time - self.client.publish(f"{self.topic_prefix}/{camera}/events/end", json.dumps(tracked_objects[id]), retain=False) - self.event_queue.put(('end', camera, tracked_objects[id])) - del tracked_objects[id] - - self.camera_data[camera]['current_frame_time'] = frame_time - - # build a dict of objects in each zone for current camera - current_objects_in_zones = defaultdict(lambda: []) - for obj in tracked_objects.values(): - bottom_center = (obj['centroid'][0], obj['box'][3]) - # check each zone - for name, zone in self.zone_data.items(): - current_contour = zone['contours'].get(camera, None) - # if the current camera does not have a contour for this zone, skip - if current_contour is None: - continue - # check if the object is in the zone and not filtered - if (cv2.pointPolygonTest(current_contour, bottom_center, False) >= 0 - and not zone_filtered(obj, self.zone_config[name][camera])): - current_objects_in_zones[name].append(obj['label']) - - ### - # Draw tracked objects on the frame - ### - current_frame = self.plasma_client.get(f"{camera}{frame_time}") - - if not current_frame is plasma.ObjectNotAvailable: - # draw the bounding boxes on the frame - for obj in tracked_objects.values(): - thickness = 2 - color = COLOR_MAP[obj['label']] - - if obj['frame_time'] != frame_time: - thickness = 1 - color = (255,0,0) - - # draw the bounding boxes on the frame - box = obj['box'] - draw_box_with_label(current_frame, box[0], box[1], box[2], box[3], obj['label'], f"{int(obj['score']*100)}% {int(obj['area'])}", thickness=thickness, color=color) - # draw the regions on the frame - region = obj['region'] - cv2.rectangle(current_frame, (region[0], region[1]), (region[2], region[3]), (0,255,0), 1) - - if camera_config['snapshots']['show_timestamp']: - time_to_show = datetime.datetime.fromtimestamp(frame_time).strftime("%m/%d/%Y %H:%M:%S") - cv2.putText(current_frame, time_to_show, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, fontScale=.8, color=(255, 255, 255), thickness=2) - - if camera_config['snapshots']['draw_zones']: - for name, zone in self.zone_data.items(): - thickness = 2 if len(current_objects_in_zones[name]) 
== 0 else 8 - if camera in zone['contours']: - cv2.drawContours(current_frame, [zone['contours'][camera]], -1, zone['color'], thickness) - - ### - # Set the current frame - ### - self.camera_data[camera]['current_frame'] = current_frame - - # delete the previous frame from the plasma store and update the object id - if not self.camera_data[camera]['object_id'] is None: - self.plasma_client.delete(self.camera_data[camera]['object_id']) - self.camera_data[camera]['object_id'] = f"{camera}{frame_time}" - - ### - # Maintain the highest scoring recent object and frame for each label - ### - for obj in tracked_objects.values(): - # if the object wasn't seen on the current frame, skip it - if obj['frame_time'] != frame_time: - continue - if obj['label'] in best_objects: - now = datetime.datetime.now().timestamp() - # if the object is a higher score than the current best score - # or the current object is more than 1 minute old, use the new object - if obj['score'] > best_objects[obj['label']]['score'] or (now - best_objects[obj['label']]['frame_time']) > 60: - obj['frame'] = np.copy(self.camera_data[camera]['current_frame']) - best_objects[obj['label']] = obj - # send updated snapshot over mqtt - best_frame = cv2.cvtColor(obj['frame'], cv2.COLOR_RGB2BGR) - ret, jpg = cv2.imencode('.jpg', best_frame) - if ret: - jpg_bytes = jpg.tobytes() - self.client.publish(f"{self.topic_prefix}/{camera}/{obj['label']}/snapshot", jpg_bytes, retain=True) - else: - obj['frame'] = np.copy(self.camera_data[camera]['current_frame']) - best_objects[obj['label']] = obj - - ### - # Report over MQTT - ### - - # get the zones that are relevant for this camera - relevant_zones = [zone for zone, config in self.zone_config.items() if camera in config] - for zone in relevant_zones: - # create the set of labels in the current frame and previously reported - labels_for_zone = set(current_objects_in_zones[zone] + list(self.zone_data[zone]['object_status'][camera].keys())) - # for each label - for label in labels_for_zone: - # compute the current 'ON' vs 'OFF' status by checking if any camera sees the object in the zone - previous_state = any([c[label] == 'ON' for c in self.zone_data[zone]['object_status'].values()]) - self.zone_data[zone]['object_status'][camera][label] = 'ON' if label in current_objects_in_zones[zone] else 'OFF' - new_state = any([c[label] == 'ON' for c in self.zone_data[zone]['object_status'].values()]) + # update zone status for each label + for zone in camera_state.config['zones'].keys(): + # get labels for current camera and all labels in current zone + labels_for_camera = set([obj['label'] for obj in camera_state.tracked_objects.values() if zone in obj['zones']]) + labels_to_check = labels_for_camera | set(self.zone_data[zone].keys()) + # for each label in zone + for label in labels_to_check: + camera_list = self.zone_data[zone][label] + # remove or add the camera to the list for the current label + previous_state = len(camera_list) > 0 + if label in labels_for_camera: + camera_list.add(camera_state.name) + elif camera_state.name in camera_list: + camera_list.remove(camera_state.name) + new_state = len(camera_list) > 0 # if the value is changing, send over MQTT if previous_state == False and new_state == True: self.client.publish(f"{self.topic_prefix}/{zone}/{label}", 'ON', retain=False) elif previous_state == True and new_state == False: self.client.publish(f"{self.topic_prefix}/{zone}/{label}", 'OFF', retain=False) - - # count by type - obj_counter = Counter() - for obj in tracked_objects.values(): - 
obj_counter[obj['label']] += 1 - - # report on detected objects - for obj_name, count in obj_counter.items(): - new_status = 'ON' if count > 0 else 'OFF' - if new_status != current_object_status[obj_name]: - current_object_status[obj_name] = new_status - self.client.publish(f"{self.topic_prefix}/{camera}/{obj_name}", new_status, retain=False) - # send the best snapshot over mqtt - best_frame = cv2.cvtColor(best_objects[obj_name]['frame'], cv2.COLOR_RGB2BGR) - ret, jpg = cv2.imencode('.jpg', best_frame) - if ret: - jpg_bytes = jpg.tobytes() - self.client.publish(f"{self.topic_prefix}/{camera}/{obj_name}/snapshot", jpg_bytes, retain=True) - - # expire any objects that are ON and no longer detected - expired_objects = [obj_name for obj_name, status in current_object_status.items() if status == 'ON' and not obj_name in obj_counter] - for obj_name in expired_objects: - current_object_status[obj_name] = 'OFF' - self.client.publish(f"{self.topic_prefix}/{camera}/{obj_name}", 'OFF', retain=False) - # send updated snapshot over mqtt - best_frame = cv2.cvtColor(best_objects[obj_name]['frame'], cv2.COLOR_RGB2BGR) - ret, jpg = cv2.imencode('.jpg', best_frame) - if ret: - jpg_bytes = jpg.tobytes() - self.client.publish(f"{self.topic_prefix}/{camera}/{obj_name}/snapshot", jpg_bytes, retain=True) diff --git a/frigate/objects.py b/frigate/objects.py index b52edcd86..f083d5a5b 100644 --- a/frigate/objects.py +++ b/frigate/objects.py @@ -24,7 +24,6 @@ class ObjectTracker(): obj['id'] = id obj['start_time'] = obj['frame_time'] obj['top_score'] = obj['score'] - self.add_history(obj) self.tracked_objects[id] = obj self.disappeared[id] = 0 @@ -35,25 +34,8 @@ class ObjectTracker(): def update(self, id, new_obj): self.disappeared[id] = 0 self.tracked_objects[id].update(new_obj) - self.add_history(self.tracked_objects[id]) if self.tracked_objects[id]['score'] > self.tracked_objects[id]['top_score']: self.tracked_objects[id]['top_score'] = self.tracked_objects[id]['score'] - - def add_history(self, obj): - entry = { - 'score': obj['score'], - 'box': obj['box'], - 'region': obj['region'], - 'centroid': obj['centroid'], - 'frame_time': obj['frame_time'] - } - if 'history' in obj: - obj['history'].append(entry) - # only maintain the last 20 in history - if len(obj['history']) > 20: - obj['history'] = obj['history'][-20:] - else: - obj['history'] = [entry] def match_and_update(self, frame_time, new_objects): # group by name diff --git a/frigate/util.py b/frigate/util.py index 6cefc4b47..e6491628c 100755 --- a/frigate/util.py +++ b/frigate/util.py @@ -44,6 +44,9 @@ def draw_box_with_label(frame, x_min, y_min, x_max, y_max, label, info, thicknes def calculate_region(frame_shape, xmin, ymin, xmax, ymax, multiplier=2): # size is larger than longest edge size = int(max(xmax-xmin, ymax-ymin)*multiplier) + # dont go any smaller than 300 + if size < 300: + size = 300 # if the size is too big to fit in the frame if size > min(frame_shape[0], frame_shape[1]): size = min(frame_shape[0], frame_shape[1]) diff --git a/frigate/video.py b/frigate/video.py index e68274e4f..c8632a8e8 100755 --- a/frigate/video.py +++ b/frigate/video.py @@ -73,8 +73,8 @@ def filtered(obj, objects_to_track, object_filters, mask=None): if obj_settings.get('max_area', 24000000) < obj[3]: return True - # if the score is lower than the threshold, skip - if obj_settings.get('threshold', 0) > obj[1]: + # if the score is lower than the min_score, skip + if obj_settings.get('min_score', 0) > obj[1]: return True # compute the coordinates of the object and make 
sure @@ -83,10 +83,10 @@ def filtered(obj, objects_to_track, object_filters, mask=None): x_location = min(int((obj[2][2]-obj[2][0])/2.0)+obj[2][0], len(mask[0])-1) # if the object is in a masked location, don't add it to detected objects - if mask != None and mask[y_location][x_location] == [0]: + if (not mask is None) and (mask[y_location][x_location][0] == 0): return True - return False + return False def create_tensor_input(frame, region): cropped_frame = frame[region[1]:region[3], region[0]:region[2]] @@ -118,7 +118,7 @@ def start_or_restart_ffmpeg(ffmpeg_cmd, frame_size, ffmpeg_process=None): def capture_frames(ffmpeg_process, camera_name, frame_shape, frame_manager: FrameManager, frame_queue, take_frame: int, fps:EventsPerSecond, skipped_fps: EventsPerSecond, - stop_event: mp.Event, detection_frame: mp.Value): + stop_event: mp.Event, detection_frame: mp.Value, current_frame: mp.Value): frame_num = 0 last_frame = 0 @@ -130,7 +130,7 @@ def capture_frames(ffmpeg_process, camera_name, frame_shape, frame_manager: Fram break frame_bytes = ffmpeg_process.stdout.read(frame_size) - current_frame = datetime.datetime.now().timestamp() + current_frame.value = datetime.datetime.now().timestamp() if len(frame_bytes) == 0: print(f"{camera_name}: ffmpeg didnt return a frame. something is wrong.") @@ -154,14 +154,14 @@ def capture_frames(ffmpeg_process, camera_name, frame_shape, frame_manager: Fram continue # put the frame in the frame manager - frame_manager.put(f"{camera_name}{current_frame}", + frame_manager.put(f"{camera_name}{current_frame.value}", np .frombuffer(frame_bytes, np.uint8) .reshape(frame_shape) ) # add to the queue - frame_queue.put(current_frame) - last_frame = current_frame + frame_queue.put(current_frame.value) + last_frame = current_frame.value class CameraCapture(threading.Thread): def __init__(self, name, ffmpeg_process, frame_shape, frame_queue, take_frame, fps, detection_frame, stop_event): @@ -175,7 +175,7 @@ class CameraCapture(threading.Thread): self.skipped_fps = EventsPerSecond() self.plasma_client = PlasmaFrameManager(stop_event) self.ffmpeg_process = ffmpeg_process - self.current_frame = 0 + self.current_frame = mp.Value('d', 0.0) self.last_frame = 0 self.detection_frame = detection_frame self.stop_event = stop_event @@ -183,25 +183,18 @@ class CameraCapture(threading.Thread): def run(self): self.skipped_fps.start() capture_frames(self.ffmpeg_process, self.name, self.frame_shape, self.plasma_client, self.frame_queue, self.take_frame, - self.fps, self.skipped_fps, self.stop_event, self.detection_frame) + self.fps, self.skipped_fps, self.stop_event, self.detection_frame, self.current_frame) -def track_camera(name, config, global_objects_config, frame_queue, frame_shape, detection_queue, detected_objects_queue, fps, detection_fps, read_start, detection_frame, stop_event): +def track_camera(name, config, frame_queue, frame_shape, detection_queue, detected_objects_queue, fps, detection_fps, read_start, detection_frame, stop_event): print(f"Starting process for {name}: {os.getpid()}") listen() detection_frame.value = 0.0 # Merge the tracked object config with the global config - camera_objects_config = config.get('objects', {}) - # combine tracked objects lists - objects_to_track = set().union(global_objects_config.get('track', ['person', 'car', 'truck']), camera_objects_config.get('track', [])) - # merge object filters - global_object_filters = global_objects_config.get('filters', {}) - camera_object_filters = camera_objects_config.get('filters', {}) - objects_with_config 
= set().union(global_object_filters.keys(), camera_object_filters.keys()) - object_filters = {} - for obj in objects_with_config: - object_filters[obj] = {**global_object_filters.get(obj, {}), **camera_object_filters.get(obj, {})} + camera_objects_config = config.get('objects', {}) + objects_to_track = camera_objects_config.get('track', []) + object_filters = camera_objects_config.get('filters', {}) # load in the mask for object detection if 'mask' in config: diff --git a/labelmap.txt b/labelmap.txt new file mode 100644 index 000000000..a5a6fcb09 --- /dev/null +++ b/labelmap.txt @@ -0,0 +1,80 @@ +0 person +1 bicycle +2 car +3 motorcycle +4 airplane +5 bus +6 train +7 car +8 boat +9 traffic light +10 fire hydrant +12 stop sign +13 parking meter +14 bench +15 bird +16 cat +17 dog +18 horse +19 sheep +20 cow +21 elephant +22 bear +23 zebra +24 giraffe +26 backpack +27 umbrella +30 handbag +31 tie +32 suitcase +33 frisbee +34 skis +35 snowboard +36 sports ball +37 kite +38 baseball bat +39 baseball glove +40 skateboard +41 surfboard +42 tennis racket +43 bottle +45 wine glass +46 cup +47 fork +48 knife +49 spoon +50 bowl +51 banana +52 apple +53 sandwich +54 orange +55 broccoli +56 carrot +57 hot dog +58 pizza +59 donut +60 cake +61 chair +62 couch +63 potted plant +64 bed +66 dining table +69 toilet +71 tv +72 laptop +73 mouse +74 remote +75 keyboard +76 cell phone +77 microwave +78 oven +79 toaster +80 sink +81 refrigerator +83 book +84 clock +85 vase +86 scissors +87 teddy bear +88 hair drier +89 toothbrush \ No newline at end of file diff --git a/process_clip.py b/process_clip.py new file mode 100644 index 000000000..b7befef76 --- /dev/null +++ b/process_clip.py @@ -0,0 +1,145 @@ +import sys +import click +import os +import datetime +from unittest import TestCase, main +from frigate.video import process_frames, start_or_restart_ffmpeg, capture_frames, get_frame_shape +from frigate.util import DictFrameManager, EventsPerSecond, draw_box_with_label +from frigate.motion import MotionDetector +from frigate.edgetpu import LocalObjectDetector +from frigate.objects import ObjectTracker +import multiprocessing as mp +import numpy as np +import cv2 +from frigate.object_processing import COLOR_MAP, CameraState + +class ProcessClip(): + def __init__(self, clip_path, frame_shape, config): + self.clip_path = clip_path + self.frame_shape = frame_shape + self.camera_name = 'camera' + self.frame_manager = DictFrameManager() + self.frame_queue = mp.Queue() + self.detected_objects_queue = mp.Queue() + self.camera_state = CameraState(self.camera_name, config, self.frame_manager) + + def load_frames(self): + fps = EventsPerSecond() + skipped_fps = EventsPerSecond() + stop_event = mp.Event() + detection_frame = mp.Value('d', datetime.datetime.now().timestamp()+100000) + current_frame = mp.Value('d', 0.0) + ffmpeg_cmd = f"ffmpeg -hide_banner -loglevel panic -i {self.clip_path} -f rawvideo -pix_fmt rgb24 pipe:".split(" ") + ffmpeg_process = start_or_restart_ffmpeg(ffmpeg_cmd, self.frame_shape[0]*self.frame_shape[1]*self.frame_shape[2]) + capture_frames(ffmpeg_process, self.camera_name, self.frame_shape, self.frame_manager, self.frame_queue, 1, fps, skipped_fps, stop_event, detection_frame, current_frame) + ffmpeg_process.wait() + ffmpeg_process.communicate() + + def process_frames(self, objects_to_track=['person'], object_filters={}): + mask = np.zeros((self.frame_shape[0], self.frame_shape[1], 1), np.uint8) + mask[:] = 255 + motion_detector = MotionDetector(self.frame_shape, mask) + + object_detector = 
LocalObjectDetector(labels='/labelmap.txt') + object_tracker = ObjectTracker(10) + process_fps = EventsPerSecond() + current_frame = mp.Value('d', 0.0) + stop_event = mp.Event() + + process_frames(self.camera_name, self.frame_queue, self.frame_shape, self.frame_manager, motion_detector, object_detector, object_tracker, self.detected_objects_queue, + process_fps, current_frame, objects_to_track, object_filters, mask, stop_event, exit_on_empty=True) + + def objects_found(self, debug_path=None): + obj_detected = False + top_computed_score = 0.0 + def handle_event(name, obj): + nonlocal obj_detected + nonlocal top_computed_score + if obj['computed_score'] > top_computed_score: + top_computed_score = obj['computed_score'] + if not obj['false_positive']: + obj_detected = True + self.camera_state.on('new', handle_event) + self.camera_state.on('update', handle_event) + + while(not self.detected_objects_queue.empty()): + camera_name, frame_time, current_tracked_objects = self.detected_objects_queue.get() + if not debug_path is None: + self.save_debug_frame(debug_path, frame_time, current_tracked_objects.values()) + + self.camera_state.update(frame_time, current_tracked_objects) + + return { + 'object_detected': obj_detected, + 'top_score': top_computed_score + } + + def save_debug_frame(self, debug_path, frame_time, tracked_objects): + current_frame = self.frame_manager.get(f"{self.camera_name}{frame_time}") + # draw the bounding boxes on the frame + for obj in tracked_objects: + thickness = 2 + color = (0,0,175) + + if obj['frame_time'] != frame_time: + thickness = 1 + color = (255,0,0) + else: + color = (255,255,0) + + # draw the bounding boxes on the frame + box = obj['box'] + draw_box_with_label(current_frame, box[0], box[1], box[2], box[3], obj['label'], f"{int(obj['score']*100)}% {int(obj['area'])}", thickness=thickness, color=color) + # draw the regions on the frame + region = obj['region'] + draw_box_with_label(current_frame, region[0], region[1], region[2], region[3], 'region', "", thickness=1, color=(0,255,0)) + + cv2.imwrite(f"{os.path.join(debug_path, os.path.basename(self.clip_path))}.{int(frame_time*1000000)}.jpg", cv2.cvtColor(current_frame, cv2.COLOR_RGB2BGR)) + +@click.command() +@click.option("-p", "--path", required=True, help="Path to clip or directory to test.") +@click.option("-l", "--label", default='person', help="Label name to detect.") +@click.option("-t", "--threshold", default=0.85, help="Threshold value for objects.") +@click.option("--debug-path", default=None, help="Path to output frames for debugging.") +def process(path, label, threshold, debug_path): + clips = [] + if os.path.isdir(path): + files = os.listdir(path) + files.sort() + clips = [os.path.join(path, file) for file in files] + elif os.path.isfile(path): + clips.append(path) + + config = { + 'snapshots': { + 'show_timestamp': False, + 'draw_zones': False + }, + 'zones': {}, + 'objects': { + 'track': [label], + 'filters': { + 'person': { + 'threshold': threshold + } + } + } + } + + results = [] + for c in clips: + frame_shape = get_frame_shape(c) + process_clip = ProcessClip(c, frame_shape, config) + process_clip.load_frames() + process_clip.process_frames(objects_to_track=config['objects']['track']) + + results.append((c, process_clip.objects_found(debug_path))) + + for result in results: + print(f"{result[0]}: {result[1]}") + + positive_count = sum(1 for result in results if result[1]['object_detected']) + print(f"Objects were detected in 
{positive_count}/{len(results)} ({positive_count/len(results)*100:.2f}%) clip(s).")
+
+if __name__ == '__main__':
+    process()
\ No newline at end of file
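
For reference, the false positive reduction this patch introduces in `frigate/object_processing.py` works like this: every tracked object keeps a `score_history`, each update appends the latest detection score (or 0.0 when the object was not matched in the current frame), only the last 10 entries are kept, `computed_score` is the median of that history, and the object stays flagged as `false_positive` until the median reaches the per-label `threshold` (default 0.85). The new `min_score` option is applied earlier, per detection, in `frigate/video.py`. The snippet below is a minimal standalone sketch of that scoring logic for illustration only; it is not part of the patch, and the helper function names are mine.

```python
from statistics import median

def compute_score(score_history):
    # pad with zeros if there are not yet at least 3 scores,
    # mirroring CameraState.compute_score in the patch
    scores = score_history[:]
    if len(scores) < 3:
        scores += [0.0] * (3 - len(scores))
    return median(scores)

def false_positive(score_history, threshold=0.85):
    # an object remains a false positive until its median score clears the threshold
    return compute_score(score_history) < threshold

# a single strong detection is still treated as a false positive...
print(false_positive([0.92]))             # True: median of [0.92, 0.0, 0.0] is 0.0
# ...while a few consistent detections are not
print(false_positive([0.90, 0.88, 0.91])) # False: median is 0.90
```

This is also why the example config raises `threshold` to 0.85 while adding a lower `min_score` of 0.5: individual detections only need to clear `min_score` to be considered, but an object is only reported once its median (computed) score clears `threshold`.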