track and report all detected object types

This commit is contained in:
Blake Blackshear 2019-12-14 15:18:21 -06:00
parent 5c01720567
commit bee99ca6ff
7 changed files with 160 additions and 131 deletions

View File

@ -55,20 +55,22 @@ Example docker-compose:
A `config.yml` file must exist in the `config` directory. See example [here](config/config.example.yml) and device specific info can be found [here](docs/DEVICES.md). A `config.yml` file must exist in the `config` directory. See example [here](config/config.example.yml) and device specific info can be found [here](docs/DEVICES.md).
Access the mjpeg stream at `http://localhost:5000/<camera_name>` and the best person snapshot at `http://localhost:5000/<camera_name>/best_person.jpg` Access the mjpeg stream at `http://localhost:5000/<camera_name>` and the best snapshot for any object type at `http://localhost:5000/<camera_name>/<object_name>/best.jpg`
## Integration with HomeAssistant ## Integration with HomeAssistant
``` ```
camera: camera:
- name: Camera Last Person - name: Camera Last Person
platform: mqtt platform: mqtt
topic: frigate/<camera_name>/snapshot topic: frigate/<camera_name>/person/snapshot
- name: Camera Last Car
platform: mqtt
topic: frigate/<camera_name>/car/snapshot
binary_sensor: binary_sensor:
- name: Camera Person - name: Camera Person
platform: mqtt platform: mqtt
state_topic: "frigate/<camera_name>/objects" state_topic: "frigate/<camera_name>/person"
value_template: '{{ value_json.person }}'
device_class: motion device_class: motion
availability_topic: "frigate/available" availability_topic: "frigate/available"
@ -89,7 +91,7 @@ automation:
message: "A person was detected." message: "A person was detected."
data: data:
photo: photo:
- url: http://<ip>:5000/<camera_name>/best_person.jpg - url: http://<ip>:5000/<camera_name>/person/best.jpg
caption: A person was detected. caption: A person was detected.
``` ```

View File

@ -46,6 +46,18 @@ mqtt:
# - -pix_fmt # - -pix_fmt
# - rgb24 # - rgb24
####################
# Global object configuration. Applies to all cameras and regions
# unless overridden at the camera/region levels.
# Keys must be valid labels. By default, the model uses coco (https://dl.google.com/coral/canned_models/coco_labels.txt).
# All labels from the model are reported over MQTT. These values are used to filter out false positives.
####################
objects:
person:
min_area: 5000
max_area: 100000
threshold: 0.5
cameras: cameras:
back: back:
ffmpeg: ffmpeg:
@ -79,6 +91,12 @@ cameras:
################ ################
take_frame: 1 take_frame: 1
objects:
person:
min_area: 5000
max_area: 100000
threshold: 0.5
################ ################
# size: size of the region in pixels # size: size of the region in pixels
# x_offset/y_offset: position of the upper left corner of your region (top left of image is 0,0) # x_offset/y_offset: position of the upper left corner of your region (top left of image is 0,0)
@ -93,18 +111,18 @@ cameras:
- size: 350 - size: 350
x_offset: 0 x_offset: 0
y_offset: 300 y_offset: 300
min_person_area: 5000 objects:
max_person_area: 100000 car:
threshold: 0.5 threshold: 0.2
- size: 400 - size: 400
x_offset: 350 x_offset: 350
y_offset: 250 y_offset: 250
min_person_area: 2000 objects:
max_person_area: 100000 person:
threshold: 0.5 min_area: 2000
- size: 400 - size: 400
x_offset: 750 x_offset: 750
y_offset: 250 y_offset: 250
min_person_area: 2000 objects:
max_person_area: 100000 person:
threshold: 0.5 min_area: 2000

View File

@ -42,6 +42,8 @@ FFMPEG_DEFAULT_CONFIG = {
'-pix_fmt', 'rgb24']) '-pix_fmt', 'rgb24'])
} }
GLOBAL_OBJECT_CONFIG = CONFIG.get('objects', {})
WEB_PORT = CONFIG.get('web_port', 5000) WEB_PORT = CONFIG.get('web_port', 5000)
DEBUG = (CONFIG.get('debug', '0') == '1') DEBUG = (CONFIG.get('debug', '0') == '1')
@ -74,7 +76,7 @@ def main():
cameras = {} cameras = {}
for name, config in CONFIG['cameras'].items(): for name, config in CONFIG['cameras'].items():
cameras[name] = Camera(name, FFMPEG_DEFAULT_CONFIG, config, prepped_frame_queue, client, MQTT_TOPIC_PREFIX) cameras[name] = Camera(name, FFMPEG_DEFAULT_CONFIG, GLOBAL_OBJECT_CONFIG, config, prepped_frame_queue, client, MQTT_TOPIC_PREFIX)
prepped_queue_processor = PreppedQueueProcessor( prepped_queue_processor = PreppedQueueProcessor(
cameras, cameras,
@ -94,13 +96,13 @@ def main():
# return a health status # return a health status
return "Frigate is running. Alive and healthy!" return "Frigate is running. Alive and healthy!"
@app.route('/<camera_name>/best_person.jpg') @app.route('/<camera_name>/<label>/best.jpg')
def best_person(camera_name): def best(camera_name, label):
if camera_name in cameras: if camera_name in cameras:
best_person_frame = cameras[camera_name].get_best_person() best_frame = cameras[camera_name].get_best(label)
if best_person_frame is None: if best_frame is None:
best_person_frame = np.zeros((720,1280,3), np.uint8) best_frame = np.zeros((720,1280,3), np.uint8)
ret, jpg = cv2.imencode('.jpg', best_person_frame) ret, jpg = cv2.imencode('.jpg', best_frame)
response = make_response(jpg.tobytes()) response = make_response(jpg.tobytes())
response.headers['Content-Type'] = 'image/jpg' response.headers['Content-Type'] = 'image/jpg'
return response return response

View File

@ -1,41 +1,46 @@
import json import json
import cv2 import cv2
import threading import threading
from collections import Counter, defaultdict
class MqttObjectPublisher(threading.Thread): class MqttObjectPublisher(threading.Thread):
def __init__(self, client, topic_prefix, objects_parsed, detected_objects, best_person_frame): def __init__(self, client, topic_prefix, objects_parsed, detected_objects, best_frames):
threading.Thread.__init__(self) threading.Thread.__init__(self)
self.client = client self.client = client
self.topic_prefix = topic_prefix self.topic_prefix = topic_prefix
self.objects_parsed = objects_parsed self.objects_parsed = objects_parsed
self._detected_objects = detected_objects self._detected_objects = detected_objects
self.best_person_frame = best_person_frame self.best_frames = best_frames
def run(self): def run(self):
last_sent_payload = "" current_object_status = defaultdict(lambda: 'OFF')
while True: while True:
# initialize the payload
payload = {}
# wait until objects have been parsed # wait until objects have been parsed
with self.objects_parsed: with self.objects_parsed:
self.objects_parsed.wait() self.objects_parsed.wait()
# add all the person scores in detected objects # make a copy of detected objects
detected_objects = self._detected_objects.copy() detected_objects = self._detected_objects.copy()
person_score = sum([obj['score'] for obj in detected_objects if obj['name'] == 'person'])
# if the person score is more than 100, set person to ON
payload['person'] = 'ON' if int(person_score*100) > 100 else 'OFF'
# send message for objects if different # total up all scores by object type
new_payload = json.dumps(payload, sort_keys=True) obj_counter = Counter()
if new_payload != last_sent_payload: for obj in detected_objects:
last_sent_payload = new_payload obj_counter[obj['name']] += obj['score']
self.client.publish(self.topic_prefix+'/objects', new_payload, retain=False)
# report on detected objects
for obj_name, total_score in obj_counter.items():
new_status = 'ON' if int(total_score*100) > 100 else 'OFF'
if new_status != current_object_status[obj_name]:
current_object_status[obj_name] = new_status
self.client.publish(self.topic_prefix+'/'+obj_name, new_status, retain=False)
# send the snapshot over mqtt as well # send the snapshot over mqtt as well
if not self.best_person_frame.best_frame is None: if not self.best_frames.best_frames[obj_name] is None:
ret, jpg = cv2.imencode('.jpg', self.best_person_frame.best_frame) ret, jpg = cv2.imencode('.jpg', self.best_frames.best_frames[obj_name])
if ret: if ret:
jpg_bytes = jpg.tobytes() jpg_bytes = jpg.tobytes()
self.client.publish(self.topic_prefix+'/snapshot', jpg_bytes, retain=True) self.client.publish(self.topic_prefix+'/'+obj_name+'/snapshot', jpg_bytes, retain=True)
# expire any objects that are ON and no longer detected
expired_objects = [obj_name for obj_name, status in current_object_status.items() if status == 'ON' and not obj_name in obj_counter]
for obj_name in expired_objects:
self.client.publish(self.topic_prefix+'/'+obj_name, 'OFF', retain=False)

View File

@ -38,21 +38,18 @@ class PreppedQueueProcessor(threading.Thread):
frame = self.prepped_frame_queue.get() frame = self.prepped_frame_queue.get()
# Actual detection. # Actual detection.
objects = self.engine.DetectWithInputTensor(frame['frame'], threshold=frame['region_threshold'], top_k=3) objects = self.engine.DetectWithInputTensor(frame['frame'], threshold=0.5, top_k=5)
# print(self.engine.get_inference_time()) # print(self.engine.get_inference_time())
# parse and pass detected objects back to the camera # parse and pass detected objects back to the camera
parsed_objects = [] parsed_objects = []
for obj in objects: for obj in objects:
box = obj.bounding_box.flatten().tolist()
parsed_objects.append({ parsed_objects.append({
'region_id': frame['region_id'],
'frame_time': frame['frame_time'], 'frame_time': frame['frame_time'],
'name': str(self.labels[obj.label_id]), 'name': str(self.labels[obj.label_id]),
'score': float(obj.score), 'score': float(obj.score),
'xmin': int((box[0] * frame['region_size']) + frame['region_x_offset']), 'box': obj.bounding_box.flatten().tolist()
'ymin': int((box[1] * frame['region_size']) + frame['region_y_offset']),
'xmax': int((box[2] * frame['region_size']) + frame['region_x_offset']),
'ymax': int((box[3] * frame['region_size']) + frame['region_y_offset'])
}) })
self.cameras[frame['camera_name']].add_objects(parsed_objects) self.cameras[frame['camera_name']].add_objects(parsed_objects)
@ -61,7 +58,7 @@ class PreppedQueueProcessor(threading.Thread):
class FramePrepper(threading.Thread): class FramePrepper(threading.Thread):
def __init__(self, camera_name, shared_frame, frame_time, frame_ready, def __init__(self, camera_name, shared_frame, frame_time, frame_ready,
frame_lock, frame_lock,
region_size, region_x_offset, region_y_offset, region_threshold, region_size, region_x_offset, region_y_offset, region_id,
prepped_frame_queue): prepped_frame_queue):
threading.Thread.__init__(self) threading.Thread.__init__(self)
@ -73,7 +70,7 @@ class FramePrepper(threading.Thread):
self.region_size = region_size self.region_size = region_size
self.region_x_offset = region_x_offset self.region_x_offset = region_x_offset
self.region_y_offset = region_y_offset self.region_y_offset = region_y_offset
self.region_threshold = region_threshold self.region_id = region_id
self.prepped_frame_queue = prepped_frame_queue self.prepped_frame_queue = prepped_frame_queue
def run(self): def run(self):
@ -104,7 +101,7 @@ class FramePrepper(threading.Thread):
'frame_time': frame_time, 'frame_time': frame_time,
'frame': frame_expanded.flatten().copy(), 'frame': frame_expanded.flatten().copy(),
'region_size': self.region_size, 'region_size': self.region_size,
'region_threshold': self.region_threshold, 'region_id': self.region_id,
'region_x_offset': self.region_x_offset, 'region_x_offset': self.region_x_offset,
'region_y_offset': self.region_y_offset 'region_y_offset': self.region_y_offset
}) })

View File

@ -2,6 +2,7 @@ import time
import datetime import datetime
import threading import threading
import cv2 import cv2
import numpy as np
from . util import draw_box_with_label from . util import draw_box_with_label
class ObjectCleaner(threading.Thread): class ObjectCleaner(threading.Thread):
@ -35,16 +36,15 @@ class ObjectCleaner(threading.Thread):
self._objects_parsed.notify_all() self._objects_parsed.notify_all()
# Maintains the frame and person with the highest score from the most recent # Maintains the frame and object with the highest score
# motion event class BestFrames(threading.Thread):
class BestPersonFrame(threading.Thread):
def __init__(self, objects_parsed, recent_frames, detected_objects): def __init__(self, objects_parsed, recent_frames, detected_objects):
threading.Thread.__init__(self) threading.Thread.__init__(self)
self.objects_parsed = objects_parsed self.objects_parsed = objects_parsed
self.recent_frames = recent_frames self.recent_frames = recent_frames
self.detected_objects = detected_objects self.detected_objects = detected_objects
self.best_person = None self.best_objects = {}
self.best_frame = None self.best_frames = {}
def run(self): def run(self):
while True: while True:
@ -55,38 +55,30 @@ class BestPersonFrame(threading.Thread):
# make a copy of detected objects # make a copy of detected objects
detected_objects = self.detected_objects.copy() detected_objects = self.detected_objects.copy()
detected_people = [obj for obj in detected_objects if obj['name'] == 'person']
# get the highest scoring person for obj in detected_objects:
new_best_person = max(detected_people, key=lambda x:x['score'], default=self.best_person) if obj['name'] in self.best_objects:
# if there isnt a person, continue
if new_best_person is None:
continue
# if there is no current best_person
if self.best_person is None:
self.best_person = new_best_person
# if there is already a best_person
else:
now = datetime.datetime.now().timestamp() now = datetime.datetime.now().timestamp()
# if the new best person is a higher score than the current best person # if the object is a higher score than the current best score
# or the current person is more than 1 minute old, use the new best person # or the current object is more than 1 minute old, use the new object
if new_best_person['score'] > self.best_person['score'] or (now - self.best_person['frame_time']) > 60: if obj['score'] > self.best_objects[obj['name']]['score'] or (now - self.best_objects[obj['name']]['frame_time']) > 60:
self.best_person = new_best_person self.best_objects[obj['name']] = obj
else:
self.best_objects[obj['name']] = obj
# make a copy of the recent frames # make a copy of the recent frames
recent_frames = self.recent_frames.copy() recent_frames = self.recent_frames.copy()
if not self.best_person is None and self.best_person['frame_time'] in recent_frames: for name, obj in self.best_objects.items():
best_frame = recent_frames[self.best_person['frame_time']] if obj['frame_time'] in recent_frames:
best_frame = recent_frames[obj['frame_time']] #, np.zeros((720,1280,3), np.uint8))
label = "{}: {}% {}".format(self.best_person['name'],int(self.best_person['score']*100),int(self.best_person['area'])) label = "{}: {}% {}".format(name,int(obj['score']*100),int(obj['area']))
draw_box_with_label(best_frame, self.best_person['xmin'], self.best_person['ymin'], draw_box_with_label(best_frame, obj['xmin'], obj['ymin'],
self.best_person['xmax'], self.best_person['ymax'], label) obj['xmax'], obj['ymax'], label)
# print a timestamp # print a timestamp
time_to_show = datetime.datetime.fromtimestamp(self.best_person['frame_time']).strftime("%m/%d/%Y %H:%M:%S") time_to_show = datetime.datetime.fromtimestamp(obj['frame_time']).strftime("%m/%d/%Y %H:%M:%S")
cv2.putText(best_frame, time_to_show, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, fontScale=.8, color=(255, 255, 255), thickness=2) cv2.putText(best_frame, time_to_show, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, fontScale=.8, color=(255, 255, 255), thickness=2)
self.best_frame = cv2.cvtColor(best_frame, cv2.COLOR_RGB2BGR) self.best_frames[name] = cv2.cvtColor(best_frame, cv2.COLOR_RGB2BGR)

View File

@ -7,9 +7,10 @@ import ctypes
import multiprocessing as mp import multiprocessing as mp
import subprocess as sp import subprocess as sp
import numpy as np import numpy as np
from collections import defaultdict
from . util import tonumpyarray, draw_box_with_label from . util import tonumpyarray, draw_box_with_label
from . object_detection import FramePrepper from . object_detection import FramePrepper
from . objects import ObjectCleaner, BestPersonFrame from . objects import ObjectCleaner, BestFrames
from . mqtt import MqttObjectPublisher from . mqtt import MqttObjectPublisher
# Stores 2 seconds worth of frames when motion is detected so they can be used for other threads # Stores 2 seconds worth of frames when motion is detected so they can be used for other threads
@ -70,8 +71,8 @@ class CameraWatchdog(threading.Thread):
# wait a bit before checking # wait a bit before checking
time.sleep(10) time.sleep(10)
if (datetime.datetime.now().timestamp() - self.camera.frame_time.value) > 10: if (datetime.datetime.now().timestamp() - self.camera.frame_time.value) > 300:
print("last frame is more than 10 seconds old, restarting camera capture...") print("last frame is more than 5 minutes old, restarting camera capture...")
self.camera.start_or_restart_capture() self.camera.start_or_restart_capture()
time.sleep(5) time.sleep(5)
@ -111,7 +112,7 @@ class CameraCapture(threading.Thread):
self.camera.frame_ready.notify_all() self.camera.frame_ready.notify_all()
class Camera: class Camera:
def __init__(self, name, ffmpeg_config, config, prepped_frame_queue, mqtt_client, mqtt_prefix): def __init__(self, name, ffmpeg_config, global_objects_config, config, prepped_frame_queue, mqtt_client, mqtt_prefix):
self.name = name self.name = name
self.config = config self.config = config
self.detected_objects = [] self.detected_objects = []
@ -124,6 +125,8 @@ class Camera:
self.ffmpeg_input_args = self.ffmpeg.get('input_args', ffmpeg_config['input_args']) self.ffmpeg_input_args = self.ffmpeg.get('input_args', ffmpeg_config['input_args'])
self.ffmpeg_output_args = self.ffmpeg.get('output_args', ffmpeg_config['output_args']) self.ffmpeg_output_args = self.ffmpeg.get('output_args', ffmpeg_config['output_args'])
camera_objects_config = config.get('objects', {})
self.take_frame = self.config.get('take_frame', 1) self.take_frame = self.config.get('take_frame', 1)
self.regions = self.config['regions'] self.regions = self.config['regions']
self.frame_shape = get_frame_shape(self.ffmpeg_input) self.frame_shape = get_frame_shape(self.ffmpeg_input)
@ -147,20 +150,23 @@ class Camera:
# for each region, create a separate thread to resize the region and prep for detection # for each region, create a separate thread to resize the region and prep for detection
self.detection_prep_threads = [] self.detection_prep_threads = []
for region in self.config['regions']: for index, region in enumerate(self.config['regions']):
# set a default threshold of 0.5 if not defined region_objects = region.get('objects', {})
if not 'threshold' in region: # build objects config for region
region['threshold'] = 0.5 objects_with_config = set().union(global_objects_config.keys(), camera_objects_config.keys(), region_objects.keys())
if not isinstance(region['threshold'], float): merged_objects_config = defaultdict(lambda: {})
print('Threshold is not a float. Setting to 0.5 default.') for obj in objects_with_config:
region['threshold'] = 0.5 merged_objects_config[obj] = {**global_objects_config.get(obj,{}), **camera_objects_config.get(obj, {}), **region_objects.get(obj, {})}
region['objects'] = merged_objects_config
self.detection_prep_threads.append(FramePrepper( self.detection_prep_threads.append(FramePrepper(
self.name, self.name,
self.current_frame, self.current_frame,
self.frame_time, self.frame_time,
self.frame_ready, self.frame_ready,
self.frame_lock, self.frame_lock,
region['size'], region['x_offset'], region['y_offset'], region['threshold'], region['size'], region['x_offset'], region['y_offset'], index,
prepped_frame_queue prepped_frame_queue
)) ))
@ -169,22 +175,22 @@ class Camera:
self.frame_ready, self.frame_lock, self.recent_frames) self.frame_ready, self.frame_lock, self.recent_frames)
self.frame_tracker.start() self.frame_tracker.start()
# start a thread to store the highest scoring recent person frame # start a thread to store the highest scoring recent frames for monitored object types
self.best_person_frame = BestPersonFrame(self.objects_parsed, self.recent_frames, self.detected_objects) self.best_frames = BestFrames(self.objects_parsed, self.recent_frames, self.detected_objects)
self.best_person_frame.start() self.best_frames.start()
# start a thread to expire objects from the detected objects list # start a thread to expire objects from the detected objects list
self.object_cleaner = ObjectCleaner(self.objects_parsed, self.detected_objects) self.object_cleaner = ObjectCleaner(self.objects_parsed, self.detected_objects)
self.object_cleaner.start() self.object_cleaner.start()
# start a thread to publish object scores (currently only person) # start a thread to publish object scores
mqtt_publisher = MqttObjectPublisher(self.mqtt_client, self.mqtt_topic_prefix, self.objects_parsed, self.detected_objects, self.best_person_frame) mqtt_publisher = MqttObjectPublisher(self.mqtt_client, self.mqtt_topic_prefix, self.objects_parsed, self.detected_objects, self.best_frames)
mqtt_publisher.start() mqtt_publisher.start()
# create a watchdog thread for capture process # create a watchdog thread for capture process
self.watchdog = CameraWatchdog(self) self.watchdog = CameraWatchdog(self)
# load in the mask for person detection # load in the mask for object detection
if 'mask' in self.config: if 'mask' in self.config:
self.mask = cv2.imread("/config/{}".format(self.config['mask']), cv2.IMREAD_GRAYSCALE) self.mask = cv2.imread("/config/{}".format(self.config['mask']), cv2.IMREAD_GRAYSCALE)
else: else:
@ -252,38 +258,45 @@ class Camera:
return return
for obj in objects: for obj in objects:
# Store object area to use in bounding box labels # find the matching region
region = self.regions[obj['region_id']]
# Compute some extra properties
obj.update({
'xmin': int((obj['box'][0] * region['size']) + region['x_offset']),
'ymin': int((obj['box'][1] * region['size']) + region['y_offset']),
'xmax': int((obj['box'][2] * region['size']) + region['x_offset']),
'ymax': int((obj['box'][3] * region['size']) + region['y_offset'])
})
# Compute the area
obj['area'] = (obj['xmax']-obj['xmin'])*(obj['ymax']-obj['ymin']) obj['area'] = (obj['xmax']-obj['xmin'])*(obj['ymax']-obj['ymin'])
if obj['name'] == 'person': object_name = obj['name']
# find the matching region
region = None
for r in self.regions:
if (
obj['xmin'] >= r['x_offset'] and
obj['ymin'] >= r['y_offset'] and
obj['xmax'] <= r['x_offset']+r['size'] and
obj['ymax'] <= r['y_offset']+r['size']
):
region = r
break
# if the min person area is larger than the if object_name in region['objects']:
# detected person, don't add it to detected objects obj_settings = region['objects'][object_name]
if region and 'min_person_area' in region and region['min_person_area'] > obj['area']:
# if the min area is larger than the
# detected object, don't add it to detected objects
if obj_settings.get('min_area',-1) > obj['area']:
continue continue
# if the detected person is larger than the # if the detected object is larger than the
# max person area, don't add it to detected objects # max area, don't add it to detected objects
if region and 'max_person_area' in region and region['max_person_area'] < obj['area']: if obj_settings.get('max_area', region['size']**2) < obj['area']:
continue continue
# compute the coordinates of the person and make sure # if the score is lower than the threshold, skip
if obj_settings.get('threshold', 0) > obj['score']:
continue
# compute the coordinates of the object and make sure
# the location isnt outside the bounds of the image (can happen from rounding) # the location isnt outside the bounds of the image (can happen from rounding)
y_location = min(int(obj['ymax']), len(self.mask)-1) y_location = min(int(obj['ymax']), len(self.mask)-1)
x_location = min(int((obj['xmax']-obj['xmin'])/2.0)+obj['xmin'], len(self.mask[0])-1) x_location = min(int((obj['xmax']-obj['xmin'])/2.0)+obj['xmin'], len(self.mask[0])-1)
# if the person is in a masked location, continue # if the object is in a masked location, don't add it to detected objects
if self.mask[y_location][x_location] == [0]: if self.mask[y_location][x_location] == [0]:
continue continue
@ -292,8 +305,8 @@ class Camera:
with self.objects_parsed: with self.objects_parsed:
self.objects_parsed.notify_all() self.objects_parsed.notify_all()
def get_best_person(self): def get_best(self, label):
return self.best_person_frame.best_frame return self.best_frames.best_frames.get(label)
def get_current_frame_with_objects(self): def get_current_frame_with_objects(self):
# make a copy of the current detected objects # make a copy of the current detected objects