WIP: convert to camera class

This commit is contained in:
blakeblackshear 2019-03-29 20:49:27 -05:00
parent 8774e537dc
commit 0279121d77
4 changed files with 223 additions and 206 deletions

View File

@ -20,23 +20,16 @@ from frigate.util import tonumpyarray
from frigate.mqtt import MqttMotionPublisher, MqttObjectPublisher from frigate.mqtt import MqttMotionPublisher, MqttObjectPublisher
from frigate.objects import ObjectParser, ObjectCleaner, BestPersonFrame from frigate.objects import ObjectParser, ObjectCleaner, BestPersonFrame
from frigate.motion import detect_motion from frigate.motion import detect_motion
from frigate.video import fetch_frames, FrameTracker from frigate.video import fetch_frames, FrameTracker, Camera
from frigate.object_detection import FramePrepper, PreppedQueueProcessor from frigate.object_detection import FramePrepper, PreppedQueueProcessor
with open('/config/config.yml') as f: with open('/config/config.yml') as f:
# use safe_load instead load # use safe_load instead load
CONFIG = yaml.safe_load(f) CONFIG = yaml.safe_load(f)
rtsp_camera = CONFIG['cameras']['back']['rtsp']
if (rtsp_camera['password'].startswith('$')):
rtsp_camera['password'] = os.getenv(rtsp_camera['password'][1:])
RTSP_URL = 'rtsp://{}:{}@{}:{}{}'.format(rtsp_camera['user'],
rtsp_camera['password'], rtsp_camera['host'], rtsp_camera['port'],
rtsp_camera['path'])
MQTT_HOST = CONFIG['mqtt']['host'] MQTT_HOST = CONFIG['mqtt']['host']
MQTT_PORT = CONFIG.get('mqtt', {}).get('port', 1883) MQTT_PORT = CONFIG.get('mqtt', {}).get('port', 1883)
MQTT_TOPIC_PREFIX = CONFIG['mqtt']['topic_prefix'] + '/back' MQTT_TOPIC_PREFIX = CONFIG.get('mqtt', {}).get('topic_prefix', 'frigate')
MQTT_USER = CONFIG.get('mqtt', {}).get('user') MQTT_USER = CONFIG.get('mqtt', {}).get('user')
MQTT_PASS = CONFIG.get('mqtt', {}).get('password') MQTT_PASS = CONFIG.get('mqtt', {}).get('password')
@ -44,80 +37,6 @@ WEB_PORT = CONFIG.get('web_port', 5000)
DEBUG = (CONFIG.get('debug', '0') == '1') DEBUG = (CONFIG.get('debug', '0') == '1')
def main(): def main():
DETECTED_OBJECTS = []
recent_frames = {}
# Parse selected regions
regions = CONFIG['cameras']['back']['regions']
# capture a single frame and check the frame shape so the correct array
# size can be allocated in memory
video = cv2.VideoCapture(RTSP_URL)
ret, frame = video.read()
if ret:
frame_shape = frame.shape
else:
print("Unable to capture video stream")
exit(1)
video.release()
# compute the flattened array length from the array shape
flat_array_length = frame_shape[0] * frame_shape[1] * frame_shape[2]
# create shared array for storing the full frame image data
shared_arr = mp.Array(ctypes.c_uint8, flat_array_length)
# create shared value for storing the frame_time
shared_frame_time = mp.Value('d', 0.0)
# Lock to control access to the frame
frame_lock = mp.Lock()
# Condition for notifying that a new frame is ready
frame_ready = mp.Condition()
# Condition for notifying that objects were parsed
objects_parsed = mp.Condition()
# Queue for detected objects
object_queue = queue.Queue()
# Queue for prepped frames
prepped_frame_queue = queue.Queue(len(regions)*2)
# shape current frame so it can be treated as an image
frame_arr = tonumpyarray(shared_arr).reshape(frame_shape)
# start the process to capture frames from the RTSP stream and store in a shared array
capture_process = mp.Process(target=fetch_frames, args=(shared_arr,
shared_frame_time, frame_lock, frame_ready, frame_shape, RTSP_URL))
capture_process.daemon = True
# for each region, start a separate thread to resize the region and prep for detection
detection_prep_threads = []
for region in regions:
detection_prep_threads.append(FramePrepper(
frame_arr,
shared_frame_time,
frame_ready,
frame_lock,
region['size'], region['x_offset'], region['y_offset'],
prepped_frame_queue
))
prepped_queue_processor = PreppedQueueProcessor(
prepped_frame_queue,
object_queue
)
prepped_queue_processor.start()
# start a thread to store recent motion frames for processing
frame_tracker = FrameTracker(frame_arr, shared_frame_time, frame_ready, frame_lock,
recent_frames)
frame_tracker.start()
# start a thread to store the highest scoring recent person frame
best_person_frame = BestPersonFrame(objects_parsed, recent_frames, DETECTED_OBJECTS)
best_person_frame.start()
# start a thread to parse objects from the queue
object_parser = ObjectParser(object_queue, objects_parsed, DETECTED_OBJECTS, regions)
object_parser.start()
# start a thread to expire objects from the detected objects list
object_cleaner = ObjectCleaner(objects_parsed, DETECTED_OBJECTS)
object_cleaner.start()
# connect to mqtt and setup last will # connect to mqtt and setup last will
def on_connect(client, userdata, flags, rc): def on_connect(client, userdata, flags, rc):
print("On connect called") print("On connect called")
@ -128,84 +47,82 @@ def main():
client.will_set(MQTT_TOPIC_PREFIX+'/available', payload='offline', qos=1, retain=True) client.will_set(MQTT_TOPIC_PREFIX+'/available', payload='offline', qos=1, retain=True)
if not MQTT_USER is None: if not MQTT_USER is None:
client.username_pw_set(MQTT_USER, password=MQTT_PASS) client.username_pw_set(MQTT_USER, password=MQTT_PASS)
client.connect(MQTT_HOST, MQTT_PORT, 60) client.connect(MQTT_HOST, MQTT_PORT, 60)
client.loop_start() client.loop_start()
# start a thread to publish object scores (currently only person) # Queue for prepped frames
mqtt_publisher = MqttObjectPublisher(client, MQTT_TOPIC_PREFIX, objects_parsed, DETECTED_OBJECTS) # TODO: set length to 1.5x the number of total regions
mqtt_publisher.start() prepped_frame_queue = queue.Queue(6)
# start the process of capturing frames
capture_process.start()
print("capture_process pid ", capture_process.pid)
# start the object detection prep threads camera = Camera('back', CONFIG['cameras']['back'], prepped_frame_queue, client, MQTT_TOPIC_PREFIX)
for detection_prep_thread in detection_prep_threads:
detection_prep_thread.start() cameras = {
'back': camera
}
prepped_queue_processor = PreppedQueueProcessor(
cameras,
prepped_frame_queue
)
prepped_queue_processor.start()
camera.start()
camera.join()
# create a flask app that encodes frames a mjpeg on demand # create a flask app that encodes frames a mjpeg on demand
app = Flask(__name__) # app = Flask(__name__)
@app.route('/best_person.jpg') # @app.route('/best_person.jpg')
def best_person(): # def best_person():
frame = np.zeros(frame_shape, np.uint8) if best_person_frame.best_frame is None else best_person_frame.best_frame # frame = np.zeros(frame_shape, np.uint8) if camera.get_best_person() is None else camera.get_best_person()
ret, jpg = cv2.imencode('.jpg', frame) # ret, jpg = cv2.imencode('.jpg', frame)
response = make_response(jpg.tobytes()) # response = make_response(jpg.tobytes())
response.headers['Content-Type'] = 'image/jpg' # response.headers['Content-Type'] = 'image/jpg'
return response # return response
@app.route('/') # @app.route('/')
def index(): # def index():
# return a multipart response # # return a multipart response
return Response(imagestream(), # return Response(imagestream(),
mimetype='multipart/x-mixed-replace; boundary=frame') # mimetype='multipart/x-mixed-replace; boundary=frame')
def imagestream(): # def imagestream():
while True: # while True:
# max out at 5 FPS # # max out at 5 FPS
time.sleep(0.2) # time.sleep(0.2)
# make a copy of the current detected objects # # make a copy of the current detected objects
detected_objects = DETECTED_OBJECTS.copy() # detected_objects = DETECTED_OBJECTS.copy()
# lock and make a copy of the current frame # # lock and make a copy of the current frame
with frame_lock: # with frame_lock:
frame = frame_arr.copy() # frame = frame_arr.copy()
# convert to RGB for drawing # # convert to RGB for drawing
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# draw the bounding boxes on the screen # # draw the bounding boxes on the screen
for obj in detected_objects: # for obj in detected_objects:
vis_util.draw_bounding_box_on_image_array(frame, # vis_util.draw_bounding_box_on_image_array(frame,
obj['ymin'], # obj['ymin'],
obj['xmin'], # obj['xmin'],
obj['ymax'], # obj['ymax'],
obj['xmax'], # obj['xmax'],
color='red', # color='red',
thickness=2, # thickness=2,
display_str_list=["{}: {}%".format(obj['name'],int(obj['score']*100))], # display_str_list=["{}: {}%".format(obj['name'],int(obj['score']*100))],
use_normalized_coordinates=False) # use_normalized_coordinates=False)
for region in regions: # for region in regions:
color = (255,255,255) # color = (255,255,255)
cv2.rectangle(frame, (region['x_offset'], region['y_offset']), # cv2.rectangle(frame, (region['x_offset'], region['y_offset']),
(region['x_offset']+region['size'], region['y_offset']+region['size']), # (region['x_offset']+region['size'], region['y_offset']+region['size']),
color, 2) # color, 2)
# convert back to BGR # # convert back to BGR
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) # frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
# encode the image into a jpg # # encode the image into a jpg
ret, jpg = cv2.imencode('.jpg', frame) # ret, jpg = cv2.imencode('.jpg', frame)
yield (b'--frame\r\n' # yield (b'--frame\r\n'
b'Content-Type: image/jpeg\r\n\r\n' + jpg.tobytes() + b'\r\n\r\n') # b'Content-Type: image/jpeg\r\n\r\n' + jpg.tobytes() + b'\r\n\r\n')
app.run(host='0.0.0.0', port=WEB_PORT, debug=False) # app.run(host='0.0.0.0', port=WEB_PORT, debug=False)
capture_process.join()
for detection_prep_thread in detection_prep_threads:
detection_prep_thread.join()
frame_tracker.join()
best_person_frame.join()
object_parser.join()
object_cleaner.join()
mqtt_publisher.join()
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -22,11 +22,11 @@ def ReadLabelFile(file_path):
return ret return ret
class PreppedQueueProcessor(threading.Thread): class PreppedQueueProcessor(threading.Thread):
def __init__(self, prepped_frame_queue, object_queue): def __init__(self, cameras, prepped_frame_queue):
threading.Thread.__init__(self) threading.Thread.__init__(self)
self.cameras = cameras
self.prepped_frame_queue = prepped_frame_queue self.prepped_frame_queue = prepped_frame_queue
self.object_queue = object_queue
# Load the edgetpu engine and labels # Load the edgetpu engine and labels
self.engine = DetectionEngine(PATH_TO_CKPT) self.engine = DetectionEngine(PATH_TO_CKPT)
@ -41,12 +41,12 @@ class PreppedQueueProcessor(threading.Thread):
objects = self.engine.DetectWithInputTensor(frame['frame'], threshold=0.5, top_k=3) objects = self.engine.DetectWithInputTensor(frame['frame'], threshold=0.5, top_k=3)
# time.sleep(0.1) # time.sleep(0.1)
# objects = [] # objects = []
# print(engine.get_inference_time()) print(self.engine.get_inference_time())
# put detected objects in the queue # put detected objects in the queue
if objects: parsed_objects = []
for obj in objects: for obj in objects:
box = obj.bounding_box.flatten().tolist() box = obj.bounding_box.flatten().tolist()
self.object_queue.put({ parsed_objects.append({
'frame_time': frame['frame_time'], 'frame_time': frame['frame_time'],
'name': str(self.labels[obj.label_id]), 'name': str(self.labels[obj.label_id]),
'score': float(obj.score), 'score': float(obj.score),
@ -55,16 +55,18 @@ class PreppedQueueProcessor(threading.Thread):
'xmax': int((box[2] * frame['region_size']) + frame['region_x_offset']), 'xmax': int((box[2] * frame['region_size']) + frame['region_x_offset']),
'ymax': int((box[3] * frame['region_size']) + frame['region_y_offset']) 'ymax': int((box[3] * frame['region_size']) + frame['region_y_offset'])
}) })
self.cameras[frame['camera_name']].add_objects(parsed_objects)
# should this be a region class? # should this be a region class?
class FramePrepper(threading.Thread): class FramePrepper(threading.Thread):
def __init__(self, shared_frame, frame_time, frame_ready, def __init__(self, camera_name, shared_frame, frame_time, frame_ready,
frame_lock, frame_lock,
region_size, region_x_offset, region_y_offset, region_size, region_x_offset, region_y_offset,
prepped_frame_queue): prepped_frame_queue):
threading.Thread.__init__(self) threading.Thread.__init__(self)
self.camera_name = camera_name
self.shared_frame = shared_frame self.shared_frame = shared_frame
self.frame_time = frame_time self.frame_time = frame_time
self.frame_ready = frame_ready self.frame_ready = frame_ready
@ -101,6 +103,7 @@ class FramePrepper(threading.Thread):
# add the frame to the queue # add the frame to the queue
if not self.prepped_frame_queue.full(): if not self.prepped_frame_queue.full():
self.prepped_frame_queue.put({ self.prepped_frame_queue.put({
'camera_name': self.camera_name,
'frame_time': frame_time, 'frame_time': frame_time,
'frame': frame_expanded.flatten().copy(), 'frame': frame_expanded.flatten().copy(),
'region_size': self.region_size, 'region_size': self.region_size,

View File

@ -4,53 +4,17 @@ import threading
import cv2 import cv2
from object_detection.utils import visualization_utils as vis_util from object_detection.utils import visualization_utils as vis_util
class ObjectParser(threading.Thread): class ObjectParser(threading.Thread):
def __init__(self, object_queue, objects_parsed, detected_objects, regions): def __init__(self, cameras, object_queue, detected_objects, regions):
threading.Thread.__init__(self) threading.Thread.__init__(self)
self._object_queue = object_queue self.cameras = cameras
self._objects_parsed = objects_parsed self.object_queue = object_queue
self._detected_objects = detected_objects
self.regions = regions self.regions = regions
def run(self): def run(self):
# frame_times = {} # frame_times = {}
while True: while True:
obj = self._object_queue.get() obj = self.object_queue.get()
# filter out persons self.cameras[obj['camera_name']].add_object(obj)
# [obj['score'] for obj in detected_objects if obj['name'] == 'person']
if obj['name'] == 'person':
person_area = (obj['xmax']-obj['xmin'])*(obj['ymax']-obj['ymin'])
# find the matching region
region = None
for r in self.regions:
if (
obj['xmin'] >= r['x_offset'] and
obj['ymin'] >= r['y_offset'] and
obj['xmax'] <= r['x_offset']+r['size'] and
obj['ymax'] <= r['y_offset']+r['size']
):
region = r
break
# if the min person area is larger than the
# detected person, don't add it to detected objects
if region and region['min_person_area'] > person_area:
continue
# frame_time = obj['frame_time']
# if frame_time in frame_times:
# if frame_times[frame_time] == 7:
# del frame_times[frame_time]
# else:
# frame_times[frame_time] += 1
# else:
# frame_times[frame_time] = 1
# print(frame_times)
self._detected_objects.append(obj)
# notify that objects were parsed
with self._objects_parsed:
self._objects_parsed.notify_all()
class ObjectCleaner(threading.Thread): class ObjectCleaner(threading.Thread):
def __init__(self, objects_parsed, detected_objects): def __init__(self, objects_parsed, detected_objects):

View File

@ -1,8 +1,14 @@
import os
import time import time
import datetime import datetime
import cv2 import cv2
import threading import threading
import ctypes
import multiprocessing as mp
from . util import tonumpyarray from . util import tonumpyarray
from . object_detection import FramePrepper
from . objects import ObjectCleaner, ObjectParser, BestPersonFrame
from . mqtt import MqttObjectPublisher
# fetch the frames as fast a possible, only decoding the frames when the # fetch the frames as fast a possible, only decoding the frames when the
# detection_process has consumed the current frame # detection_process has consumed the current frame
@ -85,3 +91,130 @@ class FrameTracker(threading.Thread):
for k in stored_frame_times: for k in stored_frame_times:
if (now - k) > 2: if (now - k) > 2:
del self.recent_frames[k] del self.recent_frames[k]
def get_frame_shape(rtsp_url):
    """Return the shape of a single frame read from the stream at rtsp_url.

    One frame is captured so that callers can size the shared frame array
    correctly before the real capture process is started.

    Args:
        rtsp_url: URL handed to ``cv2.VideoCapture``.

    Returns:
        The ``shape`` attribute of the captured frame.

    Raises:
        RuntimeError: if no frame could be read from the stream.
    """
    video = cv2.VideoCapture(rtsp_url)
    try:
        ret, frame = video.read()
    finally:
        # release the capture even when the read fails or raises
        video.release()
    if not ret:
        # the url is deliberately left out of the message because it may
        # embed the camera credentials
        raise RuntimeError("Unable to capture video stream")
    return frame.shape
def get_rtsp_url(rtsp_config):
    """Build the RTSP URL for a camera from its ``rtsp`` config section.

    A password that starts with ``$`` is treated as the name of an
    environment variable and is replaced by that variable's value.

    Args:
        rtsp_config: mapping with the keys ``user``, ``password``, ``host``,
            ``port`` and ``path``. The mapping is not modified.

    Returns:
        The URL formatted as ``rtsp://user:password@host:port`` followed
        directly by ``path``.
    """
    password = rtsp_config['password']
    if password.startswith('$'):
        # Resolve into a local rather than writing back into the config, so
        # repeated calls see the original reference and the secret is not
        # stored in the shared config dict. os.getenv returns None for an
        # unset variable, which is formatted as the text "None" below.
        password = os.getenv(password[1:])
    return 'rtsp://{}:{}@{}:{}{}'.format(
        rtsp_config['user'],
        password,
        rtsp_config['host'],
        rtsp_config['port'],
        rtsp_config['path'])
class Camera:
    """Bundle the shared frame buffers, capture process and worker threads of one camera.

    Constructing a Camera probes the stream for its frame shape, allocates
    the shared frame storage, creates the capture process and the per-region
    prep threads, and immediately starts the frame tracker, best-person,
    object-cleaner and MQTT publisher threads. The capture process and the
    prep threads are only started by ``start()``.
    """

    def __init__(self, name, config, prepped_frame_queue, mqtt_client, mqtt_prefix):
        """Set up all per-camera state.

        Args:
            name: camera name; passed to each FramePrepper and appended to
                the MQTT topic prefix.
            config: this camera's config section; the ``rtsp`` and
                ``regions`` keys are read here.
            prepped_frame_queue: queue handed to each FramePrepper.
            mqtt_client: client handed to the MqttObjectPublisher.
            mqtt_prefix: topic prefix; the camera topic is
                ``<mqtt_prefix>/<name>``.
        """
        self.name = name
        self.config = config
        self.detected_objects = []
        self.recent_frames = {}
        self.rtsp_url = get_rtsp_url(self.config['rtsp'])
        self.regions = self.config['regions']
        self.frame_shape = get_frame_shape(self.rtsp_url)
        self.mqtt_client = mqtt_client
        self.mqtt_topic_prefix = '{}/{}'.format(mqtt_prefix, self.name)

        # compute the flattened array length from the shape of the frame
        flat_array_length = self.frame_shape[0] * self.frame_shape[1] * self.frame_shape[2]
        # create shared array for storing the full frame image data
        self.shared_frame_array = mp.Array(ctypes.c_uint8, flat_array_length)
        # create shared value for storing the frame_time
        self.shared_frame_time = mp.Value('d', 0.0)
        # Lock to control access to the frame
        self.frame_lock = mp.Lock()
        # Condition for notifying that a new frame is ready
        self.frame_ready = mp.Condition()
        # Condition for notifying that objects were parsed
        self.objects_parsed = mp.Condition()

        # shape current frame so it can be treated as a numpy image
        self.shared_frame_np = tonumpyarray(self.shared_frame_array).reshape(self.frame_shape)

        # create the process to capture frames from the RTSP stream and store in a shared array
        self.capture_process = mp.Process(target=fetch_frames, args=(self.shared_frame_array,
            self.shared_frame_time, self.frame_lock, self.frame_ready, self.frame_shape, self.rtsp_url))
        self.capture_process.daemon = True

        # for each region, create a separate thread to resize the region and prep for detection
        self.detection_prep_threads = []
        for region in self.regions:
            self.detection_prep_threads.append(FramePrepper(
                self.name,
                self.shared_frame_np,
                self.shared_frame_time,
                self.frame_ready,
                self.frame_lock,
                region['size'], region['x_offset'], region['y_offset'],
                prepped_frame_queue
            ))

        # start a thread to store recent motion frames for processing
        self.frame_tracker = FrameTracker(self.shared_frame_np, self.shared_frame_time,
            self.frame_ready, self.frame_lock, self.recent_frames)
        self.frame_tracker.start()

        # start a thread to store the highest scoring recent person frame
        self.best_person_frame = BestPersonFrame(self.objects_parsed, self.recent_frames, self.detected_objects)
        self.best_person_frame.start()

        # start a thread to expire objects from the detected objects list
        self.object_cleaner = ObjectCleaner(self.objects_parsed, self.detected_objects)
        self.object_cleaner.start()

        # start a thread to publish object scores (currently only person);
        # kept on self like the other worker threads so the handle is not lost
        self.mqtt_publisher = MqttObjectPublisher(self.mqtt_client, self.mqtt_topic_prefix,
            self.objects_parsed, self.detected_objects)
        self.mqtt_publisher.start()

    def start(self):
        """Start the capture process and then every detection prep thread."""
        self.capture_process.start()
        # start the object detection prep threads
        for detection_prep_thread in self.detection_prep_threads:
            detection_prep_thread.start()

    def join(self):
        """Block until the capture process exits."""
        self.capture_process.join()

    def get_capture_pid(self):
        """Return the pid of the capture process."""
        return self.capture_process.pid

    def add_objects(self, objects):
        """Append detected objects to ``detected_objects`` and notify waiters.

        Objects named ``person`` whose bounding box lies entirely inside a
        configured region are dropped when the box area is smaller than that
        region's ``min_person_area``. Everything else is appended as is.

        Args:
            objects: iterable of dicts with ``name``, ``xmin``, ``ymin``,
                ``xmax`` and ``ymax`` keys. An empty list or ``None`` is a
                no-op and sends no notification.
        """
        if not objects:
            return

        for obj in objects:
            if obj['name'] == 'person' and self._is_below_min_person_area(obj):
                continue
            self.detected_objects.append(obj)

        # a single notification per batch, sent even when every object was filtered
        with self.objects_parsed:
            self.objects_parsed.notify_all()

    def _find_region(self, obj):
        """Return the first configured region fully containing obj's box, or None."""
        for r in self.regions:
            if (
                obj['xmin'] >= r['x_offset'] and
                obj['ymin'] >= r['y_offset'] and
                obj['xmax'] <= r['x_offset']+r['size'] and
                obj['ymax'] <= r['y_offset']+r['size']
            ):
                return r
        return None

    def _is_below_min_person_area(self, obj):
        """Return True when obj sits in a region whose min_person_area exceeds its box area."""
        person_area = (obj['xmax']-obj['xmin'])*(obj['ymax']-obj['ymin'])
        region = self._find_region(obj)
        # objects that match no region are never filtered
        return region is not None and region['min_person_area'] > person_area

    def get_best_person(self):
        """Return the ``best_frame`` currently held by the best-person thread."""
        return self.best_person_frame.best_frame