diff --git a/detect_objects.py b/detect_objects.py
index 405a86a75..79ae829d6 100644
--- a/detect_objects.py
+++ b/detect_objects.py
@@ -19,7 +19,7 @@ from frigate.mqtt import MqttMotionPublisher, MqttObjectPublisher
 from frigate.objects import ObjectParser, ObjectCleaner, BestPersonFrame
 from frigate.motion import detect_motion
 from frigate.video import fetch_frames, FrameTracker
-from frigate.object_detection import detect_objects
+from frigate.object_detection import prep_for_detection, detect_objects
 
 RTSP_URL = os.getenv('RTSP_URL')
 
@@ -85,6 +85,16 @@ def main():
     objects_parsed = mp.Condition()
     # Queue for detected objects
     object_queue = mp.Queue()
+    # array for prepped frame with shape (1, 300, 300, 3)
+    prepped_frame_array = mp.Array(ctypes.c_uint8, 300*300*3)
+    # shared value for storing the prepped_frame_time
+    prepped_frame_time = mp.Value('d', 0.0)
+    # Condition for notifying that a new prepped frame is ready
+    prepped_frame_ready = mp.Condition()
+    # Lock to control access to the prepped frame
+    prepped_frame_lock = mp.Lock()
+    # array for prepped frame box [x1, y1, x2, y2]
+    prepped_frame_box = mp.Array(ctypes.c_uint16, 4)
 
     # shape current frame so it can be treated as an image
     frame_arr = tonumpyarray(shared_arr).reshape(frame_shape)
@@ -95,20 +105,19 @@ def main():
     capture_process.daemon = True
 
     # for each region, start a separate process for motion detection and object detection
-    detection_processes = []
+    detection_prep_processes = []
     motion_processes = []
     for region in regions:
-        detection_process = mp.Process(target=detect_objects, args=(shared_arr,
-            object_queue,
-            shared_frame_time,
-            frame_lock, frame_ready,
-            region['motion_detected'],
-            frame_shape,
-            region['size'], region['x_offset'], region['y_offset'],
-            region['min_person_area'],
-            DEBUG))
-        detection_process.daemon = True
-        detection_processes.append(detection_process)
+        detection_prep_process = mp.Process(target=prep_for_detection, args=(shared_arr,
+            shared_frame_time,
+            frame_lock, frame_ready,
+            region['motion_detected'],
+            frame_shape,
+            region['size'], region['x_offset'], region['y_offset'],
+            prepped_frame_array, prepped_frame_time, prepped_frame_ready,
+            prepped_frame_lock, prepped_frame_box))
+        detection_prep_process.daemon = True
+        detection_prep_processes.append(detection_prep_process)
 
         motion_process = mp.Process(target=detect_motion, args=(shared_arr,
             shared_frame_time,
@@ -168,15 +177,16 @@ def main():
     print("capture_process pid ", capture_process.pid)
 
     # start the object detection processes
-    for detection_process in detection_processes:
-        detection_process.start()
-        print("detection_process pid ", detection_process.pid)
+    for detection_prep_process in detection_prep_processes:
+        detection_prep_process.start()
+        print("detection_prep_process pid ", detection_prep_process.pid)
 
     # start the motion detection processes
     # for motion_process in motion_processes:
     #     motion_process.start()
     #     print("motion_process pid ", motion_process.pid)
 
+    # TEMP: short circuit the motion detection
     for region in regions:
         region['motion_detected'].set()
     with motion_changed:
@@ -239,8 +249,8 @@ def main():
     app.run(host='0.0.0.0', debug=False)
 
     capture_process.join()
-    for detection_process in detection_processes:
-        detection_process.join()
+    for detection_prep_process in detection_prep_processes:
+        detection_prep_process.join()
     for motion_process in motion_processes:
         motion_process.join()
     frame_tracker.join()
diff --git a/frigate/object_detection.py b/frigate/object_detection.py
index 8791e33c1..3037f803e 100644
--- a/frigate/object_detection.py
+++ b/frigate/object_detection.py
@@ -2,11 +2,8 @@
 import datetime
 import cv2
 import numpy as np
 from edgetpu.detection.engine import DetectionEngine
-from PIL import Image
 from . util import tonumpyarray
-# TODO: make dynamic?
-NUM_CLASSES = 90
 # Path to frozen detection graph. This is the actual model that is used for the object detection.
 PATH_TO_CKPT = '/frozen_inference_graph.pb'
 # List of the strings that is used to add correct label for each box.
@@ -22,43 +19,51 @@ def ReadLabelFile(file_path):
         ret[int(pair[0])] = pair[1].strip()
     return ret
 
-# do the actual object detection
-def tf_detect_objects(cropped_frame, engine, labels, region_size, region_x_offset, region_y_offset, debug):
-    # Resize to 300x300 if needed
-    if cropped_frame.shape != (300, 300, 3):
-        cropped_frame = cv2.resize(cropped_frame, dsize=(300, 300), interpolation=cv2.INTER_LINEAR)
-    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
-    image_np_expanded = np.expand_dims(cropped_frame, axis=0)
-
-    # Actual detection.
-    ans = engine.DetectWithInputTensor(image_np_expanded.flatten(), threshold=0.5, top_k=3)
-
-    # build an array of detected objects
-    objects = []
-    if ans:
-        for obj in ans:
-            box = obj.bounding_box.flatten().tolist()
-            objects.append({
-                'name': str(labels[obj.label_id]),
-                'score': float(obj.score),
-                'xmin': int((box[0] * region_size) + region_x_offset),
-                'ymin': int((box[1] * region_size) + region_y_offset),
-                'xmax': int((box[2] * region_size) + region_x_offset),
-                'ymax': int((box[3] * region_size) + region_y_offset)
-            })
-
-    return objects
-
-def detect_objects(shared_arr, object_queue, shared_frame_time, frame_lock, frame_ready,
-                   motion_detected, frame_shape, region_size, region_x_offset, region_y_offset,
-                   min_person_area, debug):
-    # shape shared input array into frame for processing
-    arr = tonumpyarray(shared_arr).reshape(frame_shape)
-
+def detect_objects(prepped_frame_array, prepped_frame_time, prepped_frame_lock,
+                   prepped_frame_ready, prepped_frame_box, object_queue, debug):
     # Load the edgetpu engine and labels
     engine = DetectionEngine(PATH_TO_CKPT)
     labels = ReadLabelFile(PATH_TO_LABELS)
 
+    frame_time = 0.0
+    while True:
+        with prepped_frame_ready:
+            prepped_frame_ready.wait()
+
+        # make a copy of the prepped frame
+        with prepped_frame_lock:
+            prepped_frame_copy = tonumpyarray(prepped_frame_array).copy()
+            frame_time = prepped_frame_time.value
+            region_box = prepped_frame_box[:]
+
+        # Actual detection.
+        ans = engine.DetectWithInputTensor(prepped_frame_copy, threshold=0.5, top_k=3)
+
+        # put detected objects in the queue
+        if ans:
+            # assumes square
+            region_size = region_box[2]-region_box[0]
+            for obj in ans:
+                box = obj.bounding_box.flatten().tolist()
+                object_queue.put({
+                    'frame_time': frame_time,
+                    'name': str(labels[obj.label_id]),
+                    'score': float(obj.score),
+                    'xmin': int((box[0] * region_size) + region_box[0]),
+                    'ymin': int((box[1] * region_size) + region_box[1]),
+                    'xmax': int((box[2] * region_size) + region_box[0]),
+                    'ymax': int((box[3] * region_size) + region_box[1])
+                })
+
+def prep_for_detection(shared_whole_frame_array, shared_frame_time, frame_lock, frame_ready,
+                       motion_detected, frame_shape, region_size, region_x_offset, region_y_offset,
+                       prepped_frame_array, prepped_frame_time, prepped_frame_ready, prepped_frame_lock,
+                       prepped_frame_box):
+    # shape shared input array into frame for processing
+    shared_whole_frame = tonumpyarray(shared_whole_frame_array).reshape(frame_shape)
+
+    shared_prepped_frame = tonumpyarray(prepped_frame_array).reshape((1,300,300,3))
+
     frame_time = 0.0
     while True:
         now = datetime.datetime.now().timestamp()
@@ -69,20 +74,30 @@ def detect_objects(shared_arr, object_queue, shared_frame_time, frame_lock, fram
         with frame_ready:
             # if there isnt a frame ready for processing or it is old, wait for a new frame
             if shared_frame_time.value == frame_time or (now - shared_frame_time.value) > 0.5:
+                print("waiting...")
                 frame_ready.wait()
 
         # make a copy of the cropped frame
        with frame_lock:
-            cropped_frame = arr[region_y_offset:region_y_offset+region_size, region_x_offset:region_x_offset+region_size].copy()
+            cropped_frame = shared_whole_frame[region_y_offset:region_y_offset+region_size, region_x_offset:region_x_offset+region_size].copy()
             frame_time = shared_frame_time.value
+
+        print("grabbed frame " + str(frame_time))
 
         # convert to RGB
         cropped_frame_rgb = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2RGB)
-        # do the object detection
-        objects = tf_detect_objects(cropped_frame_rgb, engine, labels, region_size, region_x_offset, region_y_offset, debug)
-        for obj in objects:
-            # ignore persons below the size threshold
-            if obj['name'] == 'person' and (obj['xmax']-obj['xmin'])*(obj['ymax']-obj['ymin']) < min_person_area:
-                continue
-            obj['frame_time'] = frame_time
-            object_queue.put(obj)
\ No newline at end of file
+        # Resize to 300x300 if needed
+        if cropped_frame_rgb.shape != (300, 300, 3):
+            cropped_frame_rgb = cv2.resize(cropped_frame_rgb, dsize=(300, 300), interpolation=cv2.INTER_LINEAR)
+        # Expand dimensions since the model expects images to have shape: [1, 300, 300, 3]
+        frame_expanded = np.expand_dims(cropped_frame_rgb, axis=0)
+
+        # copy the prepped frame to the shared output array
+        with prepped_frame_lock:
+            shared_prepped_frame[:] = frame_expanded
+            prepped_frame_time.value = frame_time
+            prepped_frame_box[:] = [region_x_offset, region_y_offset, region_x_offset+region_size, region_y_offset+region_size]
+
+        # signal that a prepped frame is ready
+        with prepped_frame_ready:
+            prepped_frame_ready.notify_all()
\ No newline at end of file
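
Note (reviewer sketch, not part of the patch): the change above splits the old per-region detection loop into a prep_for_detection producer and a detect_objects consumer that hand a prepped frame between processes through a shared mp.Array, an mp.Value timestamp, an mp.Lock, and an mp.Condition. The self-contained example below is a minimal sketch of that handoff pattern only; the names prep_worker, detect_worker, FRAME_SHAPE, and the mean-pixel "detection" are hypothetical stand-ins for the camera capture and the Edge TPU engine, not Frigate code.

# handoff_sketch.py -- illustrative only; all names here are hypothetical
import ctypes
import time
import multiprocessing as mp

import numpy as np

FRAME_SHAPE = (300, 300, 3)
FRAME_SIZE = FRAME_SHAPE[0] * FRAME_SHAPE[1] * FRAME_SHAPE[2]

def prep_worker(shared_frame, shared_time, frame_lock, frame_ready):
    # Producer: keep writing a new "prepped" frame into shared memory and notify the consumer.
    i = 0
    while True:
        frame = np.full(FRAME_SHAPE, i % 256, dtype=np.uint8)  # stand-in for a resized camera crop
        with frame_lock:
            np.frombuffer(shared_frame.get_obj(), dtype=np.uint8)[:] = frame.ravel()
            shared_time.value = time.time()
        with frame_ready:
            frame_ready.notify_all()
        i += 1
        time.sleep(0.05)

def detect_worker(shared_frame, shared_time, frame_lock, frame_ready, results, num_frames):
    # Consumer: wait for a prepped frame, copy it out under the lock, then "detect" on the copy,
    # so the slow detection step never holds up the producer.
    for _ in range(num_frames):
        with frame_ready:
            frame_ready.wait()
        with frame_lock:
            frame_copy = np.frombuffer(shared_frame.get_obj(), dtype=np.uint8).copy()
            frame_time = shared_time.value
        # a real implementation would run the detection engine on frame_copy here
        results.put({'frame_time': frame_time, 'mean_pixel': float(frame_copy.mean())})

if __name__ == '__main__':
    shared_frame = mp.Array(ctypes.c_uint8, FRAME_SIZE)
    shared_time = mp.Value('d', 0.0)
    frame_lock = mp.Lock()
    frame_ready = mp.Condition()
    results = mp.Queue()

    detector = mp.Process(target=detect_worker,
                          args=(shared_frame, shared_time, frame_lock, frame_ready, results, 5))
    prepper = mp.Process(target=prep_worker,
                         args=(shared_frame, shared_time, frame_lock, frame_ready), daemon=True)
    detector.start()
    prepper.start()

    for _ in range(5):
        print(results.get())

    detector.join()
    prepper.terminate()

Copying out of shared memory while the lock is held, and running detection only on the copy, mirrors what detect_objects does with prepped_frame_array and prepped_frame_lock in the patch: the critical section stays small and the inference call never blocks the prep process.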