From 7e3d2f66112cafcfafd90276f2319e04f6b72144 Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Fri, 1 Feb 2019 06:35:10 -0600 Subject: [PATCH 01/11] reduce CPU usage --- detect_objects.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/detect_objects.py b/detect_objects.py index 237ef0d88..685cb94e8 100644 --- a/detect_objects.py +++ b/detect_objects.py @@ -193,6 +193,9 @@ def process_frames(shared_arr, shared_output_arr, shared_frame_time, frame_shape if no_frames_available > 0 and (datetime.datetime.now().timestamp() - no_frames_available) > 30: time.sleep(1) print("sleeping because no frames have been available in a while") + else: + # rest a little bit to avoid maxing out the CPU + time.sleep(0.01) continue # we got a valid frame, so reset the timer @@ -202,6 +205,8 @@ def process_frames(shared_arr, shared_output_arr, shared_frame_time, frame_shape if (datetime.datetime.now().timestamp() - shared_frame_time.value) > 0.5: # signal that we need a new frame shared_frame_time.value = 0.0 + # rest a little bit to avoid maxing out the CPU + time.sleep(0.01) continue # make a copy of the frame From 98ce5a4a59f0ba155501ce7f7657a030b89aa810 Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Fri, 1 Feb 2019 06:35:48 -0600 Subject: [PATCH 02/11] wip: just detect objects in a specific area --- detect_objects.py | 71 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/detect_objects.py b/detect_objects.py index 685cb94e8..8c4ca2142 100644 --- a/detect_objects.py +++ b/detect_objects.py @@ -29,9 +29,9 @@ categories = label_map_util.convert_label_map_to_categories(label_map, max_num_c use_display_name=True) category_index = label_map_util.create_category_index(categories) -def detect_objects(image_np, sess, detection_graph): +def detect_objects(cropped_frame, full_frame, sess, detection_graph): # Expand dimensions since the model expects images to have shape: [1, None, None, 3] - image_np_expanded = np.expand_dims(image_np, axis=0) + image_np_expanded = np.expand_dims(cropped_frame, axis=0) image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') # Each box represents a part of the image where a particular object was detected. 
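Patch 01's fix is the standard cure for a busy-wait: when the shared timestamp signals that no new frame exists, sleep briefly instead of spinning. A condensed sketch of that polling pattern, assuming a `multiprocessing.Value('d')` timestamp as in the patch (the helper name and return value are mine, not the file's exact code):
```
import time
import datetime

def wait_for_new_frame(shared_frame_time):
    """Poll the shared capture timestamp, yielding the CPU while no frame is ready."""
    no_frames_since = None
    while True:
        if shared_frame_time.value == 0.0:
            now = datetime.datetime.now().timestamp()
            if no_frames_since is None:
                no_frames_since = now
            if now - no_frames_since > 30:
                # the feed has been quiet for a while, so back off harder
                time.sleep(1)
            else:
                # rest a little bit to avoid maxing out the CPU
                time.sleep(0.01)
            continue
        # a frame is ready: return its capture time so staleness can be checked
        return shared_frame_time.value
```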
@@ -52,22 +52,41 @@ def detect_objects(image_np, sess, detection_graph): objects = [] for index, value in enumerate(classes[0]): object_dict = {} - if scores[0, index] > 0.5: + if scores[0, index] > 0.1: object_dict[(category_index.get(value)).get('name').encode('utf8')] = \ scores[0, index] objects.append(object_dict) + squeezed_boxes = np.squeeze(boxes) + squeezed_scores = np.squeeze(scores) + + if(len(objects)>0): + # reposition bounding box based on full frame + for i, box in enumerate(squeezed_boxes): + if squeezed_scores[i] > .1: + ymin = ((box[0] * 300) + 200)/1080 # ymin + xmin = ((box[1] * 300) + 1300)/1920 # xmin + xmax = ((box[2] * 300) + 200)/1080 # ymax + ymax = ((box[3] * 300) + 1300)/1920 # xmax + print("ymin", box[0] * 300, ymin) + print("xmin", box[1] * 300, xmin) + print("ymax", box[2] * 300, ymax) + print("xmax", box[3] * 300, xmax) + # draw boxes for detected objects on image vis_util.visualize_boxes_and_labels_on_image_array( - image_np, - np.squeeze(boxes), + cropped_frame, + squeezed_boxes, np.squeeze(classes).astype(np.int32), - np.squeeze(scores), + squeezed_scores, category_index, use_normalized_coordinates=True, - line_thickness=4) + line_thickness=4, + min_score_thresh=.1) + + # cv2.rectangle(full_frame, (800, 100), (1250, 550), (255,0,0), 2) - return objects, image_np + return objects, cropped_frame def main(): # capture a single frame and check the frame shape so the correct array @@ -88,18 +107,21 @@ def main(): shared_frame_time = mp.Value('d', 0.0) # compute the flattened array length from the array shape flat_array_length = frame_shape[0] * frame_shape[1] * frame_shape[2] - # create shared array for passing the image data from capture to detect_objects + # create shared array for storing the full frame image data shared_arr = mp.Array(ctypes.c_uint16, flat_array_length) + # create shared array for storing the cropped frame image data + # TODO: make dynamic + shared_cropped_arr = mp.Array(ctypes.c_uint16, 300*300*3) # create shared array for passing the image data from detect_objects to flask - shared_output_arr = mp.Array(ctypes.c_uint16, flat_array_length) + shared_output_arr = mp.Array(ctypes.c_uint16, 300*300*3)#flat_array_length) # create a numpy array with the image shape from the shared memory array # this is used by flask to output an mjpeg stream - frame_output_arr = tonumpyarray(shared_output_arr).reshape(frame_shape) + frame_output_arr = tonumpyarray(shared_output_arr).reshape(300,300,3) - capture_process = mp.Process(target=fetch_frames, args=(shared_arr, shared_frame_time, frame_shape)) + capture_process = mp.Process(target=fetch_frames, args=(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape)) capture_process.daemon = True - detection_process = mp.Process(target=process_frames, args=(shared_arr, shared_output_arr, shared_frame_time, frame_shape)) + detection_process = mp.Process(target=process_frames, args=(shared_arr, shared_cropped_arr, shared_output_arr, shared_frame_time, frame_shape)) detection_process.daemon = True capture_process.start() @@ -119,9 +141,9 @@ def main(): # max out at 5 FPS time.sleep(0.2) # convert back to BGR - frame_bgr = cv2.cvtColor(frame_output_arr, cv2.COLOR_RGB2BGR) + # frame_bgr = cv2.cvtColor(frame_output_arr, cv2.COLOR_RGB2BGR) # encode the image into a jpg - ret, jpg = cv2.imencode('.jpg', frame_bgr) + ret, jpg = cv2.imencode('.jpg', frame_output_arr) yield (b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n' + jpg.tobytes() + b'\r\n\r\n') @@ -136,9 +158,10 @@ def tonumpyarray(mp_arr): # fetch the 
frames as fast a possible, only decoding the frames when the # detection_process has consumed the current frame -def fetch_frames(shared_arr, shared_frame_time, frame_shape): +def fetch_frames(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape): # convert shared memory array into numpy and shape into image array arr = tonumpyarray(shared_arr).reshape(frame_shape) + cropped_frame = tonumpyarray(shared_cropped_arr).reshape(300,300,3) # start the video capture video = cv2.VideoCapture(RTSP_URL) @@ -158,6 +181,12 @@ def fetch_frames(shared_arr, shared_frame_time, frame_shape): ret, frame = video.retrieve() if ret: # copy the frame into the numpy array + # Position 1 + # cropped_frame[:] = frame[270:720, 550:1000] + # Position 2 + # frame_cropped = frame[270:720, 100:550] + # Car + cropped_frame[:] = frame[200:500, 1300:1600] arr[:] = frame # signal to the detection_process by setting the shared_frame_time shared_frame_time.value = frame_time.timestamp() @@ -165,11 +194,12 @@ def fetch_frames(shared_arr, shared_frame_time, frame_shape): video.release() # do the actual object detection -def process_frames(shared_arr, shared_output_arr, shared_frame_time, frame_shape): +def process_frames(shared_arr, shared_cropped_arr, shared_output_arr, shared_frame_time, frame_shape): # shape shared input array into frame for processing arr = tonumpyarray(shared_arr).reshape(frame_shape) + shared_cropped_frame = tonumpyarray(shared_cropped_arr).reshape(300,300,3) # shape shared output array into frame so it can be copied into - output_arr = tonumpyarray(shared_output_arr).reshape(frame_shape) + output_arr = tonumpyarray(shared_output_arr).reshape(300,300,3) # Load a (frozen) Tensorflow model into memory before the processing loop detection_graph = tf.Graph() @@ -211,14 +241,15 @@ def process_frames(shared_arr, shared_output_arr, shared_frame_time, frame_shape # make a copy of the frame frame = arr.copy() + cropped_frame = shared_cropped_frame.copy() frame_time = shared_frame_time.value # signal that the frame has been used so a new one will be ready shared_frame_time.value = 0.0 # convert to RGB - frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + cropped_frame_rgb = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2RGB) # do the object detection - objects, frame_overlay = detect_objects(frame_rgb, sess, detection_graph) + objects, frame_overlay = detect_objects(cropped_frame_rgb, frame, sess, detection_graph) # copy the output frame with the bounding boxes to the output array output_arr[:] = frame_overlay if(len(objects) > 0): From a976403edcbfc9c083682aa8ad5e7ec73da20b73 Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Fri, 1 Feb 2019 06:57:03 -0600 Subject: [PATCH 03/11] got bounding boxes repositioned for full frame --- detect_objects.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/detect_objects.py b/detect_objects.py index 8c4ca2142..bce8835d4 100644 --- a/detect_objects.py +++ b/detect_objects.py @@ -60,22 +60,21 @@ def detect_objects(cropped_frame, full_frame, sess, detection_graph): squeezed_boxes = np.squeeze(boxes) squeezed_scores = np.squeeze(scores) + full_frame_shape = full_frame.shape + cropped_frame_shape = cropped_frame.shape + if(len(objects)>0): # reposition bounding box based on full frame for i, box in enumerate(squeezed_boxes): - if squeezed_scores[i] > .1: - ymin = ((box[0] * 300) + 200)/1080 # ymin - xmin = ((box[1] * 300) + 1300)/1920 # xmin - xmax = ((box[2] * 300) + 200)/1080 # ymax - ymax = ((box[3] * 300) + 1300)/1920 # 
xmax - print("ymin", box[0] * 300, ymin) - print("xmin", box[1] * 300, xmin) - print("ymax", box[2] * 300, ymax) - print("xmax", box[3] * 300, xmax) + if box[2] > 0: + squeezed_boxes[i][0] = ((box[0] * cropped_frame_shape[0]) + 200)/full_frame_shape[0] # ymin + squeezed_boxes[i][1] = ((box[1] * cropped_frame_shape[0]) + 1300)/full_frame_shape[1] # xmin + squeezed_boxes[i][2] = ((box[2] * cropped_frame_shape[0]) + 200)/full_frame_shape[0] # ymax + squeezed_boxes[i][3] = ((box[3] * cropped_frame_shape[0]) + 1300)/full_frame_shape[1] # xmax # draw boxes for detected objects on image vis_util.visualize_boxes_and_labels_on_image_array( - cropped_frame, + full_frame, squeezed_boxes, np.squeeze(classes).astype(np.int32), squeezed_scores, @@ -86,7 +85,7 @@ def detect_objects(cropped_frame, full_frame, sess, detection_graph): # cv2.rectangle(full_frame, (800, 100), (1250, 550), (255,0,0), 2) - return objects, cropped_frame + return objects, full_frame def main(): # capture a single frame and check the frame shape so the correct array @@ -113,10 +112,10 @@ def main(): # TODO: make dynamic shared_cropped_arr = mp.Array(ctypes.c_uint16, 300*300*3) # create shared array for passing the image data from detect_objects to flask - shared_output_arr = mp.Array(ctypes.c_uint16, 300*300*3)#flat_array_length) + shared_output_arr = mp.Array(ctypes.c_uint16, flat_array_length) # create a numpy array with the image shape from the shared memory array # this is used by flask to output an mjpeg stream - frame_output_arr = tonumpyarray(shared_output_arr).reshape(300,300,3) + frame_output_arr = tonumpyarray(shared_output_arr).reshape(frame_shape) capture_process = mp.Process(target=fetch_frames, args=(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape)) capture_process.daemon = True @@ -199,7 +198,7 @@ def process_frames(shared_arr, shared_cropped_arr, shared_output_arr, shared_fra arr = tonumpyarray(shared_arr).reshape(frame_shape) shared_cropped_frame = tonumpyarray(shared_cropped_arr).reshape(300,300,3) # shape shared output array into frame so it can be copied into - output_arr = tonumpyarray(shared_output_arr).reshape(300,300,3) + output_arr = tonumpyarray(shared_output_arr).reshape(frame_shape) # Load a (frozen) Tensorflow model into memory before the processing loop detection_graph = tf.Graph() From 11af9bb953feb5aef8eb6b99821e3116ad625d2d Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Fri, 1 Feb 2019 21:38:13 -0600 Subject: [PATCH 04/11] wip: focus on dynamic region and delay drawing until viewing --- detect_objects.py | 90 +++++++++++++++++++---------------------------- 1 file changed, 36 insertions(+), 54 deletions(-) diff --git a/detect_objects.py b/detect_objects.py index bce8835d4..6affdc087 100644 --- a/detect_objects.py +++ b/detect_objects.py @@ -23,13 +23,17 @@ PATH_TO_LABELS = '/label_map.pbtext' # TODO: make dynamic? 
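Patch 03's repositioning math generalizes to any square crop: scale the normalized box by the crop size, add the crop's pixel offset, then re-normalize against the full frame. A sketch of that transform as a standalone helper (the function and parameter names are mine; the patch inlines this arithmetic with hard-coded 200/1300 offsets):
```
def crop_box_to_full_frame(box, region_size, x_offset, y_offset, full_h, full_w):
    """Map a normalized [ymin, xmin, ymax, xmax] box detected inside a square
    crop back to normalized coordinates on the full frame."""
    ymin, xmin, ymax, xmax = box
    return [
        (ymin * region_size + y_offset) / full_h,
        (xmin * region_size + x_offset) / full_w,
        (ymax * region_size + y_offset) / full_h,
        (xmax * region_size + x_offset) / full_w,
    ]

# e.g. a box from a 300x300 crop at (x=1300, y=200) on a 1920x1080 frame:
# crop_box_to_full_frame([0.1, 0.2, 0.5, 0.6], 300, 1300, 200, 1080, 1920)
```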
NUM_CLASSES = 90 +REGION_SIZE = 700 +REGION_X_OFFSET = 950 +REGION_Y_OFFSET = 380 + # Loading label map label_map = label_map_util.load_labelmap(PATH_TO_LABELS) categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True) category_index = label_map_util.create_category_index(categories) -def detect_objects(cropped_frame, full_frame, sess, detection_graph): +def detect_objects(cropped_frame, sess, detection_graph): # Expand dimensions since the model expects images to have shape: [1, None, None, 3] image_np_expanded = np.expand_dims(cropped_frame, axis=0) image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') @@ -51,41 +55,11 @@ def detect_objects(cropped_frame, full_frame, sess, detection_graph): # build an array of detected objects objects = [] for index, value in enumerate(classes[0]): - object_dict = {} - if scores[0, index] > 0.1: - object_dict[(category_index.get(value)).get('name').encode('utf8')] = \ - scores[0, index] - objects.append(object_dict) + score = scores[0, index] + if score > 0.1: + objects += [value, scores[0, index]] + boxes[0, index].tolist() - squeezed_boxes = np.squeeze(boxes) - squeezed_scores = np.squeeze(scores) - - full_frame_shape = full_frame.shape - cropped_frame_shape = cropped_frame.shape - - if(len(objects)>0): - # reposition bounding box based on full frame - for i, box in enumerate(squeezed_boxes): - if box[2] > 0: - squeezed_boxes[i][0] = ((box[0] * cropped_frame_shape[0]) + 200)/full_frame_shape[0] # ymin - squeezed_boxes[i][1] = ((box[1] * cropped_frame_shape[0]) + 1300)/full_frame_shape[1] # xmin - squeezed_boxes[i][2] = ((box[2] * cropped_frame_shape[0]) + 200)/full_frame_shape[0] # ymax - squeezed_boxes[i][3] = ((box[3] * cropped_frame_shape[0]) + 1300)/full_frame_shape[1] # xmax - - # draw boxes for detected objects on image - vis_util.visualize_boxes_and_labels_on_image_array( - full_frame, - squeezed_boxes, - np.squeeze(classes).astype(np.int32), - squeezed_scores, - category_index, - use_normalized_coordinates=True, - line_thickness=4, - min_score_thresh=.1) - - # cv2.rectangle(full_frame, (800, 100), (1250, 550), (255,0,0), 2) - - return objects, full_frame + return objects def main(): # capture a single frame and check the frame shape so the correct array @@ -108,14 +82,13 @@ def main(): flat_array_length = frame_shape[0] * frame_shape[1] * frame_shape[2] # create shared array for storing the full frame image data shared_arr = mp.Array(ctypes.c_uint16, flat_array_length) + # shape current frame so it can be treated as an image + frame_arr = tonumpyarray(shared_arr).reshape(frame_shape) # create shared array for storing the cropped frame image data # TODO: make dynamic - shared_cropped_arr = mp.Array(ctypes.c_uint16, 300*300*3) + shared_cropped_arr = mp.Array(ctypes.c_uint16, REGION_SIZE*REGION_SIZE*3) # create shared array for passing the image data from detect_objects to flask - shared_output_arr = mp.Array(ctypes.c_uint16, flat_array_length) - # create a numpy array with the image shape from the shared memory array - # this is used by flask to output an mjpeg stream - frame_output_arr = tonumpyarray(shared_output_arr).reshape(frame_shape) + shared_output_arr = mp.Array(ctypes.c_double, 6*10) capture_process = mp.Process(target=fetch_frames, args=(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape)) capture_process.daemon = True @@ -139,10 +112,23 @@ def main(): while True: # max out at 5 FPS time.sleep(0.2) - # convert back to BGR - # frame_bgr = 
cv2.cvtColor(frame_output_arr, cv2.COLOR_RGB2BGR) + frame = frame_arr.copy() + # draw the bounding boxes on the screen + object_index = 0 + while(object_index < 60 and shared_output_arr[object_index] > 0): + object_class = shared_output_arr[object_index] + score = shared_output_arr[object_index+1] + ymin = int(((shared_output_arr[object_index+2] * REGION_SIZE) + REGION_Y_OFFSET)) + xmin = int(((shared_output_arr[object_index+3] * REGION_SIZE) + REGION_X_OFFSET)) + ymax = int(((shared_output_arr[object_index+4] * REGION_SIZE) + REGION_Y_OFFSET)) + xmax = int(((shared_output_arr[object_index+5] * REGION_SIZE) + REGION_X_OFFSET)) + cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255,0,0), 2) + object_index += 6 + print(category_index.get(object_class).get('name').encode('utf8'), score) # encode the image into a jpg - ret, jpg = cv2.imencode('.jpg', frame_output_arr) + + cv2.rectangle(frame, (REGION_X_OFFSET, REGION_Y_OFFSET), (REGION_X_OFFSET+REGION_SIZE, REGION_Y_OFFSET+REGION_SIZE), (255,255,255), 2) + ret, jpg = cv2.imencode('.jpg', frame) yield (b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n' + jpg.tobytes() + b'\r\n\r\n') @@ -160,7 +146,7 @@ def tonumpyarray(mp_arr): def fetch_frames(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape): # convert shared memory array into numpy and shape into image array arr = tonumpyarray(shared_arr).reshape(frame_shape) - cropped_frame = tonumpyarray(shared_cropped_arr).reshape(300,300,3) + cropped_frame = tonumpyarray(shared_cropped_arr).reshape(REGION_SIZE,REGION_SIZE,3) # start the video capture video = cv2.VideoCapture(RTSP_URL) @@ -185,7 +171,7 @@ def fetch_frames(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape) # Position 2 # frame_cropped = frame[270:720, 100:550] # Car - cropped_frame[:] = frame[200:500, 1300:1600] + cropped_frame[:] = frame[REGION_Y_OFFSET:REGION_Y_OFFSET+REGION_SIZE, REGION_X_OFFSET:REGION_X_OFFSET+REGION_SIZE] arr[:] = frame # signal to the detection_process by setting the shared_frame_time shared_frame_time.value = frame_time.timestamp() @@ -196,9 +182,7 @@ def fetch_frames(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape) def process_frames(shared_arr, shared_cropped_arr, shared_output_arr, shared_frame_time, frame_shape): # shape shared input array into frame for processing arr = tonumpyarray(shared_arr).reshape(frame_shape) - shared_cropped_frame = tonumpyarray(shared_cropped_arr).reshape(300,300,3) - # shape shared output array into frame so it can be copied into - output_arr = tonumpyarray(shared_output_arr).reshape(frame_shape) + shared_cropped_frame = tonumpyarray(shared_cropped_arr).reshape(REGION_SIZE,REGION_SIZE,3) # Load a (frozen) Tensorflow model into memory before the processing loop detection_graph = tf.Graph() @@ -239,7 +223,7 @@ def process_frames(shared_arr, shared_cropped_arr, shared_output_arr, shared_fra continue # make a copy of the frame - frame = arr.copy() + # frame = arr.copy() cropped_frame = shared_cropped_frame.copy() frame_time = shared_frame_time.value # signal that the frame has been used so a new one will be ready @@ -248,11 +232,9 @@ def process_frames(shared_arr, shared_cropped_arr, shared_output_arr, shared_fra # convert to RGB cropped_frame_rgb = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2RGB) # do the object detection - objects, frame_overlay = detect_objects(cropped_frame_rgb, frame, sess, detection_graph) - # copy the output frame with the bounding boxes to the output array - output_arr[:] = frame_overlay - if(len(objects) > 0): - 
print(objects) + objects = detect_objects(cropped_frame_rgb, sess, detection_graph) + # copy the detected objects to the output array, filling the array when needed + shared_output_arr[:] = objects + [0.0] * (60-len(objects)) if __name__ == '__main__': mp.freeze_support() From b91c24bf8f0dda07e9d61eed234ce9c030418037 Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Sat, 2 Feb 2019 08:16:35 -0600 Subject: [PATCH 05/11] crop the frame and calculate the coordinates in the subprocess and add labels to the image --- detect_objects.py | 70 +++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/detect_objects.py b/detect_objects.py index 6affdc087..8718ed6ba 100644 --- a/detect_objects.py +++ b/detect_objects.py @@ -23,9 +23,9 @@ PATH_TO_LABELS = '/label_map.pbtext' # TODO: make dynamic? NUM_CLASSES = 90 -REGION_SIZE = 700 -REGION_X_OFFSET = 950 -REGION_Y_OFFSET = 380 +REGION_SIZE = 300 +REGION_X_OFFSET = 1250 +REGION_Y_OFFSET = 180 # Loading label map label_map = label_map_util.load_labelmap(PATH_TO_LABELS) @@ -33,7 +33,7 @@ categories = label_map_util.convert_label_map_to_categories(label_map, max_num_c use_display_name=True) category_index = label_map_util.create_category_index(categories) -def detect_objects(cropped_frame, sess, detection_graph): +def detect_objects(cropped_frame, sess, detection_graph, region_size, region_x_offset, region_y_offset): # Expand dimensions since the model expects images to have shape: [1, None, None, 3] image_np_expanded = np.expand_dims(cropped_frame, axis=0) image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') @@ -57,7 +57,15 @@ def detect_objects(cropped_frame, sess, detection_graph): for index, value in enumerate(classes[0]): score = scores[0, index] if score > 0.1: - objects += [value, scores[0, index]] + boxes[0, index].tolist() + box = boxes[0, index].tolist() + box[0] = (box[0] * region_size) + region_y_offset + box[1] = (box[1] * region_size) + region_x_offset + box[2] = (box[2] * region_size) + region_y_offset + box[3] = (box[3] * region_size) + region_x_offset + objects += [value, scores[0, index]] + box + # only get the first 10 objects + if len(objects) = 60: + break return objects @@ -84,16 +92,13 @@ def main(): shared_arr = mp.Array(ctypes.c_uint16, flat_array_length) # shape current frame so it can be treated as an image frame_arr = tonumpyarray(shared_arr).reshape(frame_shape) - # create shared array for storing the cropped frame image data - # TODO: make dynamic - shared_cropped_arr = mp.Array(ctypes.c_uint16, REGION_SIZE*REGION_SIZE*3) - # create shared array for passing the image data from detect_objects to flask + # create shared array for storing 10 detected objects shared_output_arr = mp.Array(ctypes.c_double, 6*10) - capture_process = mp.Process(target=fetch_frames, args=(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape)) + capture_process = mp.Process(target=fetch_frames, args=(shared_arr, shared_frame_time, frame_shape)) capture_process.daemon = True - detection_process = mp.Process(target=process_frames, args=(shared_arr, shared_cropped_arr, shared_output_arr, shared_frame_time, frame_shape)) + detection_process = mp.Process(target=process_frames, args=(shared_arr, shared_output_arr, shared_frame_time, frame_shape, REGION_SIZE, REGION_X_OFFSET, REGION_Y_OFFSET)) detection_process.daemon = True capture_process.start() @@ -113,21 +118,33 @@ def main(): # max out at 5 FPS time.sleep(0.2) frame = frame_arr.copy() + # convert to RGB for drawing + frame 
= cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # draw the bounding boxes on the screen object_index = 0 while(object_index < 60 and shared_output_arr[object_index] > 0): object_class = shared_output_arr[object_index] + object_name = str(category_index.get(object_class).get('name')) score = shared_output_arr[object_index+1] - ymin = int(((shared_output_arr[object_index+2] * REGION_SIZE) + REGION_Y_OFFSET)) - xmin = int(((shared_output_arr[object_index+3] * REGION_SIZE) + REGION_X_OFFSET)) - ymax = int(((shared_output_arr[object_index+4] * REGION_SIZE) + REGION_Y_OFFSET)) - xmax = int(((shared_output_arr[object_index+5] * REGION_SIZE) + REGION_X_OFFSET)) - cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255,0,0), 2) + display_str = '{}: {}%'.format(object_name, int(100*score)) + ymin = int(shared_output_arr[object_index+2]) + xmin = int(shared_output_arr[object_index+3]) + ymax = int(shared_output_arr[object_index+4]) + xmax = int(shared_output_arr[object_index+5]) + vis_util.draw_bounding_box_on_image_array(frame, + ymin, + xmin, + ymax, + xmax, + color='red', + thickness=2, + display_str_list=[display_str], + use_normalized_coordinates=False) object_index += 6 - print(category_index.get(object_class).get('name').encode('utf8'), score) - # encode the image into a jpg - cv2.rectangle(frame, (REGION_X_OFFSET, REGION_Y_OFFSET), (REGION_X_OFFSET+REGION_SIZE, REGION_Y_OFFSET+REGION_SIZE), (255,255,255), 2) + # convert back to BGR + frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) + # encode the image into a jpg ret, jpg = cv2.imencode('.jpg', frame) yield (b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n' + jpg.tobytes() + b'\r\n\r\n') @@ -143,10 +160,9 @@ def tonumpyarray(mp_arr): # fetch the frames as fast a possible, only decoding the frames when the # detection_process has consumed the current frame -def fetch_frames(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape): +def fetch_frames(shared_arr, shared_frame_time, frame_shape): # convert shared memory array into numpy and shape into image array arr = tonumpyarray(shared_arr).reshape(frame_shape) - cropped_frame = tonumpyarray(shared_cropped_arr).reshape(REGION_SIZE,REGION_SIZE,3) # start the video capture video = cv2.VideoCapture(RTSP_URL) @@ -170,8 +186,6 @@ def fetch_frames(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape) # cropped_frame[:] = frame[270:720, 550:1000] # Position 2 # frame_cropped = frame[270:720, 100:550] - # Car - cropped_frame[:] = frame[REGION_Y_OFFSET:REGION_Y_OFFSET+REGION_SIZE, REGION_X_OFFSET:REGION_X_OFFSET+REGION_SIZE] arr[:] = frame # signal to the detection_process by setting the shared_frame_time shared_frame_time.value = frame_time.timestamp() @@ -179,10 +193,9 @@ def fetch_frames(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape) video.release() # do the actual object detection -def process_frames(shared_arr, shared_cropped_arr, shared_output_arr, shared_frame_time, frame_shape): +def process_frames(shared_arr, shared_output_arr, shared_frame_time, frame_shape, region_size, region_x_offset, region_y_offset): # shape shared input array into frame for processing arr = tonumpyarray(shared_arr).reshape(frame_shape) - shared_cropped_frame = tonumpyarray(shared_cropped_arr).reshape(REGION_SIZE,REGION_SIZE,3) # Load a (frozen) Tensorflow model into memory before the processing loop detection_graph = tf.Graph() @@ -222,9 +235,8 @@ def process_frames(shared_arr, shared_cropped_arr, shared_output_arr, shared_fra time.sleep(0.01) continue - # make a copy of the frame - # frame = 
arr.copy() - cropped_frame = shared_cropped_frame.copy() + # make a copy of the cropped frame + cropped_frame = arr[region_y_offset:region_y_offset+region_size, region_x_offset:region_x_offset+region_size].copy() frame_time = shared_frame_time.value # signal that the frame has been used so a new one will be ready shared_frame_time.value = 0.0 @@ -232,7 +244,7 @@ def process_frames(shared_arr, shared_cropped_arr, shared_output_arr, shared_fra # convert to RGB cropped_frame_rgb = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2RGB) # do the object detection - objects = detect_objects(cropped_frame_rgb, sess, detection_graph) + objects = detect_objects(cropped_frame_rgb, sess, detection_graph, region_size, region_x_offset, region_y_offset) # copy the detected objects to the output array, filling the array when needed shared_output_arr[:] = objects + [0.0] * (60-len(objects)) From 072997736c0468b183e9ac72d8287091f706a201 Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Mon, 4 Feb 2019 06:18:49 -0600 Subject: [PATCH 06/11] parse the objects into a global array in a separate thread --- detect_objects.py | 68 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/detect_objects.py b/detect_objects.py index 8718ed6ba..d23660b54 100644 --- a/detect_objects.py +++ b/detect_objects.py @@ -5,6 +5,7 @@ import datetime import ctypes import logging import multiprocessing as mp +import threading from contextlib import closing import numpy as np import tensorflow as tf @@ -27,6 +28,8 @@ REGION_SIZE = 300 REGION_X_OFFSET = 1250 REGION_Y_OFFSET = 180 +DETECTED_OBJECTS = [] + # Loading label map label_map = label_map_util.load_labelmap(PATH_TO_LABELS) categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, @@ -64,11 +67,36 @@ def detect_objects(cropped_frame, sess, detection_graph, region_size, region_x_o box[3] = (box[3] * region_size) + region_x_offset objects += [value, scores[0, index]] + box # only get the first 10 objects - if len(objects) = 60: + if len(objects) == 60: break return objects +class ObjectParser(threading.Thread): + def __init__(self, object_arrays): + threading.Thread.__init__(self) + self._object_arrays = object_arrays + + def run(self): + global DETECTED_OBJECTS + while True: + detected_objects = [] + for object_array in self._object_arrays: + object_index = 0 + while(object_index < 60 and object_array[object_index] > 0): + object_class = object_array[object_index] + detected_objects.append({ + 'name': str(category_index.get(object_class).get('name')), + 'score': object_array[object_index+1], + 'ymin': int(object_array[object_index+2]), + 'xmin': int(object_array[object_index+3]), + 'ymax': int(object_array[object_index+4]), + 'xmax': int(object_array[object_index+5]) + }) + object_index += 6 + DETECTED_OBJECTS = detected_objects + time.sleep(0.01) + def main(): # capture a single frame and check the frame shape so the correct array # size can be allocated in memory @@ -101,6 +129,9 @@ def main(): detection_process = mp.Process(target=process_frames, args=(shared_arr, shared_output_arr, shared_frame_time, frame_shape, REGION_SIZE, REGION_X_OFFSET, REGION_Y_OFFSET)) detection_process.daemon = True + object_parser = ObjectParser([shared_output_arr]) + object_parser.start() + capture_process.start() print("capture_process pid ", capture_process.pid) detection_process.start() @@ -114,33 +145,27 @@ def main(): return Response(imagestream(), mimetype='multipart/x-mixed-replace; boundary=frame') 
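Patches 05 and 06 settle on a flat wire format for handing detections between processes: six doubles per object (class, score, ymin, xmin, ymax, xmax), at most ten objects, zero-padded to 60 slots. A minimal standalone sketch of that encoding and the matching decode the `ObjectParser` thread performs (the dict keys and function names are mine; the patch reads the slots positionally):
```
MAX_OBJECTS = 10
FIELDS = 6  # class, score, ymin, xmin, ymax, xmax

def encode_objects(detections):
    """Flatten detections into a fixed-length list of doubles, zero-padded."""
    flat = []
    for d in detections[:MAX_OBJECTS]:
        flat += [d['class'], d['score'], d['ymin'], d['xmin'], d['ymax'], d['xmax']]
    return flat + [0.0] * (MAX_OBJECTS * FIELDS - len(flat))

def decode_objects(flat):
    """Read detections back out, stopping at the first zeroed class slot.

    This works because COCO class ids start at 1, so 0.0 can never be
    a real class value."""
    detections = []
    for i in range(0, MAX_OBJECTS * FIELDS, FIELDS):
        if flat[i] == 0.0:
            break
        detections.append({
            'class': int(flat[i]), 'score': flat[i + 1],
            'ymin': flat[i + 2], 'xmin': flat[i + 3],
            'ymax': flat[i + 4], 'xmax': flat[i + 5],
        })
    return detections
```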
def imagestream(): + global DETECTED_OBJECTS while True: # max out at 5 FPS time.sleep(0.2) + # make a copy of the current detected objects + detected_objects = DETECTED_OBJECTS.copy() + # make a copy of the current frame frame = frame_arr.copy() # convert to RGB for drawing frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # draw the bounding boxes on the screen - object_index = 0 - while(object_index < 60 and shared_output_arr[object_index] > 0): - object_class = shared_output_arr[object_index] - object_name = str(category_index.get(object_class).get('name')) - score = shared_output_arr[object_index+1] - display_str = '{}: {}%'.format(object_name, int(100*score)) - ymin = int(shared_output_arr[object_index+2]) - xmin = int(shared_output_arr[object_index+3]) - ymax = int(shared_output_arr[object_index+4]) - xmax = int(shared_output_arr[object_index+5]) + for obj in DETECTED_OBJECTS: vis_util.draw_bounding_box_on_image_array(frame, - ymin, - xmin, - ymax, - xmax, + obj['ymin'], + obj['xmin'], + obj['ymax'], + obj['xmax'], color='red', thickness=2, - display_str_list=[display_str], + display_str_list=["{}: {}%".format(obj['name'],int(obj['score']*100))], use_normalized_coordinates=False) - object_index += 6 cv2.rectangle(frame, (REGION_X_OFFSET, REGION_Y_OFFSET), (REGION_X_OFFSET+REGION_SIZE, REGION_Y_OFFSET+REGION_SIZE), (255,255,255), 2) # convert back to BGR frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) @@ -153,6 +178,7 @@ def main(): capture_process.join() detection_process.join() + object_parser.join() # convert shared memory array into numpy array def tonumpyarray(mp_arr): @@ -181,14 +207,12 @@ def fetch_frames(shared_arr, shared_frame_time, frame_shape): # go ahead and decode the current frame ret, frame = video.retrieve() if ret: - # copy the frame into the numpy array - # Position 1 - # cropped_frame[:] = frame[270:720, 550:1000] - # Position 2 - # frame_cropped = frame[270:720, 100:550] arr[:] = frame # signal to the detection_process by setting the shared_frame_time shared_frame_time.value = frame_time.timestamp() + else: + # sleep a little to reduce CPU usage + time.sleep(0.01) video.release() From 623a3044fb3a6e36b8207d9c0ab3543e8676e660 Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Mon, 4 Feb 2019 06:34:32 -0600 Subject: [PATCH 07/11] multiple hard coded processes --- detect_objects.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/detect_objects.py b/detect_objects.py index d23660b54..dbb21a07a 100644 --- a/detect_objects.py +++ b/detect_objects.py @@ -114,6 +114,7 @@ def main(): # is a float. otherwise it stops updating the value in shared # memory. 
probably something to do with the size of the memory block shared_frame_time = mp.Value('d', 0.0) + shared_frame_time2 = mp.Value('d', 0.0) # compute the flattened array length from the array shape flat_array_length = frame_shape[0] * frame_shape[1] * frame_shape[2] # create shared array for storing the full frame image data @@ -122,20 +123,28 @@ def main(): frame_arr = tonumpyarray(shared_arr).reshape(frame_shape) # create shared array for storing 10 detected objects shared_output_arr = mp.Array(ctypes.c_double, 6*10) + shared_output_arr2 = mp.Array(ctypes.c_double, 6*10) - capture_process = mp.Process(target=fetch_frames, args=(shared_arr, shared_frame_time, frame_shape)) + capture_process = mp.Process(target=fetch_frames, args=(shared_arr, [shared_frame_time, shared_frame_time2], frame_shape)) capture_process.daemon = True - detection_process = mp.Process(target=process_frames, args=(shared_arr, shared_output_arr, shared_frame_time, frame_shape, REGION_SIZE, REGION_X_OFFSET, REGION_Y_OFFSET)) + detection_process = mp.Process(target=process_frames, args=(shared_arr, shared_output_arr, + shared_frame_time, frame_shape, REGION_SIZE, REGION_X_OFFSET, REGION_Y_OFFSET)) detection_process.daemon = True - object_parser = ObjectParser([shared_output_arr]) + detection_process2 = mp.Process(target=process_frames, args=(shared_arr, shared_output_arr2, + shared_frame_time2, frame_shape, 1080, 0, 0)) + detection_process.daemon = True + + object_parser = ObjectParser([shared_output_arr, shared_output_arr2]) object_parser.start() capture_process.start() print("capture_process pid ", capture_process.pid) detection_process.start() print("detection_process pid ", detection_process.pid) + detection_process2.start() + print("detection_process pid ", detection_process2.pid) app = Flask(__name__) @@ -178,6 +187,7 @@ def main(): capture_process.join() detection_process.join() + detection_process2.join() object_parser.join() # convert shared memory array into numpy array @@ -186,7 +196,7 @@ def tonumpyarray(mp_arr): # fetch the frames as fast a possible, only decoding the frames when the # detection_process has consumed the current frame -def fetch_frames(shared_arr, shared_frame_time, frame_shape): +def fetch_frames(shared_arr, shared_frame_times, frame_shape): # convert shared memory array into numpy and shape into image array arr = tonumpyarray(shared_arr).reshape(frame_shape) @@ -203,13 +213,14 @@ def fetch_frames(shared_arr, shared_frame_time, frame_shape): if ret: # if the detection_process is ready for the next frame decode it # otherwise skip this frame and move onto the next one - if shared_frame_time.value == 0.0: + if all(shared_frame_time.value == 0.0 for shared_frame_time in shared_frame_times): # go ahead and decode the current frame ret, frame = video.retrieve() if ret: arr[:] = frame - # signal to the detection_process by setting the shared_frame_time - shared_frame_time.value = frame_time.timestamp() + # signal to the detection_processes by setting the shared_frame_time + for shared_frame_time in shared_frame_times: + shared_frame_time.value = frame_time.timestamp() else: # sleep a little to reduce CPU usage time.sleep(0.01) From 0359e2d2a1831acd15e68e7113ffd941e68ce16a Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Mon, 4 Feb 2019 07:07:13 -0600 Subject: [PATCH 08/11] dynamic number of processes based on selected regions --- detect_objects.py | 71 ++++++++++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 28 deletions(-) diff --git a/detect_objects.py 
b/detect_objects.py index dbb21a07a..d5625418a 100644 --- a/detect_objects.py +++ b/detect_objects.py @@ -24,9 +24,8 @@ PATH_TO_LABELS = '/label_map.pbtext' # TODO: make dynamic? NUM_CLASSES = 90 -REGION_SIZE = 300 -REGION_X_OFFSET = 1250 -REGION_Y_OFFSET = 180 +#REGIONS = "600,0,380:600,600,380:600,1200,380" +REGIONS = os.getenv('REGIONS') DETECTED_OBJECTS = [] @@ -98,6 +97,15 @@ class ObjectParser(threading.Thread): time.sleep(0.01) def main(): + # Parse selected regions + regions = [] + for region_string in REGIONS.split(':'): + region_parts = region_string.split(',') + regions.append({ + 'size': int(region_parts[0]), + 'x_offset': int(region_parts[1]), + 'y_offset': int(region_parts[2]) + }) # capture a single frame and check the frame shape so the correct array # size can be allocated in memory video = cv2.VideoCapture(RTSP_URL) @@ -109,42 +117,45 @@ def main(): exit(1) video.release() - # create shared value for storing the time the frame was captured - # note: this must be a double even though the value you are storing - # is a float. otherwise it stops updating the value in shared - # memory. probably something to do with the size of the memory block - shared_frame_time = mp.Value('d', 0.0) - shared_frame_time2 = mp.Value('d', 0.0) + shared_memory_objects = [] + for region in regions: + shared_memory_objects.append({ + # create shared value for storing the time the frame was captured + # note: this must be a double even though the value you are storing + # is a float. otherwise it stops updating the value in shared + # memory. probably something to do with the size of the memory block + 'frame_time': mp.Value('d', 0.0), + # create shared array for storing 10 detected objects + 'output_array': mp.Array(ctypes.c_double, 6*10) + }) + # compute the flattened array length from the array shape flat_array_length = frame_shape[0] * frame_shape[1] * frame_shape[2] # create shared array for storing the full frame image data shared_arr = mp.Array(ctypes.c_uint16, flat_array_length) # shape current frame so it can be treated as an image frame_arr = tonumpyarray(shared_arr).reshape(frame_shape) - # create shared array for storing 10 detected objects - shared_output_arr = mp.Array(ctypes.c_double, 6*10) - shared_output_arr2 = mp.Array(ctypes.c_double, 6*10) - capture_process = mp.Process(target=fetch_frames, args=(shared_arr, [shared_frame_time, shared_frame_time2], frame_shape)) + capture_process = mp.Process(target=fetch_frames, args=(shared_arr, [obj['frame_time'] for obj in shared_memory_objects], frame_shape)) capture_process.daemon = True - detection_process = mp.Process(target=process_frames, args=(shared_arr, shared_output_arr, - shared_frame_time, frame_shape, REGION_SIZE, REGION_X_OFFSET, REGION_Y_OFFSET)) - detection_process.daemon = True + detection_processes = [] + for index, region in enumerate(regions): + detection_process = mp.Process(target=process_frames, args=(shared_arr, + shared_memory_objects[index]['output_array'], + shared_memory_objects[index]['frame_time'], frame_shape, + region['size'], region['x_offset'], region['y_offset'])) + detection_process.daemon = True + detection_processes.append(detection_process) - detection_process2 = mp.Process(target=process_frames, args=(shared_arr, shared_output_arr2, - shared_frame_time2, frame_shape, 1080, 0, 0)) - detection_process.daemon = True - - object_parser = ObjectParser([shared_output_arr, shared_output_arr2]) + object_parser = ObjectParser([obj['output_array'] for obj in shared_memory_objects]) object_parser.start() 
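`REGIONS` is a colon-separated list of `size,x_offset,y_offset` triples, and each triple becomes one detection process. A sketch of the same parsing with up-front validation added (the validation and function name are additions, not in the patch):
```
def parse_regions(regions_str):
    """Parse 'size,x_offset,y_offset:...' into a list of region dicts."""
    regions = []
    for region_string in regions_str.split(':'):
        parts = region_string.split(',')
        if len(parts) != 3:
            raise ValueError("expected 'size,x_offset,y_offset', got: " + region_string)
        size, x_offset, y_offset = (int(p) for p in parts)
        regions.append({'size': size, 'x_offset': x_offset, 'y_offset': y_offset})
    return regions

# e.g. three 600x600 regions across a 1080p frame:
# parse_regions('600,0,380:600,600,380:600,1200,380')
```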
capture_process.start() print("capture_process pid ", capture_process.pid) - detection_process.start() - print("detection_process pid ", detection_process.pid) - detection_process2.start() - print("detection_process pid ", detection_process2.pid) + for detection_process in detection_processes: + detection_process.start() + print("detection_process pid ", detection_process.pid) app = Flask(__name__) @@ -175,7 +186,11 @@ def main(): thickness=2, display_str_list=["{}: {}%".format(obj['name'],int(obj['score']*100))], use_normalized_coordinates=False) - cv2.rectangle(frame, (REGION_X_OFFSET, REGION_Y_OFFSET), (REGION_X_OFFSET+REGION_SIZE, REGION_Y_OFFSET+REGION_SIZE), (255,255,255), 2) + + for region in regions: + cv2.rectangle(frame, (region['x_offset'], region['y_offset']), + (region['x_offset']+region['size'], region['y_offset']+region['size']), + (255,255,255), 2) # convert back to BGR frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) # encode the image into a jpg @@ -186,8 +201,8 @@ def main(): app.run(host='0.0.0.0', debug=False) capture_process.join() - detection_process.join() - detection_process2.join() + for detection_process in detection_processes: + detection_process.join() object_parser.join() # convert shared memory array into numpy array From 4a77046c7c575954a7af394800b11cdaeac47519 Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Mon, 4 Feb 2019 07:10:42 -0600 Subject: [PATCH 09/11] update readme --- README.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 771fa775f..8b5dc53c2 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,12 @@ # Realtime Object Detection for RTSP Cameras +This results in a MJPEG stream with objects identified that has a lower latency than directly viewing the RTSP feed with VLC. - Prioritizes realtime processing over frames per second. Dropping frames is fine. - OpenCV runs in a separate process so it can grab frames as quickly as possible to ensure there aren't old frames in the buffer - Object detection with Tensorflow runs in a separate process and ignores frames that are more than 0.5 seconds old - Uses shared memory arrays for handing frames between processes - Provides a url for viewing the video feed at a hard coded ~5FPS as an mjpeg stream - Frames are only encoded into mjpeg stream when it is being viewed +- A process is created per detection region ## Getting Started Build the container with @@ -23,13 +25,18 @@ docker run -it --rm \ -v :/label_map.pbtext:ro \ -p 5000:5000 \ -e RTSP_URL='' \ +-e REGIONS=',,:,,' \ realtime-od:latest ``` Access the mjpeg stream at http://localhost:5000 +## Tips +- Lower the framerate of the RTSP feed on the camera to what you want to reduce the CPU usage for capturing the feed + ## Future improvements - MQTT messages when detected objects change - Dynamic changes to processing speed, ie. 
only process 1FPS unless motion detected -- Break incoming frame into multiple smaller images and run detection in parallel for lower latency (rather than input a lower resolution) -- Parallel processing to increase FPS \ No newline at end of file +- Parallel processing to increase FPS +- Look into GPU accelerated decoding of RTSP stream +- Send video over a socket and use JSMPEG \ No newline at end of file From 071fcf6ea217a2926173c58e874f717a07b0580a Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Sat, 9 Feb 2019 07:23:18 -0600 Subject: [PATCH 10/11] update opencv --- Dockerfile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index c027df353..5fe568cc3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -61,17 +61,17 @@ RUN cd /usr/local/src/ \ RUN jupyter nbextension enable --py --sys-prefix widgetsnbextension # Download & build OpenCV -RUN wget -q -P /usr/local/src/ --no-check-certificate https://github.com/opencv/opencv/archive/3.4.1.zip +RUN wget -q -P /usr/local/src/ --no-check-certificate https://github.com/opencv/opencv/archive/4.0.1.zip RUN cd /usr/local/src/ \ - && unzip 3.4.1.zip \ - && rm 3.4.1.zip \ - && cd /usr/local/src/opencv-3.4.1/ \ + && unzip 4.0.1.zip \ + && rm 4.0.1.zip \ + && cd /usr/local/src/opencv-4.0.1/ \ && mkdir build \ - && cd /usr/local/src/opencv-3.4.1/build \ + && cd /usr/local/src/opencv-4.0.1/build \ && cmake -D CMAKE_INSTALL_TYPE=Release -D CMAKE_INSTALL_PREFIX=/usr/local/ .. \ && make -j4 \ && make install \ - && rm -rf /usr/local/src/opencv-3.4.1 + && rm -rf /usr/local/src/opencv-4.0.1 # Minimize image size RUN (apt-get autoremove -y; \ From 3e4256602fcd804df587604a5a35f8e185ce6cc2 Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Sat, 9 Feb 2019 07:23:54 -0600 Subject: [PATCH 11/11] update readme --- README.md | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 8b5dc53c2..4f0003c50 100644 --- a/README.md +++ b/README.md @@ -33,10 +33,38 @@ Access the mjpeg stream at http://localhost:5000 ## Tips - Lower the framerate of the RTSP feed on the camera to what you want to reduce the CPU usage for capturing the feed +- Use SSDLite models ## Future improvements -- MQTT messages when detected objects change -- Dynamic changes to processing speed, ie. only process 1FPS unless motion detected -- Parallel processing to increase FPS -- Look into GPU accelerated decoding of RTSP stream -- Send video over a socket and use JSMPEG \ No newline at end of file +- [ ] Look for a subset of object types +- [ ] Try and simplify the tensorflow model to just look for the objects we care about +- [ ] MQTT messages when detected objects change +- [ ] Implement basic motion detection with opencv and only look for objects in the regions with detected motion +- [ ] Dynamic changes to processing speed, ie. 
only process 1FPS unless motion detected
+- [x] Parallel processing to increase FPS
+- [ ] Look into GPU accelerated decoding of RTSP stream
+- [ ] Send video over a socket and use JSMPEG
+
+## Building Tensorflow from source for CPU optimizations
+https://www.tensorflow.org/install/source#docker_linux_builds
+Used `tensorflow/tensorflow:1.12.0-devel-py3`
+
+## Optimizing the graph (can't say I saw much difference in CPU usage)
+https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/graph_transforms/README.md#optimizing-for-deployment
+```
+docker run -it -v ${PWD}:/lab -v ${PWD}/../back_camera_model/models/ssd_mobilenet_v2_coco_2018_03_29/frozen_inference_graph.pb:/frozen_inference_graph.pb:ro tensorflow/tensorflow:1.12.0-devel-py3 bash
+
+bazel build tensorflow/tools/graph_transforms:transform_graph
+
+bazel-bin/tensorflow/tools/graph_transforms/transform_graph \
+--in_graph=/frozen_inference_graph.pb \
+--out_graph=/lab/optimized_inception_graph.pb \
+--inputs='image_tensor' \
+--outputs='num_detections,detection_scores,detection_boxes,detection_classes' \
+--transforms='
+  strip_unused_nodes(type=float, shape="1,300,300,3")
+  remove_nodes(op=Identity, op=CheckNumerics)
+  fold_constants(ignore_errors=true)
+  fold_batch_norms
+  fold_old_batch_norms'
+```
\ No newline at end of file
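The optimized graph drops in wherever the original frozen graph was loaded; nothing else changes. For reference, a standard TF 1.x loading sketch consistent with what `process_frames` does (the `PATH_TO_CKPT` value here is a placeholder, not a path from this repo):
```
import tensorflow as tf

PATH_TO_CKPT = '/lab/optimized_inception_graph.pb'  # or the original frozen_inference_graph.pb

# load the serialized GraphDef and import it into a fresh graph
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')

# the session and tensor lookups are unchanged from the detection loop
sess = tf.Session(graph=detection_graph)
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
```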