From 7e3d2f66112cafcfafd90276f2319e04f6b72144 Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Fri, 1 Feb 2019 06:35:10 -0600 Subject: [PATCH 01/11] reduce CPU usage --- detect_objects.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/detect_objects.py b/detect_objects.py index 237ef0d88..685cb94e8 100644 --- a/detect_objects.py +++ b/detect_objects.py @@ -193,6 +193,9 @@ def process_frames(shared_arr, shared_output_arr, shared_frame_time, frame_shape if no_frames_available > 0 and (datetime.datetime.now().timestamp() - no_frames_available) > 30: time.sleep(1) print("sleeping because no frames have been available in a while") + else: + # rest a little bit to avoid maxing out the CPU + time.sleep(0.01) continue # we got a valid frame, so reset the timer @@ -202,6 +205,8 @@ def process_frames(shared_arr, shared_output_arr, shared_frame_time, frame_shape if (datetime.datetime.now().timestamp() - shared_frame_time.value) > 0.5: # signal that we need a new frame shared_frame_time.value = 0.0 + # rest a little bit to avoid maxing out the CPU + time.sleep(0.01) continue # make a copy of the frame From 98ce5a4a59f0ba155501ce7f7657a030b89aa810 Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Fri, 1 Feb 2019 06:35:48 -0600 Subject: [PATCH 02/11] wip: just detect objects in a specific area --- detect_objects.py | 71 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/detect_objects.py b/detect_objects.py index 685cb94e8..8c4ca2142 100644 --- a/detect_objects.py +++ b/detect_objects.py @@ -29,9 +29,9 @@ categories = label_map_util.convert_label_map_to_categories(label_map, max_num_c use_display_name=True) category_index = label_map_util.create_category_index(categories) -def detect_objects(image_np, sess, detection_graph): +def detect_objects(cropped_frame, full_frame, sess, detection_graph): # Expand dimensions since the model expects images to have shape: [1, None, None, 3] - image_np_expanded = np.expand_dims(image_np, axis=0) + image_np_expanded = np.expand_dims(cropped_frame, axis=0) image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') # Each box represents a part of the image where a particular object was detected. 
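Patch 01's fix is the standard cure for a busy-wait: when the shared timestamp signals that no new frame exists, sleep briefly instead of spinning. A condensed sketch of that polling pattern, assuming a `multiprocessing.Value('d')` timestamp as in the patch (the helper name and return value are mine, not the file's exact code):
```
import time
import datetime

def wait_for_new_frame(shared_frame_time):
    """Poll the shared capture timestamp, yielding the CPU while no frame is ready."""
    no_frames_since = None
    while True:
        if shared_frame_time.value == 0.0:
            now = datetime.datetime.now().timestamp()
            if no_frames_since is None:
                no_frames_since = now
            if now - no_frames_since > 30:
                # the feed has been quiet for a while, so back off harder
                time.sleep(1)
            else:
                # rest a little bit to avoid maxing out the CPU
                time.sleep(0.01)
            continue
        # a frame is ready: return its capture time so staleness can be checked
        return shared_frame_time.value
```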
@@ -52,22 +52,41 @@ def detect_objects(image_np, sess, detection_graph): objects = [] for index, value in enumerate(classes[0]): object_dict = {} - if scores[0, index] > 0.5: + if scores[0, index] > 0.1: object_dict[(category_index.get(value)).get('name').encode('utf8')] = \ scores[0, index] objects.append(object_dict) + squeezed_boxes = np.squeeze(boxes) + squeezed_scores = np.squeeze(scores) + + if(len(objects)>0): + # reposition bounding box based on full frame + for i, box in enumerate(squeezed_boxes): + if squeezed_scores[i] > .1: + ymin = ((box[0] * 300) + 200)/1080 # ymin + xmin = ((box[1] * 300) + 1300)/1920 # xmin + xmax = ((box[2] * 300) + 200)/1080 # ymax + ymax = ((box[3] * 300) + 1300)/1920 # xmax + print("ymin", box[0] * 300, ymin) + print("xmin", box[1] * 300, xmin) + print("ymax", box[2] * 300, ymax) + print("xmax", box[3] * 300, xmax) + # draw boxes for detected objects on image vis_util.visualize_boxes_and_labels_on_image_array( - image_np, - np.squeeze(boxes), + cropped_frame, + squeezed_boxes, np.squeeze(classes).astype(np.int32), - np.squeeze(scores), + squeezed_scores, category_index, use_normalized_coordinates=True, - line_thickness=4) + line_thickness=4, + min_score_thresh=.1) + + # cv2.rectangle(full_frame, (800, 100), (1250, 550), (255,0,0), 2) - return objects, image_np + return objects, cropped_frame def main(): # capture a single frame and check the frame shape so the correct array @@ -88,18 +107,21 @@ def main(): shared_frame_time = mp.Value('d', 0.0) # compute the flattened array length from the array shape flat_array_length = frame_shape[0] * frame_shape[1] * frame_shape[2] - # create shared array for passing the image data from capture to detect_objects + # create shared array for storing the full frame image data shared_arr = mp.Array(ctypes.c_uint16, flat_array_length) + # create shared array for storing the cropped frame image data + # TODO: make dynamic + shared_cropped_arr = mp.Array(ctypes.c_uint16, 300*300*3) # create shared array for passing the image data from detect_objects to flask - shared_output_arr = mp.Array(ctypes.c_uint16, flat_array_length) + shared_output_arr = mp.Array(ctypes.c_uint16, 300*300*3)#flat_array_length) # create a numpy array with the image shape from the shared memory array # this is used by flask to output an mjpeg stream - frame_output_arr = tonumpyarray(shared_output_arr).reshape(frame_shape) + frame_output_arr = tonumpyarray(shared_output_arr).reshape(300,300,3) - capture_process = mp.Process(target=fetch_frames, args=(shared_arr, shared_frame_time, frame_shape)) + capture_process = mp.Process(target=fetch_frames, args=(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape)) capture_process.daemon = True - detection_process = mp.Process(target=process_frames, args=(shared_arr, shared_output_arr, shared_frame_time, frame_shape)) + detection_process = mp.Process(target=process_frames, args=(shared_arr, shared_cropped_arr, shared_output_arr, shared_frame_time, frame_shape)) detection_process.daemon = True capture_process.start() @@ -119,9 +141,9 @@ def main(): # max out at 5 FPS time.sleep(0.2) # convert back to BGR - frame_bgr = cv2.cvtColor(frame_output_arr, cv2.COLOR_RGB2BGR) + # frame_bgr = cv2.cvtColor(frame_output_arr, cv2.COLOR_RGB2BGR) # encode the image into a jpg - ret, jpg = cv2.imencode('.jpg', frame_bgr) + ret, jpg = cv2.imencode('.jpg', frame_output_arr) yield (b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n' + jpg.tobytes() + b'\r\n\r\n') @@ -136,9 +158,10 @@ def tonumpyarray(mp_arr): # fetch the 
frames as fast a possible, only decoding the frames when the # detection_process has consumed the current frame -def fetch_frames(shared_arr, shared_frame_time, frame_shape): +def fetch_frames(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape): # convert shared memory array into numpy and shape into image array arr = tonumpyarray(shared_arr).reshape(frame_shape) + cropped_frame = tonumpyarray(shared_cropped_arr).reshape(300,300,3) # start the video capture video = cv2.VideoCapture(RTSP_URL) @@ -158,6 +181,12 @@ def fetch_frames(shared_arr, shared_frame_time, frame_shape): ret, frame = video.retrieve() if ret: # copy the frame into the numpy array + # Position 1 + # cropped_frame[:] = frame[270:720, 550:1000] + # Position 2 + # frame_cropped = frame[270:720, 100:550] + # Car + cropped_frame[:] = frame[200:500, 1300:1600] arr[:] = frame # signal to the detection_process by setting the shared_frame_time shared_frame_time.value = frame_time.timestamp() @@ -165,11 +194,12 @@ def fetch_frames(shared_arr, shared_frame_time, frame_shape): video.release() # do the actual object detection -def process_frames(shared_arr, shared_output_arr, shared_frame_time, frame_shape): +def process_frames(shared_arr, shared_cropped_arr, shared_output_arr, shared_frame_time, frame_shape): # shape shared input array into frame for processing arr = tonumpyarray(shared_arr).reshape(frame_shape) + shared_cropped_frame = tonumpyarray(shared_cropped_arr).reshape(300,300,3) # shape shared output array into frame so it can be copied into - output_arr = tonumpyarray(shared_output_arr).reshape(frame_shape) + output_arr = tonumpyarray(shared_output_arr).reshape(300,300,3) # Load a (frozen) Tensorflow model into memory before the processing loop detection_graph = tf.Graph() @@ -211,14 +241,15 @@ def process_frames(shared_arr, shared_output_arr, shared_frame_time, frame_shape # make a copy of the frame frame = arr.copy() + cropped_frame = shared_cropped_frame.copy() frame_time = shared_frame_time.value # signal that the frame has been used so a new one will be ready shared_frame_time.value = 0.0 # convert to RGB - frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + cropped_frame_rgb = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2RGB) # do the object detection - objects, frame_overlay = detect_objects(frame_rgb, sess, detection_graph) + objects, frame_overlay = detect_objects(cropped_frame_rgb, frame, sess, detection_graph) # copy the output frame with the bounding boxes to the output array output_arr[:] = frame_overlay if(len(objects) > 0): From a976403edcbfc9c083682aa8ad5e7ec73da20b73 Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Fri, 1 Feb 2019 06:57:03 -0600 Subject: [PATCH 03/11] got bounding boxes repositioned for full frame --- detect_objects.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/detect_objects.py b/detect_objects.py index 8c4ca2142..bce8835d4 100644 --- a/detect_objects.py +++ b/detect_objects.py @@ -60,22 +60,21 @@ def detect_objects(cropped_frame, full_frame, sess, detection_graph): squeezed_boxes = np.squeeze(boxes) squeezed_scores = np.squeeze(scores) + full_frame_shape = full_frame.shape + cropped_frame_shape = cropped_frame.shape + if(len(objects)>0): # reposition bounding box based on full frame for i, box in enumerate(squeezed_boxes): - if squeezed_scores[i] > .1: - ymin = ((box[0] * 300) + 200)/1080 # ymin - xmin = ((box[1] * 300) + 1300)/1920 # xmin - xmax = ((box[2] * 300) + 200)/1080 # ymax - ymax = ((box[3] * 300) + 1300)/1920 # 
xmax - print("ymin", box[0] * 300, ymin) - print("xmin", box[1] * 300, xmin) - print("ymax", box[2] * 300, ymax) - print("xmax", box[3] * 300, xmax) + if box[2] > 0: + squeezed_boxes[i][0] = ((box[0] * cropped_frame_shape[0]) + 200)/full_frame_shape[0] # ymin + squeezed_boxes[i][1] = ((box[1] * cropped_frame_shape[0]) + 1300)/full_frame_shape[1] # xmin + squeezed_boxes[i][2] = ((box[2] * cropped_frame_shape[0]) + 200)/full_frame_shape[0] # ymax + squeezed_boxes[i][3] = ((box[3] * cropped_frame_shape[0]) + 1300)/full_frame_shape[1] # xmax # draw boxes for detected objects on image vis_util.visualize_boxes_and_labels_on_image_array( - cropped_frame, + full_frame, squeezed_boxes, np.squeeze(classes).astype(np.int32), squeezed_scores, @@ -86,7 +85,7 @@ def detect_objects(cropped_frame, full_frame, sess, detection_graph): # cv2.rectangle(full_frame, (800, 100), (1250, 550), (255,0,0), 2) - return objects, cropped_frame + return objects, full_frame def main(): # capture a single frame and check the frame shape so the correct array @@ -113,10 +112,10 @@ def main(): # TODO: make dynamic shared_cropped_arr = mp.Array(ctypes.c_uint16, 300*300*3) # create shared array for passing the image data from detect_objects to flask - shared_output_arr = mp.Array(ctypes.c_uint16, 300*300*3)#flat_array_length) + shared_output_arr = mp.Array(ctypes.c_uint16, flat_array_length) # create a numpy array with the image shape from the shared memory array # this is used by flask to output an mjpeg stream - frame_output_arr = tonumpyarray(shared_output_arr).reshape(300,300,3) + frame_output_arr = tonumpyarray(shared_output_arr).reshape(frame_shape) capture_process = mp.Process(target=fetch_frames, args=(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape)) capture_process.daemon = True @@ -199,7 +198,7 @@ def process_frames(shared_arr, shared_cropped_arr, shared_output_arr, shared_fra arr = tonumpyarray(shared_arr).reshape(frame_shape) shared_cropped_frame = tonumpyarray(shared_cropped_arr).reshape(300,300,3) # shape shared output array into frame so it can be copied into - output_arr = tonumpyarray(shared_output_arr).reshape(300,300,3) + output_arr = tonumpyarray(shared_output_arr).reshape(frame_shape) # Load a (frozen) Tensorflow model into memory before the processing loop detection_graph = tf.Graph() From 11af9bb953feb5aef8eb6b99821e3116ad625d2d Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Fri, 1 Feb 2019 21:38:13 -0600 Subject: [PATCH 04/11] wip: focus on dynamic region and delay drawing until viewing --- detect_objects.py | 90 +++++++++++++++++++---------------------------- 1 file changed, 36 insertions(+), 54 deletions(-) diff --git a/detect_objects.py b/detect_objects.py index bce8835d4..6affdc087 100644 --- a/detect_objects.py +++ b/detect_objects.py @@ -23,13 +23,17 @@ PATH_TO_LABELS = '/label_map.pbtext' # TODO: make dynamic? 
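Patch 03's repositioning math generalizes to any square crop: scale the normalized box by the crop size, add the crop's pixel offset, then re-normalize against the full frame. A sketch of that transform as a standalone helper (the function and parameter names are mine; the patch inlines this arithmetic with hard-coded 200/1300 offsets):
```
def crop_box_to_full_frame(box, region_size, x_offset, y_offset, full_h, full_w):
    """Map a normalized [ymin, xmin, ymax, xmax] box detected inside a square
    crop back to normalized coordinates on the full frame."""
    ymin, xmin, ymax, xmax = box
    return [
        (ymin * region_size + y_offset) / full_h,
        (xmin * region_size + x_offset) / full_w,
        (ymax * region_size + y_offset) / full_h,
        (xmax * region_size + x_offset) / full_w,
    ]

# e.g. a box from a 300x300 crop at (x=1300, y=200) on a 1920x1080 frame:
# crop_box_to_full_frame([0.1, 0.2, 0.5, 0.6], 300, 1300, 200, 1080, 1920)
```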
NUM_CLASSES = 90 +REGION_SIZE = 700 +REGION_X_OFFSET = 950 +REGION_Y_OFFSET = 380 + # Loading label map label_map = label_map_util.load_labelmap(PATH_TO_LABELS) categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True) category_index = label_map_util.create_category_index(categories) -def detect_objects(cropped_frame, full_frame, sess, detection_graph): +def detect_objects(cropped_frame, sess, detection_graph): # Expand dimensions since the model expects images to have shape: [1, None, None, 3] image_np_expanded = np.expand_dims(cropped_frame, axis=0) image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') @@ -51,41 +55,11 @@ def detect_objects(cropped_frame, full_frame, sess, detection_graph): # build an array of detected objects objects = [] for index, value in enumerate(classes[0]): - object_dict = {} - if scores[0, index] > 0.1: - object_dict[(category_index.get(value)).get('name').encode('utf8')] = \ - scores[0, index] - objects.append(object_dict) + score = scores[0, index] + if score > 0.1: + objects += [value, scores[0, index]] + boxes[0, index].tolist() - squeezed_boxes = np.squeeze(boxes) - squeezed_scores = np.squeeze(scores) - - full_frame_shape = full_frame.shape - cropped_frame_shape = cropped_frame.shape - - if(len(objects)>0): - # reposition bounding box based on full frame - for i, box in enumerate(squeezed_boxes): - if box[2] > 0: - squeezed_boxes[i][0] = ((box[0] * cropped_frame_shape[0]) + 200)/full_frame_shape[0] # ymin - squeezed_boxes[i][1] = ((box[1] * cropped_frame_shape[0]) + 1300)/full_frame_shape[1] # xmin - squeezed_boxes[i][2] = ((box[2] * cropped_frame_shape[0]) + 200)/full_frame_shape[0] # ymax - squeezed_boxes[i][3] = ((box[3] * cropped_frame_shape[0]) + 1300)/full_frame_shape[1] # xmax - - # draw boxes for detected objects on image - vis_util.visualize_boxes_and_labels_on_image_array( - full_frame, - squeezed_boxes, - np.squeeze(classes).astype(np.int32), - squeezed_scores, - category_index, - use_normalized_coordinates=True, - line_thickness=4, - min_score_thresh=.1) - - # cv2.rectangle(full_frame, (800, 100), (1250, 550), (255,0,0), 2) - - return objects, full_frame + return objects def main(): # capture a single frame and check the frame shape so the correct array @@ -108,14 +82,13 @@ def main(): flat_array_length = frame_shape[0] * frame_shape[1] * frame_shape[2] # create shared array for storing the full frame image data shared_arr = mp.Array(ctypes.c_uint16, flat_array_length) + # shape current frame so it can be treated as an image + frame_arr = tonumpyarray(shared_arr).reshape(frame_shape) # create shared array for storing the cropped frame image data # TODO: make dynamic - shared_cropped_arr = mp.Array(ctypes.c_uint16, 300*300*3) + shared_cropped_arr = mp.Array(ctypes.c_uint16, REGION_SIZE*REGION_SIZE*3) # create shared array for passing the image data from detect_objects to flask - shared_output_arr = mp.Array(ctypes.c_uint16, flat_array_length) - # create a numpy array with the image shape from the shared memory array - # this is used by flask to output an mjpeg stream - frame_output_arr = tonumpyarray(shared_output_arr).reshape(frame_shape) + shared_output_arr = mp.Array(ctypes.c_double, 6*10) capture_process = mp.Process(target=fetch_frames, args=(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape)) capture_process.daemon = True @@ -139,10 +112,23 @@ def main(): while True: # max out at 5 FPS time.sleep(0.2) - # convert back to BGR - # frame_bgr = 
cv2.cvtColor(frame_output_arr, cv2.COLOR_RGB2BGR) + frame = frame_arr.copy() + # draw the bounding boxes on the screen + object_index = 0 + while(object_index < 60 and shared_output_arr[object_index] > 0): + object_class = shared_output_arr[object_index] + score = shared_output_arr[object_index+1] + ymin = int(((shared_output_arr[object_index+2] * REGION_SIZE) + REGION_Y_OFFSET)) + xmin = int(((shared_output_arr[object_index+3] * REGION_SIZE) + REGION_X_OFFSET)) + ymax = int(((shared_output_arr[object_index+4] * REGION_SIZE) + REGION_Y_OFFSET)) + xmax = int(((shared_output_arr[object_index+5] * REGION_SIZE) + REGION_X_OFFSET)) + cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255,0,0), 2) + object_index += 6 + print(category_index.get(object_class).get('name').encode('utf8'), score) # encode the image into a jpg - ret, jpg = cv2.imencode('.jpg', frame_output_arr) + + cv2.rectangle(frame, (REGION_X_OFFSET, REGION_Y_OFFSET), (REGION_X_OFFSET+REGION_SIZE, REGION_Y_OFFSET+REGION_SIZE), (255,255,255), 2) + ret, jpg = cv2.imencode('.jpg', frame) yield (b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n' + jpg.tobytes() + b'\r\n\r\n') @@ -160,7 +146,7 @@ def tonumpyarray(mp_arr): def fetch_frames(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape): # convert shared memory array into numpy and shape into image array arr = tonumpyarray(shared_arr).reshape(frame_shape) - cropped_frame = tonumpyarray(shared_cropped_arr).reshape(300,300,3) + cropped_frame = tonumpyarray(shared_cropped_arr).reshape(REGION_SIZE,REGION_SIZE,3) # start the video capture video = cv2.VideoCapture(RTSP_URL) @@ -185,7 +171,7 @@ def fetch_frames(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape) # Position 2 # frame_cropped = frame[270:720, 100:550] # Car - cropped_frame[:] = frame[200:500, 1300:1600] + cropped_frame[:] = frame[REGION_Y_OFFSET:REGION_Y_OFFSET+REGION_SIZE, REGION_X_OFFSET:REGION_X_OFFSET+REGION_SIZE] arr[:] = frame # signal to the detection_process by setting the shared_frame_time shared_frame_time.value = frame_time.timestamp() @@ -196,9 +182,7 @@ def fetch_frames(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape) def process_frames(shared_arr, shared_cropped_arr, shared_output_arr, shared_frame_time, frame_shape): # shape shared input array into frame for processing arr = tonumpyarray(shared_arr).reshape(frame_shape) - shared_cropped_frame = tonumpyarray(shared_cropped_arr).reshape(300,300,3) - # shape shared output array into frame so it can be copied into - output_arr = tonumpyarray(shared_output_arr).reshape(frame_shape) + shared_cropped_frame = tonumpyarray(shared_cropped_arr).reshape(REGION_SIZE,REGION_SIZE,3) # Load a (frozen) Tensorflow model into memory before the processing loop detection_graph = tf.Graph() @@ -239,7 +223,7 @@ def process_frames(shared_arr, shared_cropped_arr, shared_output_arr, shared_fra continue # make a copy of the frame - frame = arr.copy() + # frame = arr.copy() cropped_frame = shared_cropped_frame.copy() frame_time = shared_frame_time.value # signal that the frame has been used so a new one will be ready @@ -248,11 +232,9 @@ def process_frames(shared_arr, shared_cropped_arr, shared_output_arr, shared_fra # convert to RGB cropped_frame_rgb = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2RGB) # do the object detection - objects, frame_overlay = detect_objects(cropped_frame_rgb, frame, sess, detection_graph) - # copy the output frame with the bounding boxes to the output array - output_arr[:] = frame_overlay - if(len(objects) > 0): - 
print(objects) + objects = detect_objects(cropped_frame_rgb, sess, detection_graph) + # copy the detected objects to the output array, filling the array when needed + shared_output_arr[:] = objects + [0.0] * (60-len(objects)) if __name__ == '__main__': mp.freeze_support() From b91c24bf8f0dda07e9d61eed234ce9c030418037 Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Sat, 2 Feb 2019 08:16:35 -0600 Subject: [PATCH 05/11] crop the frame and calculate the coordinates in the subprocess and add labels to the image --- detect_objects.py | 70 +++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/detect_objects.py b/detect_objects.py index 6affdc087..8718ed6ba 100644 --- a/detect_objects.py +++ b/detect_objects.py @@ -23,9 +23,9 @@ PATH_TO_LABELS = '/label_map.pbtext' # TODO: make dynamic? NUM_CLASSES = 90 -REGION_SIZE = 700 -REGION_X_OFFSET = 950 -REGION_Y_OFFSET = 380 +REGION_SIZE = 300 +REGION_X_OFFSET = 1250 +REGION_Y_OFFSET = 180 # Loading label map label_map = label_map_util.load_labelmap(PATH_TO_LABELS) @@ -33,7 +33,7 @@ categories = label_map_util.convert_label_map_to_categories(label_map, max_num_c use_display_name=True) category_index = label_map_util.create_category_index(categories) -def detect_objects(cropped_frame, sess, detection_graph): +def detect_objects(cropped_frame, sess, detection_graph, region_size, region_x_offset, region_y_offset): # Expand dimensions since the model expects images to have shape: [1, None, None, 3] image_np_expanded = np.expand_dims(cropped_frame, axis=0) image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') @@ -57,7 +57,15 @@ def detect_objects(cropped_frame, sess, detection_graph): for index, value in enumerate(classes[0]): score = scores[0, index] if score > 0.1: - objects += [value, scores[0, index]] + boxes[0, index].tolist() + box = boxes[0, index].tolist() + box[0] = (box[0] * region_size) + region_y_offset + box[1] = (box[1] * region_size) + region_x_offset + box[2] = (box[2] * region_size) + region_y_offset + box[3] = (box[3] * region_size) + region_x_offset + objects += [value, scores[0, index]] + box + # only get the first 10 objects + if len(objects) = 60: + break return objects @@ -84,16 +92,13 @@ def main(): shared_arr = mp.Array(ctypes.c_uint16, flat_array_length) # shape current frame so it can be treated as an image frame_arr = tonumpyarray(shared_arr).reshape(frame_shape) - # create shared array for storing the cropped frame image data - # TODO: make dynamic - shared_cropped_arr = mp.Array(ctypes.c_uint16, REGION_SIZE*REGION_SIZE*3) - # create shared array for passing the image data from detect_objects to flask + # create shared array for storing 10 detected objects shared_output_arr = mp.Array(ctypes.c_double, 6*10) - capture_process = mp.Process(target=fetch_frames, args=(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape)) + capture_process = mp.Process(target=fetch_frames, args=(shared_arr, shared_frame_time, frame_shape)) capture_process.daemon = True - detection_process = mp.Process(target=process_frames, args=(shared_arr, shared_cropped_arr, shared_output_arr, shared_frame_time, frame_shape)) + detection_process = mp.Process(target=process_frames, args=(shared_arr, shared_output_arr, shared_frame_time, frame_shape, REGION_SIZE, REGION_X_OFFSET, REGION_Y_OFFSET)) detection_process.daemon = True capture_process.start() @@ -113,21 +118,33 @@ def main(): # max out at 5 FPS time.sleep(0.2) frame = frame_arr.copy() + # convert to RGB for drawing + frame 
= cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # draw the bounding boxes on the screen object_index = 0 while(object_index < 60 and shared_output_arr[object_index] > 0): object_class = shared_output_arr[object_index] + object_name = str(category_index.get(object_class).get('name')) score = shared_output_arr[object_index+1] - ymin = int(((shared_output_arr[object_index+2] * REGION_SIZE) + REGION_Y_OFFSET)) - xmin = int(((shared_output_arr[object_index+3] * REGION_SIZE) + REGION_X_OFFSET)) - ymax = int(((shared_output_arr[object_index+4] * REGION_SIZE) + REGION_Y_OFFSET)) - xmax = int(((shared_output_arr[object_index+5] * REGION_SIZE) + REGION_X_OFFSET)) - cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255,0,0), 2) + display_str = '{}: {}%'.format(object_name, int(100*score)) + ymin = int(shared_output_arr[object_index+2]) + xmin = int(shared_output_arr[object_index+3]) + ymax = int(shared_output_arr[object_index+4]) + xmax = int(shared_output_arr[object_index+5]) + vis_util.draw_bounding_box_on_image_array(frame, + ymin, + xmin, + ymax, + xmax, + color='red', + thickness=2, + display_str_list=[display_str], + use_normalized_coordinates=False) object_index += 6 - print(category_index.get(object_class).get('name').encode('utf8'), score) - # encode the image into a jpg - cv2.rectangle(frame, (REGION_X_OFFSET, REGION_Y_OFFSET), (REGION_X_OFFSET+REGION_SIZE, REGION_Y_OFFSET+REGION_SIZE), (255,255,255), 2) + # convert back to BGR + frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) + # encode the image into a jpg ret, jpg = cv2.imencode('.jpg', frame) yield (b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n' + jpg.tobytes() + b'\r\n\r\n') @@ -143,10 +160,9 @@ def tonumpyarray(mp_arr): # fetch the frames as fast a possible, only decoding the frames when the # detection_process has consumed the current frame -def fetch_frames(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape): +def fetch_frames(shared_arr, shared_frame_time, frame_shape): # convert shared memory array into numpy and shape into image array arr = tonumpyarray(shared_arr).reshape(frame_shape) - cropped_frame = tonumpyarray(shared_cropped_arr).reshape(REGION_SIZE,REGION_SIZE,3) # start the video capture video = cv2.VideoCapture(RTSP_URL) @@ -170,8 +186,6 @@ def fetch_frames(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape) # cropped_frame[:] = frame[270:720, 550:1000] # Position 2 # frame_cropped = frame[270:720, 100:550] - # Car - cropped_frame[:] = frame[REGION_Y_OFFSET:REGION_Y_OFFSET+REGION_SIZE, REGION_X_OFFSET:REGION_X_OFFSET+REGION_SIZE] arr[:] = frame # signal to the detection_process by setting the shared_frame_time shared_frame_time.value = frame_time.timestamp() @@ -179,10 +193,9 @@ def fetch_frames(shared_arr, shared_cropped_arr, shared_frame_time, frame_shape) video.release() # do the actual object detection -def process_frames(shared_arr, shared_cropped_arr, shared_output_arr, shared_frame_time, frame_shape): +def process_frames(shared_arr, shared_output_arr, shared_frame_time, frame_shape, region_size, region_x_offset, region_y_offset): # shape shared input array into frame for processing arr = tonumpyarray(shared_arr).reshape(frame_shape) - shared_cropped_frame = tonumpyarray(shared_cropped_arr).reshape(REGION_SIZE,REGION_SIZE,3) # Load a (frozen) Tensorflow model into memory before the processing loop detection_graph = tf.Graph() @@ -222,9 +235,8 @@ def process_frames(shared_arr, shared_cropped_arr, shared_output_arr, shared_fra time.sleep(0.01) continue - # make a copy of the frame - # frame = 
arr.copy() - cropped_frame = shared_cropped_frame.copy() + # make a copy of the cropped frame + cropped_frame = arr[region_y_offset:region_y_offset+region_size, region_x_offset:region_x_offset+region_size].copy() frame_time = shared_frame_time.value # signal that the frame has been used so a new one will be ready shared_frame_time.value = 0.0 @@ -232,7 +244,7 @@ def process_frames(shared_arr, shared_cropped_arr, shared_output_arr, shared_fra # convert to RGB cropped_frame_rgb = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2RGB) # do the object detection - objects = detect_objects(cropped_frame_rgb, sess, detection_graph) + objects = detect_objects(cropped_frame_rgb, sess, detection_graph, region_size, region_x_offset, region_y_offset) # copy the detected objects to the output array, filling the array when needed shared_output_arr[:] = objects + [0.0] * (60-len(objects)) From 072997736c0468b183e9ac72d8287091f706a201 Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Mon, 4 Feb 2019 06:18:49 -0600 Subject: [PATCH 06/11] parse the objects into a global array in a separate thread --- detect_objects.py | 68 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/detect_objects.py b/detect_objects.py index 8718ed6ba..d23660b54 100644 --- a/detect_objects.py +++ b/detect_objects.py @@ -5,6 +5,7 @@ import datetime import ctypes import logging import multiprocessing as mp +import threading from contextlib import closing import numpy as np import tensorflow as tf @@ -27,6 +28,8 @@ REGION_SIZE = 300 REGION_X_OFFSET = 1250 REGION_Y_OFFSET = 180 +DETECTED_OBJECTS = [] + # Loading label map label_map = label_map_util.load_labelmap(PATH_TO_LABELS) categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, @@ -64,11 +67,36 @@ def detect_objects(cropped_frame, sess, detection_graph, region_size, region_x_o box[3] = (box[3] * region_size) + region_x_offset objects += [value, scores[0, index]] + box # only get the first 10 objects - if len(objects) = 60: + if len(objects) == 60: break return objects +class ObjectParser(threading.Thread): + def __init__(self, object_arrays): + threading.Thread.__init__(self) + self._object_arrays = object_arrays + + def run(self): + global DETECTED_OBJECTS + while True: + detected_objects = [] + for object_array in self._object_arrays: + object_index = 0 + while(object_index < 60 and object_array[object_index] > 0): + object_class = object_array[object_index] + detected_objects.append({ + 'name': str(category_index.get(object_class).get('name')), + 'score': object_array[object_index+1], + 'ymin': int(object_array[object_index+2]), + 'xmin': int(object_array[object_index+3]), + 'ymax': int(object_array[object_index+4]), + 'xmax': int(object_array[object_index+5]) + }) + object_index += 6 + DETECTED_OBJECTS = detected_objects + time.sleep(0.01) + def main(): # capture a single frame and check the frame shape so the correct array # size can be allocated in memory @@ -101,6 +129,9 @@ def main(): detection_process = mp.Process(target=process_frames, args=(shared_arr, shared_output_arr, shared_frame_time, frame_shape, REGION_SIZE, REGION_X_OFFSET, REGION_Y_OFFSET)) detection_process.daemon = True + object_parser = ObjectParser([shared_output_arr]) + object_parser.start() + capture_process.start() print("capture_process pid ", capture_process.pid) detection_process.start() @@ -114,33 +145,27 @@ def main(): return Response(imagestream(), mimetype='multipart/x-mixed-replace; boundary=frame') 
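Patches 05 and 06 settle on a flat wire format for handing detections between processes: six doubles per object (class, score, ymin, xmin, ymax, xmax), at most ten objects, zero-padded to 60 slots. A minimal standalone sketch of that encoding and the matching decode the `ObjectParser` thread performs (the dict keys and function names are mine; the patch reads the slots positionally):
```
MAX_OBJECTS = 10
FIELDS = 6  # class, score, ymin, xmin, ymax, xmax

def encode_objects(detections):
    """Flatten detections into a fixed-length list of doubles, zero-padded."""
    flat = []
    for d in detections[:MAX_OBJECTS]:
        flat += [d['class'], d['score'], d['ymin'], d['xmin'], d['ymax'], d['xmax']]
    return flat + [0.0] * (MAX_OBJECTS * FIELDS - len(flat))

def decode_objects(flat):
    """Read detections back out, stopping at the first zeroed class slot.

    This works because COCO class ids start at 1, so 0.0 can never be
    a real class value."""
    detections = []
    for i in range(0, MAX_OBJECTS * FIELDS, FIELDS):
        if flat[i] == 0.0:
            break
        detections.append({
            'class': int(flat[i]), 'score': flat[i + 1],
            'ymin': flat[i + 2], 'xmin': flat[i + 3],
            'ymax': flat[i + 4], 'xmax': flat[i + 5],
        })
    return detections
```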
def imagestream(): + global DETECTED_OBJECTS while True: # max out at 5 FPS time.sleep(0.2) + # make a copy of the current detected objects + detected_objects = DETECTED_OBJECTS.copy() + # make a copy of the current frame frame = frame_arr.copy() # convert to RGB for drawing frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # draw the bounding boxes on the screen - object_index = 0 - while(object_index < 60 and shared_output_arr[object_index] > 0): - object_class = shared_output_arr[object_index] - object_name = str(category_index.get(object_class).get('name')) - score = shared_output_arr[object_index+1] - display_str = '{}: {}%'.format(object_name, int(100*score)) - ymin = int(shared_output_arr[object_index+2]) - xmin = int(shared_output_arr[object_index+3]) - ymax = int(shared_output_arr[object_index+4]) - xmax = int(shared_output_arr[object_index+5]) + for obj in DETECTED_OBJECTS: vis_util.draw_bounding_box_on_image_array(frame, - ymin, - xmin, - ymax, - xmax, + obj['ymin'], + obj['xmin'], + obj['ymax'], + obj['xmax'], color='red', thickness=2, - display_str_list=[display_str], + display_str_list=["{}: {}%".format(obj['name'],int(obj['score']*100))], use_normalized_coordinates=False) - object_index += 6 cv2.rectangle(frame, (REGION_X_OFFSET, REGION_Y_OFFSET), (REGION_X_OFFSET+REGION_SIZE, REGION_Y_OFFSET+REGION_SIZE), (255,255,255), 2) # convert back to BGR frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) @@ -153,6 +178,7 @@ def main(): capture_process.join() detection_process.join() + object_parser.join() # convert shared memory array into numpy array def tonumpyarray(mp_arr): @@ -181,14 +207,12 @@ def fetch_frames(shared_arr, shared_frame_time, frame_shape): # go ahead and decode the current frame ret, frame = video.retrieve() if ret: - # copy the frame into the numpy array - # Position 1 - # cropped_frame[:] = frame[270:720, 550:1000] - # Position 2 - # frame_cropped = frame[270:720, 100:550] arr[:] = frame # signal to the detection_process by setting the shared_frame_time shared_frame_time.value = frame_time.timestamp() + else: + # sleep a little to reduce CPU usage + time.sleep(0.01) video.release() From 623a3044fb3a6e36b8207d9c0ab3543e8676e660 Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Mon, 4 Feb 2019 06:34:32 -0600 Subject: [PATCH 07/11] multiple hard coded processes --- detect_objects.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/detect_objects.py b/detect_objects.py index d23660b54..dbb21a07a 100644 --- a/detect_objects.py +++ b/detect_objects.py @@ -114,6 +114,7 @@ def main(): # is a float. otherwise it stops updating the value in shared # memory. 
probably something to do with the size of the memory block shared_frame_time = mp.Value('d', 0.0) + shared_frame_time2 = mp.Value('d', 0.0) # compute the flattened array length from the array shape flat_array_length = frame_shape[0] * frame_shape[1] * frame_shape[2] # create shared array for storing the full frame image data @@ -122,20 +123,28 @@ def main(): frame_arr = tonumpyarray(shared_arr).reshape(frame_shape) # create shared array for storing 10 detected objects shared_output_arr = mp.Array(ctypes.c_double, 6*10) + shared_output_arr2 = mp.Array(ctypes.c_double, 6*10) - capture_process = mp.Process(target=fetch_frames, args=(shared_arr, shared_frame_time, frame_shape)) + capture_process = mp.Process(target=fetch_frames, args=(shared_arr, [shared_frame_time, shared_frame_time2], frame_shape)) capture_process.daemon = True - detection_process = mp.Process(target=process_frames, args=(shared_arr, shared_output_arr, shared_frame_time, frame_shape, REGION_SIZE, REGION_X_OFFSET, REGION_Y_OFFSET)) + detection_process = mp.Process(target=process_frames, args=(shared_arr, shared_output_arr, + shared_frame_time, frame_shape, REGION_SIZE, REGION_X_OFFSET, REGION_Y_OFFSET)) detection_process.daemon = True - object_parser = ObjectParser([shared_output_arr]) + detection_process2 = mp.Process(target=process_frames, args=(shared_arr, shared_output_arr2, + shared_frame_time2, frame_shape, 1080, 0, 0)) + detection_process.daemon = True + + object_parser = ObjectParser([shared_output_arr, shared_output_arr2]) object_parser.start() capture_process.start() print("capture_process pid ", capture_process.pid) detection_process.start() print("detection_process pid ", detection_process.pid) + detection_process2.start() + print("detection_process pid ", detection_process2.pid) app = Flask(__name__) @@ -178,6 +187,7 @@ def main(): capture_process.join() detection_process.join() + detection_process2.join() object_parser.join() # convert shared memory array into numpy array @@ -186,7 +196,7 @@ def tonumpyarray(mp_arr): # fetch the frames as fast a possible, only decoding the frames when the # detection_process has consumed the current frame -def fetch_frames(shared_arr, shared_frame_time, frame_shape): +def fetch_frames(shared_arr, shared_frame_times, frame_shape): # convert shared memory array into numpy and shape into image array arr = tonumpyarray(shared_arr).reshape(frame_shape) @@ -203,13 +213,14 @@ def fetch_frames(shared_arr, shared_frame_time, frame_shape): if ret: # if the detection_process is ready for the next frame decode it # otherwise skip this frame and move onto the next one - if shared_frame_time.value == 0.0: + if all(shared_frame_time.value == 0.0 for shared_frame_time in shared_frame_times): # go ahead and decode the current frame ret, frame = video.retrieve() if ret: arr[:] = frame - # signal to the detection_process by setting the shared_frame_time - shared_frame_time.value = frame_time.timestamp() + # signal to the detection_processes by setting the shared_frame_time + for shared_frame_time in shared_frame_times: + shared_frame_time.value = frame_time.timestamp() else: # sleep a little to reduce CPU usage time.sleep(0.01) From 0359e2d2a1831acd15e68e7113ffd941e68ce16a Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Mon, 4 Feb 2019 07:07:13 -0600 Subject: [PATCH 08/11] dynamic number of processes based on selected regions --- detect_objects.py | 71 ++++++++++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 28 deletions(-) diff --git a/detect_objects.py 
b/detect_objects.py index dbb21a07a..d5625418a 100644 --- a/detect_objects.py +++ b/detect_objects.py @@ -24,9 +24,8 @@ PATH_TO_LABELS = '/label_map.pbtext' # TODO: make dynamic? NUM_CLASSES = 90 -REGION_SIZE = 300 -REGION_X_OFFSET = 1250 -REGION_Y_OFFSET = 180 +#REGIONS = "600,0,380:600,600,380:600,1200,380" +REGIONS = os.getenv('REGIONS') DETECTED_OBJECTS = [] @@ -98,6 +97,15 @@ class ObjectParser(threading.Thread): time.sleep(0.01) def main(): + # Parse selected regions + regions = [] + for region_string in REGIONS.split(':'): + region_parts = region_string.split(',') + regions.append({ + 'size': int(region_parts[0]), + 'x_offset': int(region_parts[1]), + 'y_offset': int(region_parts[2]) + }) # capture a single frame and check the frame shape so the correct array # size can be allocated in memory video = cv2.VideoCapture(RTSP_URL) @@ -109,42 +117,45 @@ def main(): exit(1) video.release() - # create shared value for storing the time the frame was captured - # note: this must be a double even though the value you are storing - # is a float. otherwise it stops updating the value in shared - # memory. probably something to do with the size of the memory block - shared_frame_time = mp.Value('d', 0.0) - shared_frame_time2 = mp.Value('d', 0.0) + shared_memory_objects = [] + for region in regions: + shared_memory_objects.append({ + # create shared value for storing the time the frame was captured + # note: this must be a double even though the value you are storing + # is a float. otherwise it stops updating the value in shared + # memory. probably something to do with the size of the memory block + 'frame_time': mp.Value('d', 0.0), + # create shared array for storing 10 detected objects + 'output_array': mp.Array(ctypes.c_double, 6*10) + }) + # compute the flattened array length from the array shape flat_array_length = frame_shape[0] * frame_shape[1] * frame_shape[2] # create shared array for storing the full frame image data shared_arr = mp.Array(ctypes.c_uint16, flat_array_length) # shape current frame so it can be treated as an image frame_arr = tonumpyarray(shared_arr).reshape(frame_shape) - # create shared array for storing 10 detected objects - shared_output_arr = mp.Array(ctypes.c_double, 6*10) - shared_output_arr2 = mp.Array(ctypes.c_double, 6*10) - capture_process = mp.Process(target=fetch_frames, args=(shared_arr, [shared_frame_time, shared_frame_time2], frame_shape)) + capture_process = mp.Process(target=fetch_frames, args=(shared_arr, [obj['frame_time'] for obj in shared_memory_objects], frame_shape)) capture_process.daemon = True - detection_process = mp.Process(target=process_frames, args=(shared_arr, shared_output_arr, - shared_frame_time, frame_shape, REGION_SIZE, REGION_X_OFFSET, REGION_Y_OFFSET)) - detection_process.daemon = True + detection_processes = [] + for index, region in enumerate(regions): + detection_process = mp.Process(target=process_frames, args=(shared_arr, + shared_memory_objects[index]['output_array'], + shared_memory_objects[index]['frame_time'], frame_shape, + region['size'], region['x_offset'], region['y_offset'])) + detection_process.daemon = True + detection_processes.append(detection_process) - detection_process2 = mp.Process(target=process_frames, args=(shared_arr, shared_output_arr2, - shared_frame_time2, frame_shape, 1080, 0, 0)) - detection_process.daemon = True - - object_parser = ObjectParser([shared_output_arr, shared_output_arr2]) + object_parser = ObjectParser([obj['output_array'] for obj in shared_memory_objects]) object_parser.start() 
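`REGIONS` is a colon-separated list of `size,x_offset,y_offset` triples, and each triple becomes one detection process. A sketch of the same parsing with up-front validation added (the validation and function name are additions, not in the patch):
```
def parse_regions(regions_str):
    """Parse 'size,x_offset,y_offset:...' into a list of region dicts."""
    regions = []
    for region_string in regions_str.split(':'):
        parts = region_string.split(',')
        if len(parts) != 3:
            raise ValueError("expected 'size,x_offset,y_offset', got: " + region_string)
        size, x_offset, y_offset = (int(p) for p in parts)
        regions.append({'size': size, 'x_offset': x_offset, 'y_offset': y_offset})
    return regions

# e.g. three 600x600 regions across a 1080p frame:
# parse_regions('600,0,380:600,600,380:600,1200,380')
```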
capture_process.start() print("capture_process pid ", capture_process.pid) - detection_process.start() - print("detection_process pid ", detection_process.pid) - detection_process2.start() - print("detection_process pid ", detection_process2.pid) + for detection_process in detection_processes: + detection_process.start() + print("detection_process pid ", detection_process.pid) app = Flask(__name__) @@ -175,7 +186,11 @@ def main(): thickness=2, display_str_list=["{}: {}%".format(obj['name'],int(obj['score']*100))], use_normalized_coordinates=False) - cv2.rectangle(frame, (REGION_X_OFFSET, REGION_Y_OFFSET), (REGION_X_OFFSET+REGION_SIZE, REGION_Y_OFFSET+REGION_SIZE), (255,255,255), 2) + + for region in regions: + cv2.rectangle(frame, (region['x_offset'], region['y_offset']), + (region['x_offset']+region['size'], region['y_offset']+region['size']), + (255,255,255), 2) # convert back to BGR frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) # encode the image into a jpg @@ -186,8 +201,8 @@ def main(): app.run(host='0.0.0.0', debug=False) capture_process.join() - detection_process.join() - detection_process2.join() + for detection_process in detection_processes: + detection_process.join() object_parser.join() # convert shared memory array into numpy array From 4a77046c7c575954a7af394800b11cdaeac47519 Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Mon, 4 Feb 2019 07:10:42 -0600 Subject: [PATCH 09/11] update readme --- README.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 771fa775f..8b5dc53c2 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,12 @@ # Realtime Object Detection for RTSP Cameras +This results in a MJPEG stream with objects identified that has a lower latency than directly viewing the RTSP feed with VLC. - Prioritizes realtime processing over frames per second. Dropping frames is fine. - OpenCV runs in a separate process so it can grab frames as quickly as possible to ensure there aren't old frames in the buffer - Object detection with Tensorflow runs in a separate process and ignores frames that are more than 0.5 seconds old - Uses shared memory arrays for handing frames between processes - Provides a url for viewing the video feed at a hard coded ~5FPS as an mjpeg stream - Frames are only encoded into mjpeg stream when it is being viewed +- A process is created per detection region ## Getting Started Build the container with @@ -23,13 +25,18 @@ docker run -it --rm \ -v :/label_map.pbtext:ro \ -p 5000:5000 \ -e RTSP_URL='' \ +-e REGIONS=',,:,,' \ realtime-od:latest ``` Access the mjpeg stream at http://localhost:5000 +## Tips +- Lower the framerate of the RTSP feed on the camera to what you want to reduce the CPU usage for capturing the feed + ## Future improvements - MQTT messages when detected objects change - Dynamic changes to processing speed, ie. 
only process 1FPS unless motion detected -- Break incoming frame into multiple smaller images and run detection in parallel for lower latency (rather than input a lower resolution) -- Parallel processing to increase FPS \ No newline at end of file +- Parallel processing to increase FPS +- Look into GPU accelerated decoding of RTSP stream +- Send video over a socket and use JSMPEG \ No newline at end of file From 071fcf6ea217a2926173c58e874f717a07b0580a Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Sat, 9 Feb 2019 07:23:18 -0600 Subject: [PATCH 10/11] update opencv --- Dockerfile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index c027df353..5fe568cc3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -61,17 +61,17 @@ RUN cd /usr/local/src/ \ RUN jupyter nbextension enable --py --sys-prefix widgetsnbextension # Download & build OpenCV -RUN wget -q -P /usr/local/src/ --no-check-certificate https://github.com/opencv/opencv/archive/3.4.1.zip +RUN wget -q -P /usr/local/src/ --no-check-certificate https://github.com/opencv/opencv/archive/4.0.1.zip RUN cd /usr/local/src/ \ - && unzip 3.4.1.zip \ - && rm 3.4.1.zip \ - && cd /usr/local/src/opencv-3.4.1/ \ + && unzip 4.0.1.zip \ + && rm 4.0.1.zip \ + && cd /usr/local/src/opencv-4.0.1/ \ && mkdir build \ - && cd /usr/local/src/opencv-3.4.1/build \ + && cd /usr/local/src/opencv-4.0.1/build \ && cmake -D CMAKE_INSTALL_TYPE=Release -D CMAKE_INSTALL_PREFIX=/usr/local/ .. \ && make -j4 \ && make install \ - && rm -rf /usr/local/src/opencv-3.4.1 + && rm -rf /usr/local/src/opencv-4.0.1 # Minimize image size RUN (apt-get autoremove -y; \ From 3e4256602fcd804df587604a5a35f8e185ce6cc2 Mon Sep 17 00:00:00 2001 From: blakeblackshear Date: Sat, 9 Feb 2019 07:23:54 -0600 Subject: [PATCH 11/11] update readme --- README.md | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 8b5dc53c2..4f0003c50 100644 --- a/README.md +++ b/README.md @@ -33,10 +33,38 @@ Access the mjpeg stream at http://localhost:5000 ## Tips - Lower the framerate of the RTSP feed on the camera to what you want to reduce the CPU usage for capturing the feed +- Use SSDLite models ## Future improvements -- MQTT messages when detected objects change -- Dynamic changes to processing speed, ie. only process 1FPS unless motion detected -- Parallel processing to increase FPS -- Look into GPU accelerated decoding of RTSP stream -- Send video over a socket and use JSMPEG \ No newline at end of file +- [ ] Look for a subset of object types +- [ ] Try and simplify the tensorflow model to just look for the objects we care about +- [ ] MQTT messages when detected objects change +- [ ] Implement basic motion detection with opencv and only look for objects in the regions with detected motion +- [ ] Dynamic changes to processing speed, ie. 
only process 1FPS unless motion detected
+- [x] Parallel processing to increase FPS
+- [ ] Look into GPU accelerated decoding of RTSP stream
+- [ ] Send video over a socket and use JSMPEG
+
+## Building Tensorflow from source for CPU optimizations
+https://www.tensorflow.org/install/source#docker_linux_builds
+Used `tensorflow/tensorflow:1.12.0-devel-py3`
+
+## Optimizing the graph (can't say I saw much difference in CPU usage)
+https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/graph_transforms/README.md#optimizing-for-deployment
+```
+docker run -it -v ${PWD}:/lab -v ${PWD}/../back_camera_model/models/ssd_mobilenet_v2_coco_2018_03_29/frozen_inference_graph.pb:/frozen_inference_graph.pb:ro tensorflow/tensorflow:1.12.0-devel-py3 bash
+
+bazel build tensorflow/tools/graph_transforms:transform_graph
+
+bazel-bin/tensorflow/tools/graph_transforms/transform_graph \
+--in_graph=/frozen_inference_graph.pb \
+--out_graph=/lab/optimized_inception_graph.pb \
+--inputs='image_tensor' \
+--outputs='num_detections,detection_scores,detection_boxes,detection_classes' \
+--transforms='
+  strip_unused_nodes(type=float, shape="1,300,300,3")
+  remove_nodes(op=Identity, op=CheckNumerics)
+  fold_constants(ignore_errors=true)
+  fold_batch_norms
+  fold_old_batch_norms'
+```
\ No newline at end of file
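The optimized graph drops in wherever the original frozen graph was loaded; nothing else changes. For reference, a standard TF 1.x loading sketch consistent with what `process_frames` does (the `PATH_TO_CKPT` value here is a placeholder, not a path from this repo):
```
import tensorflow as tf

PATH_TO_CKPT = '/lab/optimized_inception_graph.pb'  # or the original frozen_inference_graph.pb

# load the serialized GraphDef and import it into a fresh graph
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')

# the session and tensor lookups are unchanged from the detection loop
sess = tf.Session(graph=detection_graph)
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
```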