diff --git a/Dockerfile b/Dockerfile index e061f1a40..2fbbdd453 100644 --- a/Dockerfile +++ b/Dockerfile @@ -101,7 +101,8 @@ RUN pip install -U pip \ Flask \ paho-mqtt \ PyYAML \ - matplotlib + matplotlib \ + scipy WORKDIR /opt/frigate/ ADD frigate frigate/ diff --git a/frigate/mqtt.py b/frigate/mqtt.py index 120a79760..9299e319d 100644 --- a/frigate/mqtt.py +++ b/frigate/mqtt.py @@ -3,6 +3,7 @@ import cv2 import threading import prctl from collections import Counter, defaultdict +import itertools class MqttObjectPublisher(threading.Thread): def __init__(self, client, topic_prefix, objects_parsed, detected_objects, best_frames): @@ -26,7 +27,7 @@ class MqttObjectPublisher(threading.Thread): # total up all scores by object type obj_counter = Counter() - for obj in detected_objects: + for obj in itertools.chain.from_iterable(detected_objects.values()): obj_counter[obj['name']] += obj['score'] # report on detected objects diff --git a/frigate/object_detection.py b/frigate/object_detection.py index 4a510c1c1..a7fb67765 100644 --- a/frigate/object_detection.py +++ b/frigate/object_detection.py @@ -31,7 +31,7 @@ class PreppedQueueProcessor(threading.Thread): frame = self.prepped_frame_queue.get() # Actual detection. - frame['detected_objects'] = self.engine.DetectWithInputTensor(frame['frame'], threshold=0.5, top_k=5) + frame['detected_objects'] = self.engine.DetectWithInputTensor(frame['frame'], threshold=0.4, top_k=5) self.fps.update() self.avg_inference_speed = (self.avg_inference_speed*9 + self.engine.get_inference_time())/10 @@ -56,8 +56,10 @@ class RegionRequester(threading.Thread): # make a copy of the frame_time frame_time = self.camera.frame_time.value + with self.camera.regions_in_process_lock: + self.camera.regions_in_process[frame_time] = len(self.camera.config['regions']) + for index, region in enumerate(self.camera.config['regions']): - # queue with priority 1 self.camera.resize_queue.put({ 'camera_name': self.camera.name, 'frame_time': frame_time, @@ -88,14 +90,14 @@ class RegionPrepper(threading.Thread): # make a copy of the region cropped_frame = frame[resize_request['y_offset']:resize_request['y_offset']+resize_request['size'], resize_request['x_offset']:resize_request['x_offset']+resize_request['size']].copy() - + # Resize to 300x300 if needed if cropped_frame.shape != (300, 300, 3): + # TODO: use Pillow-SIMD? cropped_frame = cv2.resize(cropped_frame, dsize=(300, 300), interpolation=cv2.INTER_LINEAR) # Expand dimensions since the model expects images to have shape: [1, 300, 300, 3] frame_expanded = np.expand_dims(cropped_frame, axis=0) # add the frame to the queue - if not self.prepped_frame_queue.full(): - resize_request['frame'] = frame_expanded.flatten().copy() - self.prepped_frame_queue.put(resize_request) \ No newline at end of file + resize_request['frame'] = frame_expanded.flatten().copy() + self.prepped_frame_queue.put(resize_request) \ No newline at end of file diff --git a/frigate/objects.py b/frigate/objects.py index e635a466d..cb8003146 100644 --- a/frigate/objects.py +++ b/frigate/objects.py @@ -3,8 +3,10 @@ import datetime import threading import cv2 import prctl +import itertools import numpy as np -from . util import draw_box_with_label, LABELS +from scipy.spatial import distance as dist +from . util import draw_box_with_label, LABELS, compute_intersection_rectangle, compute_intersection_over_union, calculate_region class ObjectCleaner(threading.Thread): def __init__(self, objects_parsed, detected_objects): @@ -25,14 +27,13 @@ class ObjectCleaner(threading.Thread): # (newest objects are appended to the end) detected_objects = self._detected_objects.copy() - num_to_delete = 0 - for obj in detected_objects: - if now-obj['frame_time']<2: - break - num_to_delete += 1 - if num_to_delete > 0: - del self._detected_objects[:num_to_delete] + objects_removed = False + for frame_time in detected_objects.keys(): + if now-frame_time>2: + del self._detected_objects[frame_time] + objects_removed = True + if objects_removed: # notify that parsed objects were changed with self._objects_parsed: self._objects_parsed.notify_all() @@ -49,88 +50,459 @@ class DetectedObjectsProcessor(threading.Thread): objects = frame['detected_objects'] - if len(objects) == 0: - return + # print(f"Processing objects for: {frame['size']} {frame['x_offset']} {frame['y_offset']}") + + # if len(objects) == 0: + # continue for raw_obj in objects: obj = { - 'score': float(raw_obj.score), - 'box': raw_obj.bounding_box.flatten().tolist(), 'name': str(LABELS[raw_obj.label_id]), + 'score': float(raw_obj.score), + 'box': { + 'xmin': int((raw_obj.bounding_box[0][0] * frame['size']) + frame['x_offset']), + 'ymin': int((raw_obj.bounding_box[0][1] * frame['size']) + frame['y_offset']), + 'xmax': int((raw_obj.bounding_box[1][0] * frame['size']) + frame['x_offset']), + 'ymax': int((raw_obj.bounding_box[1][1] * frame['size']) + frame['y_offset']) + }, + 'region': { + 'xmin': frame['x_offset'], + 'ymin': frame['y_offset'], + 'xmax': frame['x_offset']+frame['size'], + 'ymax': frame['y_offset']+frame['size'] + }, 'frame_time': frame['frame_time'], 'region_id': frame['region_id'] } - # find the matching region - region = self.camera.regions[frame['region_id']] - - # Compute some extra properties - obj.update({ - 'xmin': int((obj['box'][0] * frame['size']) + frame['x_offset']), - 'ymin': int((obj['box'][1] * frame['size']) + frame['y_offset']), - 'xmax': int((obj['box'][2] * frame['size']) + frame['x_offset']), - 'ymax': int((obj['box'][3] * frame['size']) + frame['y_offset']) - }) + if not obj['name'] == 'bicycle': + continue + + # if the object is within 5 pixels of the region border, and the region is not on the edge + # consider the object to be clipped + obj['clipped'] = False + if ((obj['region']['xmin'] > 5 and obj['box']['xmin']-obj['region']['xmin'] <= 5) or + (obj['region']['ymin'] > 5 and obj['box']['ymin']-obj['region']['ymin'] <= 5) or + (self.camera.frame_shape[1]-obj['region']['xmax'] > 5 and obj['region']['xmax']-obj['box']['xmax'] <= 5) or + (self.camera.frame_shape[0]-obj['region']['ymax'] > 5 and obj['region']['ymax']-obj['box']['ymax'] <= 5)): + obj['clipped'] = True # Compute the area - obj['area'] = (obj['xmax']-obj['xmin'])*(obj['ymax']-obj['ymin']) + obj['area'] = (obj['box']['xmax']-obj['box']['xmin'])*(obj['box']['ymax']-obj['box']['ymin']) - object_name = obj['name'] + # find the matching region + # region = self.camera.regions[frame['region_id']] + - if object_name in region['objects']: - obj_settings = region['objects'][object_name] + # object_name = obj['name'] + # TODO: move all this to wherever we manage "tracked objects" + # if object_name in region['objects']: + # obj_settings = region['objects'][object_name] - # if the min area is larger than the - # detected object, don't add it to detected objects - if obj_settings.get('min_area',-1) > obj['area']: - continue + # # if the min area is larger than the + # # detected object, don't add it to detected objects + # if obj_settings.get('min_area',-1) > obj['area']: + # continue - # if the detected object is larger than the - # max area, don't add it to detected objects - if obj_settings.get('max_area', region['size']**2) < obj['area']: - continue + # # if the detected object is larger than the + # # max area, don't add it to detected objects + # if obj_settings.get('max_area', region['size']**2) < obj['area']: + # continue - # if the score is lower than the threshold, skip - if obj_settings.get('threshold', 0) > obj['score']: - continue + # # if the score is lower than the threshold, skip + # if obj_settings.get('threshold', 0) > obj['score']: + # continue - # compute the coordinates of the object and make sure - # the location isnt outside the bounds of the image (can happen from rounding) - y_location = min(int(obj['ymax']), len(self.mask)-1) - x_location = min(int((obj['xmax']-obj['xmin'])/2.0)+obj['xmin'], len(self.mask[0])-1) + # # compute the coordinates of the object and make sure + # # the location isnt outside the bounds of the image (can happen from rounding) + # y_location = min(int(obj['ymax']), len(self.mask)-1) + # x_location = min(int((obj['xmax']-obj['xmin'])/2.0)+obj['xmin'], len(self.mask[0])-1) - # if the object is in a masked location, don't add it to detected objects - if self.camera.mask[y_location][x_location] == [0]: - continue - - # look to see if the bounding box is too close to the region border and the region border is not the edge of the frame - # if ((frame['x_offset'] > 0 and obj['box'][0] < 0.01) or - # (frame['y_offset'] > 0 and obj['box'][1] < 0.01) or - # (frame['x_offset']+frame['size'] < self.frame_shape[1] and obj['box'][2] > 0.99) or - # (frame['y_offset']+frame['size'] < self.frame_shape[0] and obj['box'][3] > 0.99)): + # # if the object is in a masked location, don't add it to detected objects + # if self.camera.mask[y_location][x_location] == [0]: + # continue - # size, x_offset, y_offset = calculate_region(self.frame_shape, obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']) - # This triggers WAY too often with stationary objects on the edge of a region. - # Every frame triggers it and fills the queue... - # I need to create a new region and add it to the list of regions, but - # it needs to check for a duplicate region first. + # see if the current object is a duplicate + # TODO: still need to decide which copy to keep + obj['duplicate'] = False + for existing_obj in self.camera.detected_objects[frame['frame_time']]: + # compute intersection rectangle with existing object and new objects region + existing_obj_current_region = compute_intersection_rectangle(existing_obj['box'], obj['region']) - # self.resize_queue.put({ - # 'camera_name': self.name, - # 'frame_time': frame['frame_time'], - # 'region_id': frame['region_id'], - # 'size': size, - # 'x_offset': x_offset, - # 'y_offset': y_offset - # }) - # print('object too close to region border') - #continue + # compute intersection rectangle with new object and existing objects region + new_obj_existing_region = compute_intersection_rectangle(obj['box'], existing_obj['region']) - self.camera.detected_objects.append(obj) + # compute iou for the two intersection rectangles that were just computed + iou = compute_intersection_over_union(existing_obj_current_region, new_obj_existing_region) + + # if intersection is greater than ?, flag as duplicate + if iou > .7: + obj['duplicate'] = True + break + + self.camera.detected_objects[frame['frame_time']].append(obj) + + with self.camera.regions_in_process_lock: + self.camera.regions_in_process[frame['frame_time']] -= 1 + # print(f"Remaining regions for {frame['frame_time']}: {self.camera.regions_in_process[frame['frame_time']]}") + + if self.camera.regions_in_process[frame['frame_time']] == 0: + del self.camera.regions_in_process[frame['frame_time']] + # print('Finished frame: ', frame['frame_time']) + self.camera.finished_frame_queue.put(frame['frame_time']) with self.camera.objects_parsed: self.camera.objects_parsed.notify_all() +# Thread that checks finished frames for clipped objects and sends back +# for processing if needed +class RegionRefiner(threading.Thread): + def __init__(self, camera): + threading.Thread.__init__(self) + self.camera = camera + + def run(self): + prctl.set_name(self.__class__.__name__) + while True: + # TODO: I need to process the frames in order for tracking... + frame_time = self.camera.finished_frame_queue.get() + + # print(f"{frame_time} finished") + + object_groups = [] + + # group all the duplicate objects together + # TODO: should I be grouping by object type too? also, the order can determine how well they group... + for new_obj in self.camera.detected_objects[frame_time]: + matching_group = self.find_group(new_obj, object_groups) + if matching_group is None: + object_groups.append([new_obj]) + else: + object_groups[matching_group].append(new_obj) + + # just keep the unclipped objects + self.camera.detected_objects[frame_time] = [obj for obj in self.camera.detected_objects[frame_time] if obj['clipped'] == False] + + # print(f"{frame_time} found {len(object_groups)} groups {object_groups}") + clipped_object = False + # deduped_objects = [] + # find the largest unclipped object in each group + for group in object_groups: + unclipped_objects = [obj for obj in group if obj['clipped'] == False] + # if no unclipped objects, we need to look again + if len(unclipped_objects) == 0: + # print(f"{frame_time} no unclipped objects in group") + with self.camera.regions_in_process_lock: + if not frame_time in self.camera.regions_in_process: + self.camera.regions_in_process[frame_time] = 1 + else: + self.camera.regions_in_process[frame_time] += 1 + xmin = min([obj['box']['xmin'] for obj in group]) + ymin = min([obj['box']['ymin'] for obj in group]) + xmax = max([obj['box']['xmax'] for obj in group]) + ymax = max([obj['box']['ymax'] for obj in group]) + # calculate a new region that will hopefully get the entire object + (size, x_offset, y_offset) = calculate_region(self.camera.frame_shape, + xmin, ymin, + xmax, ymax) + # print(f"{frame_time} new region: {size} {x_offset} {y_offset}") + + # add it to the queue + self.camera.resize_queue.put({ + 'camera_name': self.camera.name, + 'frame_time': frame_time, + 'region_id': -1, + 'size': size, + 'x_offset': x_offset, + 'y_offset': y_offset + }) + self.camera.dynamic_region_fps.update() + clipped_object = True + + # add the largest unclipped object + # TODO: this makes no sense + # deduped_objects.append(max(unclipped_objects, key=lambda obj: obj['area'])) + + # if we found a clipped object, then this frame is not ready for processing + if clipped_object: + continue + + # print(f"{frame_time} is actually finished") + # self.camera.detected_objects[frame_time] = deduped_objects + + # keep adding frames to the refined queue as long as they are finished + with self.camera.regions_in_process_lock: + while self.camera.frame_queue.qsize() > 0 and self.camera.frame_queue.queue[0] not in self.camera.regions_in_process: + self.camera.refined_frame_queue.put(self.camera.frame_queue.get()) + + def has_overlap(self, new_obj, obj, overlap=0): + # compute intersection rectangle with existing object and new objects region + existing_obj_current_region = compute_intersection_rectangle(obj['box'], new_obj['region']) + + # compute intersection rectangle with new object and existing objects region + new_obj_existing_region = compute_intersection_rectangle(new_obj['box'], obj['region']) + + # compute iou for the two intersection rectangles that were just computed + iou = compute_intersection_over_union(existing_obj_current_region, new_obj_existing_region) + + # if intersection is greater than overlap + if iou > overlap: + return True + else: + return False + + def find_group(self, new_obj, groups): + for index, group in enumerate(groups): + for obj in group: + if self.has_overlap(new_obj, obj): + return index + return None + +class ObjectTracker(threading.Thread): + def __init__(self, camera, max_disappeared): + threading.Thread.__init__(self) + self.camera = camera + self.tracked_objects = {} + self.disappeared = {} + self.max_disappeared = max_disappeared + + def run(self): + prctl.set_name(self.__class__.__name__) + while True: + # TODO: track objects + frame_time = self.camera.refined_frame_queue.get() + f = open(f"/debug/{str(frame_time)}.jpg", 'wb') + f.write(self.camera.frame_with_objects(frame_time)) + f.close() + + + def register(self, index, obj): + id = f"{str(obj.frame_time)}-{index}" + self.tracked_objects[id] = obj + self.disappeared[id] = 0 + + def deregister(self, id): + del self.disappeared[id] + del self.tracked_objects[id] + + def update(self, id, new_obj): + new_obj.detections = self.tracked_objects[id].detections + new_obj.detections.append({ + + }) + + def match_and_update(self, new_objects): + # check to see if the list of input bounding box rectangles + # is empty + if len(new_objects) == 0: + # loop over any existing tracked objects and mark them + # as disappeared + for objectID in list(self.disappeared.keys()): + self.disappeared[objectID] += 1 + + # if we have reached a maximum number of consecutive + # frames where a given object has been marked as + # missing, deregister it + if self.disappeared[objectID] > self.max_disappeared: + self.deregister(objectID) + + # return early as there are no centroids or tracking info + # to update + return + + # compute centroids + for obj in new_objects: + centroid_x = int((obj['box']['xmin']+obj['box']['xmax']) / 2.0) + centroid_y = int((obj['box']['ymin']+obj['box']['ymax']) / 2.0) + obj.centroid = (centroid_x, centroid_y) + + if len(self.tracked_objects) == 0: + for index, obj in enumerate(new_objects): + self.register(index, obj) + return + + new_centroids = np.array([o.centroid for o in new_objects]) + current_ids = list(self.tracked_objects.keys()) + current_centroids = np.array([o.centroid for o in self.tracked_objects]) + + # compute the distance between each pair of tracked + # centroids and new centroids, respectively -- our + # goal will be to match each new centroid to an existing + # object centroid + D = dist.cdist(current_centroids, new_centroids) + + # in order to perform this matching we must (1) find the + # smallest value in each row and then (2) sort the row + # indexes based on their minimum values so that the row + # with the smallest value is at the *front* of the index + # list + rows = D.min(axis=1).argsort() + + # next, we perform a similar process on the columns by + # finding the smallest value in each column and then + # sorting using the previously computed row index list + cols = D.argmin(axis=1)[rows] + + # in order to determine if we need to update, register, + # or deregister an object we need to keep track of which + # of the rows and column indexes we have already examined + usedRows = set() + usedCols = set() + + # loop over the combination of the (row, column) index + # tuples + for (row, col) in zip(rows, cols): + # if we have already examined either the row or + # column value before, ignore it + # val + if row in usedRows or col in usedCols: + continue + + # otherwise, grab the object ID for the current row, + # set its new centroid, and reset the disappeared + # counter + objectID = current_ids[row] + self.update(objectID, new_objects[col]) + self.disappeared[objectID] = 0 + + # indicate that we have examined each of the row and + # column indexes, respectively + usedRows.add(row) + usedCols.add(col) + + # compute both the row and column index we have NOT yet + # examined + unusedRows = set(range(0, D.shape[0])).difference(usedRows) + unusedCols = set(range(0, D.shape[1])).difference(usedCols) + + # in the event that the number of object centroids is + # equal or greater than the number of input centroids + # we need to check and see if some of these objects have + # potentially disappeared + if D.shape[0] >= D.shape[1]: + # loop over the unused row indexes + for row in unusedRows: + # grab the object ID for the corresponding row + # index and increment the disappeared counter + objectID = current_ids[row] + self.disappeared[objectID] += 1 + + # check to see if the number of consecutive + # frames the object has been marked "disappeared" + # for warrants deregistering the object + if self.disappeared[objectID] > self.max_disappeared: + self.deregister(objectID) + + # otherwise, if the number of input centroids is greater + # than the number of existing object centroids we need to + # register each new input centroid as a trackable object + else: + for col in unusedCols: + self.register(col, new_objects[col]) + + + # ------------- + + # # initialize an array of input centroids for the current frame + # inputCentroids = np.zeros((len(rects), 2), dtype="int") + + # # loop over the bounding box rectangles + # for (i, (startX, startY, endX, endY)) in enumerate(rects): + # # use the bounding box coordinates to derive the centroid + # cX = int((startX + endX) / 2.0) + # cY = int((startY + endY) / 2.0) + # inputCentroids[i] = (cX, cY) + + # # if we are currently not tracking any objects take the input + # # centroids and register each of them + # if len(self.objects) == 0: + # for i in range(0, len(inputCentroids)): + # self.register(inputCentroids[i]) + # # otherwise, are are currently tracking objects so we need to + # # try to match the input centroids to existing object + # # centroids + # else: + # # grab the set of object IDs and corresponding centroids + # objectIDs = list(self.objects.keys()) + # objectCentroids = list(self.objects.values()) + + # # compute the distance between each pair of object + # # centroids and input centroids, respectively -- our + # # goal will be to match an input centroid to an existing + # # object centroid + # D = dist.cdist(np.array(objectCentroids), inputCentroids) + + # # in order to perform this matching we must (1) find the + # # smallest value in each row and then (2) sort the row + # # indexes based on their minimum values so that the row + # # with the smallest value is at the *front* of the index + # # list + # rows = D.min(axis=1).argsort() + + # # next, we perform a similar process on the columns by + # # finding the smallest value in each column and then + # # sorting using the previously computed row index list + # cols = D.argmin(axis=1)[rows] + + # # in order to determine if we need to update, register, + # # or deregister an object we need to keep track of which + # # of the rows and column indexes we have already examined + # usedRows = set() + # usedCols = set() + + # # loop over the combination of the (row, column) index + # # tuples + # for (row, col) in zip(rows, cols): + # # if we have already examined either the row or + # # column value before, ignore it + # # val + # if row in usedRows or col in usedCols: + # continue + + # # otherwise, grab the object ID for the current row, + # # set its new centroid, and reset the disappeared + # # counter + # objectID = objectIDs[row] + # self.objects[objectID] = inputCentroids[col] + # self.disappeared[objectID] = 0 + + # # indicate that we have examined each of the row and + # # column indexes, respectively + # usedRows.add(row) + # usedCols.add(col) + + # # compute both the row and column index we have NOT yet + # # examined + # unusedRows = set(range(0, D.shape[0])).difference(usedRows) + # unusedCols = set(range(0, D.shape[1])).difference(usedCols) + + # # in the event that the number of object centroids is + # # equal or greater than the number of input centroids + # # we need to check and see if some of these objects have + # # potentially disappeared + # if D.shape[0] >= D.shape[1]: + # # loop over the unused row indexes + # for row in unusedRows: + # # grab the object ID for the corresponding row + # # index and increment the disappeared counter + # objectID = objectIDs[row] + # self.disappeared[objectID] += 1 + + # # check to see if the number of consecutive + # # frames the object has been marked "disappeared" + # # for warrants deregistering the object + # if self.disappeared[objectID] > self.maxDisappeared: + # self.deregister(objectID) + + # # otherwise, if the number of input centroids is greater + # # than the number of existing object centroids we need to + # # register each new input centroid as a trackable object + # else: + # for col in unusedCols: + # self.register(inputCentroids[col]) + + # # return the set of trackable objects + # return self.objects # Maintains the frame and object with the highest score class BestFrames(threading.Thread): @@ -153,7 +525,7 @@ class BestFrames(threading.Thread): # make a copy of detected objects detected_objects = self.detected_objects.copy() - for obj in detected_objects: + for obj in itertools.chain.from_iterable(detected_objects.values()): if obj['name'] in self.best_objects: now = datetime.datetime.now().timestamp() # if the object is a higher score than the current best score @@ -170,8 +542,8 @@ class BestFrames(threading.Thread): if obj['frame_time'] in recent_frames: best_frame = recent_frames[obj['frame_time']] #, np.zeros((720,1280,3), np.uint8)) - draw_box_with_label(best_frame, obj['xmin'], obj['ymin'], - obj['xmax'], obj['ymax'], obj['name'], obj['score'], obj['area']) + draw_box_with_label(best_frame, obj['box']['xmin'], obj['box']['ymin'], + obj['box']['xmax'], obj['box']['ymax'], obj['name'], f"{int(obj['score']*100)}% {obj['area']}") # print a timestamp time_to_show = datetime.datetime.fromtimestamp(obj['frame_time']).strftime("%m/%d/%Y %H:%M:%S") diff --git a/frigate/util.py b/frigate/util.py index b7c4e5585..c1ab445bc 100644 --- a/frigate/util.py +++ b/frigate/util.py @@ -16,22 +16,22 @@ def ReadLabelFile(file_path): return ret def calculate_region(frame_shape, xmin, ymin, xmax, ymax): - # size is 50% larger than longest edge - size = max(xmax-xmin, ymax-ymin) + # size is larger than longest edge + size = int(max(xmax-xmin, ymax-ymin)*1.5) # if the size is too big to fit in the frame if size > min(frame_shape[0], frame_shape[1]): size = min(frame_shape[0], frame_shape[1]) # x_offset is midpoint of bounding box minus half the size - x_offset = int(((xmax-xmin)/2+xmin)-size/2) + x_offset = int((xmax-xmin)/2.0+xmin-size/2.0) # if outside the image if x_offset < 0: x_offset = 0 elif x_offset > (frame_shape[1]-size): x_offset = (frame_shape[1]-size) - # x_offset is midpoint of bounding box minus half the size - y_offset = int(((ymax-ymin)/2+ymin)-size/2) + # y_offset is midpoint of bounding box minus half the size + y_offset = int((ymax-ymin)/2.0+ymin-size/2.0) # if outside the image if y_offset < 0: y_offset = 0 @@ -40,13 +40,44 @@ def calculate_region(frame_shape, xmin, ymin, xmax, ymax): return (size, x_offset, y_offset) +def compute_intersection_rectangle(box_a, box_b): + return { + 'xmin': max(box_a['xmin'], box_b['xmin']), + 'ymin': max(box_a['ymin'], box_b['ymin']), + 'xmax': min(box_a['xmax'], box_b['xmax']), + 'ymax': min(box_a['ymax'], box_b['ymax']) + } + +def compute_intersection_over_union(box_a, box_b): + # determine the (x, y)-coordinates of the intersection rectangle + intersect = compute_intersection_rectangle(box_a, box_b) + + # compute the area of intersection rectangle + inter_area = max(0, intersect['xmax'] - intersect['xmin'] + 1) * max(0, intersect['ymax'] - intersect['ymin'] + 1) + + if inter_area == 0: + return 0.0 + + # compute the area of both the prediction and ground-truth + # rectangles + box_a_area = (box_a['xmax'] - box_a['xmin'] + 1) * (box_a['ymax'] - box_a['ymin'] + 1) + box_b_area = (box_b['xmax'] - box_b['xmin'] + 1) * (box_b['ymax'] - box_b['ymin'] + 1) + + # compute the intersection over union by taking the intersection + # area and dividing it by the sum of prediction + ground-truth + # areas - the interesection area + iou = inter_area / float(box_a_area + box_b_area - inter_area) + + # return the intersection over union value + return iou + # convert shared memory array into numpy array def tonumpyarray(mp_arr): return np.frombuffer(mp_arr.get_obj(), dtype=np.uint8) -def draw_box_with_label(frame, x_min, y_min, x_max, y_max, label, score, area): +def draw_box_with_label(frame, x_min, y_min, x_max, y_max, label, info): color = COLOR_MAP[label] - display_text = "{}: {}% {}".format(label,int(score*100),int(area)) + display_text = "{}: {}".format(label, info) cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2) diff --git a/frigate/video.py b/frigate/video.py index 242b56298..0c874655b 100644 --- a/frigate/video.py +++ b/frigate/video.py @@ -9,10 +9,11 @@ import multiprocessing as mp import subprocess as sp import numpy as np import prctl +import itertools from collections import defaultdict from . util import tonumpyarray, LABELS, draw_box_with_label, calculate_region, EventsPerSecond from . object_detection import RegionPrepper, RegionRequester -from . objects import ObjectCleaner, BestFrames, DetectedObjectsProcessor +from . objects import ObjectCleaner, BestFrames, DetectedObjectsProcessor, RegionRefiner, ObjectTracker from . mqtt import MqttObjectPublisher # Stores 2 seconds worth of frames so they can be used for other threads @@ -24,7 +25,7 @@ class FrameTracker(threading.Thread): self.frame_ready = frame_ready self.frame_lock = frame_lock self.recent_frames = recent_frames - + def run(self): prctl.set_name("FrameTracker") while True: @@ -36,7 +37,7 @@ class FrameTracker(threading.Thread): # delete any old frames stored_frame_times = list(self.recent_frames.keys()) for k in stored_frame_times: - if (now - k) > 2: + if (now - k) > 10: del self.recent_frames[k] def get_frame_shape(source): @@ -101,6 +102,7 @@ class CameraCapture(threading.Thread): .reshape(self.camera.frame_shape) ) self.camera.frame_cache[self.camera.frame_time.value] = self.camera.current_frame.copy() + self.camera.frame_queue.put(self.camera.frame_time.value) # Notify with the condition that a new frame is ready with self.camera.frame_ready: self.camera.frame_ready.notify_all() @@ -111,8 +113,17 @@ class Camera: def __init__(self, name, ffmpeg_config, global_objects_config, config, prepped_frame_queue, mqtt_client, mqtt_prefix): self.name = name self.config = config - self.detected_objects = [] + self.detected_objects = defaultdict(lambda: []) + self.tracked_objects = [] self.frame_cache = {} + # queue for re-assembling frames in order + self.frame_queue = queue.Queue() + # track how many regions have been requested for a frame so we know when a frame is complete + self.regions_in_process = {} + # Lock to control access + self.regions_in_process_lock = mp.Lock() + self.finished_frame_queue = queue.Queue() + self.refined_frame_queue = queue.Queue() self.ffmpeg = config.get('ffmpeg', {}) self.ffmpeg_input = get_ffmpeg_input(self.ffmpeg['input']) @@ -149,7 +160,7 @@ class Camera: self.detected_objects_queue = queue.Queue() self.detected_objects_processor = DetectedObjectsProcessor(self) self.detected_objects_processor.start() - + # initialize the frame cache self.cached_frame_with_objects = { 'frame_bytes': [], @@ -193,6 +204,16 @@ class Camera: self.object_cleaner = ObjectCleaner(self.objects_parsed, self.detected_objects) self.object_cleaner.start() + # start a thread to refine regions when objects are clipped + self.dynamic_region_fps = EventsPerSecond() + self.region_refiner = RegionRefiner(self) + self.region_refiner.start() + self.dynamic_region_fps.start() + + # start a thread to track objects + self.object_tracker = ObjectTracker(self, 10) + self.object_tracker.start() + # start a thread to publish object scores mqtt_publisher = MqttObjectPublisher(self.mqtt_client, self.mqtt_topic_prefix, self.objects_parsed, self.detected_objects, self.best_frames) mqtt_publisher.start() @@ -270,12 +291,47 @@ class Camera: def stats(self): return { 'camera_fps': self.fps.eps(60), - 'resize_queue': self.resize_queue.qsize() + 'resize_queue': self.resize_queue.qsize(), + 'frame_queue': self.frame_queue.qsize(), + 'finished_frame_queue': self.finished_frame_queue.qsize(), + 'refined_frame_queue': self.refined_frame_queue.qsize(), + 'regions_in_process': self.regions_in_process, + 'dynamic_regions_per_sec': self.dynamic_region_fps.eps() } + def frame_with_objects(self, frame_time): + frame = self.frame_cache[frame_time].copy() + + for region in self.regions: + color = (255,255,255) + cv2.rectangle(frame, (region['x_offset'], region['y_offset']), + (region['x_offset']+region['size'], region['y_offset']+region['size']), + color, 2) + + # draw the bounding boxes on the screen + for obj in self.detected_objects[frame_time]: + # for obj in detected_objects[frame_time]: + cv2.rectangle(frame, (obj['region']['xmin'], obj['region']['ymin']), + (obj['region']['xmax'], obj['region']['ymax']), + (0,255,0), 1) + draw_box_with_label(frame, obj['box']['xmin'], obj['box']['ymin'], obj['box']['xmax'], obj['box']['ymax'], obj['name'], f"{int(obj['score']*100)}% {obj['area']} {obj['clipped']}") + + # print a timestamp + time_to_show = datetime.datetime.fromtimestamp(frame_time).strftime("%m/%d/%Y %H:%M:%S") + cv2.putText(frame, time_to_show, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, fontScale=.8, color=(255, 255, 255), thickness=2) + + # print fps + cv2.putText(frame, str(self.fps.eps())+'FPS', (10, 60), cv2.FONT_HERSHEY_SIMPLEX, fontScale=.8, color=(255, 255, 255), thickness=2) + + # convert to BGR + frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) + + # encode the image into a jpg + ret, jpg = cv2.imencode('.jpg', frame) + + return jpg.tobytes() + def get_current_frame_with_objects(self): - # make a copy of the current detected objects - detected_objects = self.detected_objects.copy() # lock and make a copy of the current frame with self.frame_lock: frame = self.current_frame.copy() @@ -284,9 +340,16 @@ class Camera: if frame_time == self.cached_frame_with_objects['frame_time']: return self.cached_frame_with_objects['frame_bytes'] + # make a copy of the current detected objects + detected_objects = self.detected_objects.copy() + # draw the bounding boxes on the screen - for obj in detected_objects: - draw_box_with_label(frame, obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'], obj['name'], obj['score'], obj['area']) + for obj in [obj for frame_list in detected_objects.values() for obj in frame_list]: + # for obj in detected_objects[frame_time]: + draw_box_with_label(frame, obj['box']['xmin'], obj['box']['ymin'], obj['box']['xmax'], obj['box']['ymax'], obj['name'], f"{int(obj['score']*100)}% {obj['area']} {obj['clipped']}") + cv2.rectangle(frame, (obj['region']['xmin'], obj['region']['ymin']), + (obj['region']['xmax'], obj['region']['ymax']), + (0,255,0), 2) for region in self.regions: color = (255,255,255)