diff --git a/Dockerfile b/Dockerfile
index 7dd0026f2..b2f6e7edf 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -92,6 +92,10 @@ RUN tar xzf edgetpu_api.tar.gz \
 RUN (apt-get autoremove -y; \
     apt-get autoclean -y)
 
+# symlink the model and labels
+RUN ln -s /python-tflite-source/edgetpu/test_data/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite /frozen_inference_graph.pb
+RUN ln -s /python-tflite-source/edgetpu/test_data/coco_labels.txt /label_map.pbtext
+
 # Set TF object detection available
 ENV PYTHONPATH "$PYTHONPATH:/usr/local/lib/python3.5/dist-packages/tensorflow/models/research:/usr/local/lib/python3.5/dist-packages/tensorflow/models/research/slim"
 RUN cd /usr/local/lib/python3.5/dist-packages/tensorflow/models/research && protoc object_detection/protos/*.proto --python_out=.
@@ -101,6 +105,3 @@ ADD frigate frigate/
 COPY detect_objects.py .
 
 CMD ["python3", "-u", "detect_objects.py"]
-
-# WORKDIR /python-tflite-source/edgetpu/
-# CMD ["python3", "-u", "demo/classify_image.py", "--model", "test_data/mobilenet_v2_1.0_224_inat_bird_quant_edgetpu.tflite", "--label", "test_data/inat_bird_labels.txt", "--image", "test_data/parrot.jpg"]
\ No newline at end of file
diff --git a/README.md b/README.md
index 255865ea7..933a6ccfa 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,18 @@
 # Frigate - Realtime Object Detection for RTSP Cameras
+**Note:** This version requires the use of a [Google Coral USB Accelerator](https://coral.withgoogle.com/products/accelerator/)
+
 Uses OpenCV and Tensorflow to perform realtime object detection locally for RTSP cameras. Designed for integration with HomeAssistant or others via MQTT.
 
 - Leverages multiprocessing and threads heavily with an emphasis on realtime over processing every frame
-- Allows you to define specific regions (squares) in the image to look for motion/objects
-- Motion detection runs in a separate process per region and signals to object detection to avoid wasting CPU cycles looking for objects when there is no motion
-- Object detection with Tensorflow runs in a separate process per region
-- Detected objects are placed on a shared mp.Queue and aggregated into a list of recently detected objects in a separate thread
-- A person score is calculated as the sum of all scores/5
-- Motion and object info is published over MQTT for integration into HomeAssistant or others
+- Allows you to define specific regions (squares) in the image to look for objects
+- No motion detection (for now)
+- Object detection with Tensorflow runs in a separate thread
+- Object info is published over MQTT for integration into HomeAssistant as a binary sensor
 - An endpoint is available to view an MJPEG stream for debugging
 
 ![Diagram](diagram.png)
 
-## Example video
+## Example video (from older version)
 You see multiple bounding boxes because it draws bounding boxes from all frames in the past 1 second where a person was detected. Not all of the bounding boxes were from the current frame.
 [![](http://img.youtube.com/vi/nqHbCtyo4dY/0.jpg)](http://www.youtube.com/watch?v=nqHbCtyo4dY "Frigate")
@@ -22,24 +22,16 @@ Build the container with
 docker build -t frigate .
 ```
 
-Download a model from the [zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md).
-
-Download the cooresponding label map from [here](https://github.com/tensorflow/models/tree/master/research/object_detection/data).
+The `mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite` model is included and used by default. You can use your own model and labels by mounting files in the container at `/frozen_inference_graph.pb` and `/label_map.pbtext`. Models must be compatible with the Coral according to [this](https://coral.withgoogle.com/models/).
 
 Run the container with
 ```
 docker run --rm \
--v :/frozen_inference_graph.pb:ro \
--v :/label_map.pbtext:ro \
+--privileged \
+-v /dev/bus/usb:/dev/bus/usb \
 -v <path_to_config_dir>:/config:ro \
 -p 5000:5000 \
--e RTSP_URL='' \
--e REGIONS=',,,,,:,,,,,' \
--e MQTT_HOST='your.mqtthost.com' \
--e MQTT_USER='username' \
--e MQTT_PASS='password' \
--e MQTT_TOPIC_PREFIX='cameras/1' \
--e DEBUG='0' \
+-e RTSP_PASSWORD='password' \
 frigate:latest
 ```
 
@@ -48,107 +40,59 @@ Example docker-compose:
   frigate:
     container_name: frigate
     restart: unless-stopped
+    privileged: true
     image: frigate:latest
     volumes:
-      - :/frozen_inference_graph.pb:ro
-      - :/label_map.pbtext:ro
+      - /dev/bus/usb:/dev/bus/usb
       - <path_to_config>:/config
     ports:
-      - "127.0.0.1:5000:5000"
+      - "5000:5000"
     environment:
-      RTSP_URL: ""
-      REGIONS: ",,,,,:,,,,,"
-      MQTT_HOST: "your.mqtthost.com"
-      MQTT_USER: "username" #optional
-      MQTT_PASS: "password" #optional
-      MQTT_TOPIC_PREFIX: "cameras/1"
-      DEBUG: "0"
+      RTSP_PASSWORD: "password"
 ```
 
-Here is an example `REGIONS` env variable:
-`350,0,300,5000,200,mask-0-300.bmp:400,350,250,2000,200,mask-350-250.bmp:400,750,250,2000,200,mask-750-250.bmp`
+A `config.yml` file must exist in the `config` directory. See example [here](config/config.yml).
 
-First region broken down (all are required):
-- `350` - size of the square (350px by 350px)
-- `0` - x coordinate of upper left corner (top left of image is 0,0)
-- `300` - y coordinate of upper left corner (top left of image is 0,0)
-- `5000` - minimum person bounding box size (width*height for bounding box of identified person)
-- `200` - minimum number of changed pixels to trigger motion
-- `mask-0-300.bmp` - a bmp file with the masked regions as pure black, must be the same size as the region
-
-Mask files go in the `/config` directory.
-
-Access the mjpeg stream at http://localhost:5000
+Access the mjpeg stream at `http://localhost:5000/<camera_name>` and the best person snapshot at `http://localhost:5000/<camera_name>/best_person.jpg`
 
 ## Integration with HomeAssistant
 ```
 camera:
   - name: Camera Last Person
     platform: generic
-    still_image_url: http://<ip>:5000/best_person.jpg
-
-binary_sensor:
-  - name: Camera Motion
-    platform: mqtt
-    state_topic: "cameras/1/motion"
-    device_class: motion
-    availability_topic: "cameras/1/available"
+    still_image_url: http://<ip>:5000/<camera_name>/best_person.jpg
 
 sensor:
-  - name: Camera Person Score
+  - name: Camera Person
     platform: mqtt
-    state_topic: "cameras/1/objects"
+    state_topic: "frigate/<camera_name>/objects"
     value_template: '{{ value_json.person }}'
-    unit_of_measurement: '%'
-    availability_topic: "cameras/1/available"
+    device_class: moving
+    availability_topic: "frigate/available"
 ```
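As a quick sanity check of the topics above, a minimal subscriber sketch follows; the broker host, camera name, and the exact payload shape beyond the `person` key read by the `value_template` are illustrative assumptions, not documented behavior.

```python
# Hypothetical debugging helper for the MQTT integration above (not part of frigate).
import json
import paho.mqtt.client as mqtt

CAMERA_NAME = 'back'  # assumed camera name from config.yml

def on_connect(client, userdata, flags, rc):
    client.subscribe('frigate/{}/objects'.format(CAMERA_NAME))

def on_message(client, userdata, msg):
    # the sensor's value_template reads the "person" key, e.g. {"person": 0.87}
    payload = json.loads(msg.payload.decode('utf-8'))
    print('person score:', payload.get('person'))

client = mqtt.Client()
client.on_connect = on_connect
client.on_message = on_message
client.connect('mqtt.server.com', 1883, 60)  # assumed broker from config.yml
client.loop_forever()
```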
 
 ## Tips
 - Lower the framerate of the RTSP feed on the camera to reduce the CPU usage for capturing the feed
-- Use SSDLite models to reduce CPU usage
 
 ## Future improvements
 - [x] Remove motion detection for now
-- [ ] Try running object detection in a thread rather than a process
+- [x] Try running object detection in a thread rather than a process
 - [x] Implement min person size again
-- [ ] Switch to a config file
-- [ ] Handle multiple cameras in the same container
-- [ ] Simplify motion detection (check entire image against mask)
+- [x] Switch to a config file
+- [x] Handle multiple cameras in the same container
+- [ ] Attempt to figure out coral symlinking
+- [ ] Add object list to config with min scores for mqtt
+- [ ] Move mjpeg encoding to a separate process
+- [ ] Simplify motion detection (check entire image against mask, resize instead of gaussian blur)
 - [ ] See if motion detection is even worth running
 - [ ] Scan for people across entire image rather than specfic regions
 - [ ] Dynamically resize detection area and follow people
 - [ ] Add ability to turn detection on and off via MQTT
-- [ ] MQTT motion occasionally gets stuck ON
 - [ ] Output movie clips of people for notifications, etc.
 - [ ] Integrate with homeassistant push camera
 - [ ] Merge bounding boxes that span multiple regions
-- [ ] Allow motion regions to be different than object detection regions
 - [ ] Implement mode to save labeled objects for training
 - [ ] Try and reduce CPU usage by simplifying the tensorflow model to just include the objects we care about
 - [ ] Look into GPU accelerated decoding of RTSP stream
 - [ ] Send video over a socket and use JSMPEG
 - [x] Look into neural compute stick
-
-## Building Tensorflow from source for CPU optimizations
-https://www.tensorflow.org/install/source#docker_linux_builds
-used `tensorflow/tensorflow:1.12.0-devel-py3`
-
-## Optimizing the graph (cant say I saw much difference in CPU usage)
-https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/graph_transforms/README.md#optimizing-for-deployment
-```
-docker run -it -v ${PWD}:/lab -v ${PWD}/../back_camera_model/models/ssd_mobilenet_v2_coco_2018_03_29/frozen_inference_graph.pb:/frozen_inference_graph.pb:ro tensorflow/tensorflow:1.12.0-devel-py3 bash
-
-bazel build tensorflow/tools/graph_transforms:transform_graph
-
-bazel-bin/tensorflow/tools/graph_transforms/transform_graph \
---in_graph=/frozen_inference_graph.pb \
---out_graph=/lab/optimized_inception_graph.pb \
---inputs='image_tensor' \
---outputs='num_detections,detection_scores,detection_boxes,detection_classes' \
---transforms='
-  strip_unused_nodes(type=float, shape="1,300,300,3")
-  remove_nodes(op=Identity, op=CheckNumerics)
-  fold_constants(ignore_errors=true)
-  fold_batch_norms
-  fold_old_batch_norms'
-```
\ No newline at end of file
diff --git a/config/config.yml b/config/config.yml
index baa897662..53ee9d20b 100644
--- a/config/config.yml
+++ b/config/config.yml
@@ -1,8 +1,8 @@
 web_port: 5000
 
 mqtt:
-  host: mqtt.blakeshome.com
-  topic_prefix: cameras
+  host: mqtt.server.com
+  topic_prefix: frigate
 
 cameras:
   back:
@@ -10,18 +10,40 @@ cameras:
     rtsp:
      user: viewer
      host: 10.0.10.10
      port: 554
+      # values that begin with a "$" will be replaced with the corresponding environment variable
      password: $RTSP_PASSWORD
      path: /cam/realmonitor?channel=1&subtype=2
    regions:
      - size: 350
        x_offset: 0
        y_offset: 300
-        min_person_size: 5000
+        min_person_area: 5000
      - size: 400
        x_offset: 350
        y_offset: 250
-        min_person_size: 2000
+        min_person_area: 2000
      - size: 400
        x_offset: 750
        y_offset: 250
-        min_person_size: 2000
\ No newline at end of file
+        min_person_area: 2000
+  back2:
+    rtsp:
+      user: viewer
+      host: 10.0.10.10
+      port: 554
+      # values that begin with a "$" will be replaced with the corresponding environment variable
+      password: $RTSP_PASSWORD
+      path: /cam/realmonitor?channel=1&subtype=2
+    regions:
+      - size: 350
+        x_offset: 0
+        y_offset: 300
+        min_person_area: 5000
+      - size: 400
+        x_offset: 350
+        y_offset: 250
+        min_person_area: 2000
+      - size: 400
+        x_offset: 750
+        y_offset: 250
+        min_person_area: 2000
\ No newline at end of file
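The `$RTSP_PASSWORD` convention documented in the comment above implies a small substitution step when the camera credentials are read; a minimal sketch of that behavior under that assumption (the helper below is illustrative, not frigate's actual code):

```python
# Illustrative sketch of the "$ENV_VAR" substitution described in the config comment.
import os

def resolve(value):
    # values that begin with "$" are looked up in the environment
    if isinstance(value, str) and value.startswith('$'):
        return os.environ[value[1:]]
    return value

rtsp = {
    'user': 'viewer',
    'password': '$RTSP_PASSWORD',
    'host': '10.0.10.10',
    'port': 554,
    'path': '/cam/realmonitor?channel=1&subtype=2',
}
rtsp = {key: resolve(value) for key, value in rtsp.items()}
# a typical RTSP URL assembled from the resolved values
rtsp_url = 'rtsp://{user}:{password}@{host}:{port}{path}'.format(**rtsp)
```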
diff --git a/config/mask-0-300.bmp b/config/mask-0-300.bmp
deleted file mode 100644
index 8a80c196f..000000000
Binary files a/config/mask-0-300.bmp and /dev/null differ
diff --git a/config/mask-350-250.bmp b/config/mask-350-250.bmp
deleted file mode 100644
index 636649e99..000000000
Binary files a/config/mask-350-250.bmp and /dev/null differ
diff --git a/config/mask-750-250.bmp b/config/mask-750-250.bmp
deleted file mode 100644
index 90f4a9714..000000000
Binary files a/config/mask-750-250.bmp and /dev/null differ
diff --git a/detect_objects.py b/detect_objects.py
index babdc3300..784a6c7d9 100644
--- a/detect_objects.py
+++ b/detect_objects.py
@@ -1,30 +1,15 @@
-import os
 import cv2
-import imutils
 import time
-import datetime
-import ctypes
-import logging
-import multiprocessing as mp
 import queue
-import threading
-import json
 import yaml
-from contextlib import closing
 import numpy as np
-from object_detection.utils import visualization_utils as vis_util
-from flask import Flask, Response, make_response, send_file
+from flask import Flask, Response, make_response
 import paho.mqtt.client as mqtt
-from frigate.util import tonumpyarray
-from frigate.mqtt import MqttMotionPublisher, MqttObjectPublisher
-from frigate.objects import ObjectParser, ObjectCleaner, BestPersonFrame
-from frigate.motion import detect_motion
-from frigate.video import fetch_frames, FrameTracker, Camera
-from frigate.object_detection import FramePrepper, PreppedQueueProcessor
+from frigate.video import Camera
+from frigate.object_detection import PreppedQueueProcessor
 
 with open('/config/config.yml') as f:
-    # use safe_load instead load
     CONFIG = yaml.safe_load(f)
 
 MQTT_HOST = CONFIG['mqtt']['host']
@@ -50,9 +35,9 @@ def main():
     client.connect(MQTT_HOST, MQTT_PORT, 60)
     client.loop_start()
 
-    # Queue for prepped frames
-    # TODO: set length to 1.5x the number of total regions
-    prepped_frame_queue = queue.Queue(6)
+    # Queue for prepped frames, max size set to (number of cameras * 5)
+    max_queue_size = len(CONFIG['cameras'].items())*5
+    prepped_frame_queue = queue.Queue(max_queue_size)
 
     cameras = {}
     for name, config in CONFIG['cameras'].items():
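The queue sizing change above pairs with the `full()` check in `FramePrepper` later in this diff: producers drop frames rather than block when the detector falls behind. A standalone sketch of that pattern (placeholder values, not the actual frigate classes):

```python
# Standalone sketch of the bounded-queue handoff between FramePrepper
# threads and PreppedQueueProcessor; frame payloads here are placeholders.
import queue

num_cameras = 2  # len(CONFIG['cameras']) in detect_objects.py
prepped_frame_queue = queue.Queue(maxsize=num_cameras * 5)

def offer(frame):
    # mirrors `if not self.prepped_frame_queue.full(): ...put(...)`
    if not prepped_frame_queue.full():
        prepped_frame_queue.put(frame)
        return True
    return False  # frame dropped so detection keeps working on recent frames
```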
diff --git a/frigate/motion.py b/frigate/motion.py
deleted file mode 100644
index b09c8afc7..000000000
--- a/frigate/motion.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import datetime
-import numpy as np
-import cv2
-import imutils
-from . util import tonumpyarray
-
-# do the actual motion detection
-def detect_motion(shared_arr, shared_frame_time, frame_lock, frame_ready, motion_detected, motion_changed,
-                  frame_shape, region_size, region_x_offset, region_y_offset, min_motion_area, mask, debug):
-    # shape shared input array into frame for processing
-    arr = tonumpyarray(shared_arr).reshape(frame_shape)
-
-    avg_frame = None
-    avg_delta = None
-    last_motion = -1
-    frame_time = 0.0
-    motion_frames = 0
-    while True:
-        now = datetime.datetime.now().timestamp()
-
-        # if it has been long enough since the last motion, clear the flag
-        if last_motion > 0 and (now - last_motion) > 5:
-            last_motion = -1
-            if motion_detected.is_set():
-                motion_detected.clear()
-                with motion_changed:
-                    motion_changed.notify_all()
-
-        with frame_ready:
-            # if there isnt a frame ready for processing or it is old, wait for a signal
-            if shared_frame_time.value == frame_time or (now - shared_frame_time.value) > 0.5:
-                frame_ready.wait()
-
-        # lock and make a copy of the cropped frame
-        with frame_lock:
-            cropped_frame = arr[region_y_offset:region_y_offset+region_size, region_x_offset:region_x_offset+region_size].copy()
-            frame_time = shared_frame_time.value
-
-        # convert to grayscale
-        gray = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2GRAY)
-
-        # apply image mask to remove areas from motion detection
-        gray[mask] = [255]
-
-        # apply gaussian blur
-        gray = cv2.GaussianBlur(gray, (21, 21), 0)
-
-        if avg_frame is None:
-            avg_frame = gray.copy().astype("float")
-            continue
-
-        # look at the delta from the avg_frame
-        frameDelta = cv2.absdiff(gray, cv2.convertScaleAbs(avg_frame))
-
-        if avg_delta is None:
-            avg_delta = frameDelta.copy().astype("float")
-
-        # compute the average delta over the past few frames
-        # the alpha value can be modified to configure how sensitive the motion detection is.
-        # higher values mean the current frame impacts the delta a lot, and a single raindrop may
-        # register as motion, too low and a fast moving person wont be detected as motion
-        # this also assumes that a person is in the same location across more than a single frame
-        cv2.accumulateWeighted(frameDelta, avg_delta, 0.2)
-
-        # compute the threshold image for the current frame
-        current_thresh = cv2.threshold(frameDelta, 25, 255, cv2.THRESH_BINARY)[1]
-
-        # black out everything in the avg_delta where there isnt motion in the current frame
-        avg_delta_image = cv2.convertScaleAbs(avg_delta)
-        avg_delta_image[np.where(current_thresh==[0])] = [0]
-
-        # then look for deltas above the threshold, but only in areas where there is a delta
-        # in the current frame. this prevents deltas from previous frames from being included
-        thresh = cv2.threshold(avg_delta_image, 25, 255, cv2.THRESH_BINARY)[1]
-
-        # dilate the thresholded image to fill in holes, then find contours
-        # on thresholded image
-        thresh = cv2.dilate(thresh, None, iterations=2)
-        cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-        cnts = imutils.grab_contours(cnts)
-
-        motion_found = False
-
-        # loop over the contours
-        for c in cnts:
-            # if the contour is big enough, count it as motion
-            contour_area = cv2.contourArea(c)
-            if contour_area > min_motion_area:
-                motion_found = True
-                if debug:
-                    cv2.drawContours(cropped_frame, [c], -1, (0, 255, 0), 2)
-                    x, y, w, h = cv2.boundingRect(c)
-                    cv2.putText(cropped_frame, str(contour_area), (x, y),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 100, 0), 2)
-            else:
-                break
-
-        if motion_found:
-            motion_frames += 1
-            # if there have been enough consecutive motion frames, report motion
-            if motion_frames >= 3:
-                # only average in the current frame if the difference persists for at least 3 frames
-                cv2.accumulateWeighted(gray, avg_frame, 0.01)
-                motion_detected.set()
-                with motion_changed:
-                    motion_changed.notify_all()
-                last_motion = now
-        else:
-            # when no motion, just keep averaging the frames together
-            cv2.accumulateWeighted(gray, avg_frame, 0.01)
-            motion_frames = 0
-
-        if debug and motion_frames == 3:
-            cv2.imwrite("/lab/debug/motion-{}-{}-{}.jpg".format(region_x_offset, region_y_offset, datetime.datetime.now().timestamp()), cropped_frame)
-            cv2.imwrite("/lab/debug/avg_delta-{}-{}-{}.jpg".format(region_x_offset, region_y_offset, datetime.datetime.now().timestamp()), avg_delta_image)
diff --git a/frigate/mqtt.py b/frigate/mqtt.py
index fbd401776..0a7bd6de7 100644
--- a/frigate/mqtt.py
+++ b/frigate/mqtt.py
@@ -1,29 +1,6 @@
 import json
 import threading
 
-class MqttMotionPublisher(threading.Thread):
-    def __init__(self, client, topic_prefix, motion_changed, motion_flags):
-        threading.Thread.__init__(self)
-        self.client = client
-        self.topic_prefix = topic_prefix
-        self.motion_changed = motion_changed
-        self.motion_flags = motion_flags
-
-    def run(self):
-        last_sent_motion = ""
-        while True:
-            with self.motion_changed:
-                self.motion_changed.wait()
-
-            # send message for motion
-            motion_status = 'OFF'
-            if any(obj.is_set() for obj in self.motion_flags):
-                motion_status = 'ON'
-
-            if last_sent_motion != motion_status:
-                last_sent_motion = motion_status
-                self.client.publish(self.topic_prefix+'/motion', motion_status, retain=False)
-
 class MqttObjectPublisher(threading.Thread):
     def __init__(self, client, topic_prefix, objects_parsed, detected_objects):
         threading.Thread.__init__(self)
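For context on the `DetectWithInputTensor` call in the object_detection.py hunk below, this is roughly how the legacy Coral `edgetpu` Python API is driven; the model path comes from the Dockerfile symlink and the 300x300 size from `FramePrepper`, but treat the details as a sketch rather than frigate's exact code:

```python
# Sketch of the Edge TPU detection API used below (legacy edgetpu package).
import cv2
from edgetpu.detection.engine import DetectionEngine

engine = DetectionEngine('/frozen_inference_graph.pb')  # symlinked .tflite from the Dockerfile

frame = cv2.imread('person.jpg')
cropped_rgb = cv2.cvtColor(cv2.resize(frame, (300, 300)), cv2.COLOR_BGR2RGB)

# DetectWithInputTensor takes a flattened uint8 tensor
objects = engine.DetectWithInputTensor(cropped_rgb.flatten(), threshold=0.5, top_k=3)
for obj in objects:
    # bounding_box holds relative coordinates, flattened the same way the code below does
    print(obj.label_id, obj.score, obj.bounding_box.flatten().tolist())
```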
diff --git a/frigate/object_detection.py b/frigate/object_detection.py
index f845920a6..76050f1ec 100644
--- a/frigate/object_detection.py
+++ b/frigate/object_detection.py
@@ -36,13 +36,10 @@ class PreppedQueueProcessor(threading.Thread):
         # process queue...
         while True:
             frame = self.prepped_frame_queue.get()
-            # print(self.prepped_frame_queue.qsize())
+
             # Actual detection.
             objects = self.engine.DetectWithInputTensor(frame['frame'], threshold=0.5, top_k=3)
-            # time.sleep(0.1)
-            # objects = []
-            # print(self.engine.get_inference_time())
-            # put detected objects in the queue
+            # parse and pass detected objects back to the camera
             parsed_objects = []
             for obj in objects:
                 box = obj.bounding_box.flatten().tolist()
@@ -99,7 +96,6 @@ class FramePrepper(threading.Thread):
             # Expand dimensions since the model expects images to have shape: [1, 300, 300, 3]
             frame_expanded = np.expand_dims(cropped_frame_rgb, axis=0)
 
-            # print("Prepped frame at " + str(self.region_x_offset) + "," + str(self.region_y_offset))
             # add the frame to the queue
             if not self.prepped_frame_queue.full():
                 self.prepped_frame_queue.put({
diff --git a/frigate/objects.py b/frigate/objects.py
index 66672cf1d..5c5a2e8ac 100644
--- a/frigate/objects.py
+++ b/frigate/objects.py
@@ -3,18 +3,6 @@ import datetime
 import threading
 import cv2
 from object_detection.utils import visualization_utils as vis_util
-class ObjectParser(threading.Thread):
-    def __init__(self, cameras, object_queue, detected_objects, regions):
-        threading.Thread.__init__(self)
-        self.cameras = cameras
-        self.object_queue = object_queue
-        self.regions = regions
-
-    def run(self):
-        # frame_times = {}
-        while True:
-            obj = self.object_queue.get()
-            self.cameras[obj['camera_name']].add_object(obj)
 
 class ObjectCleaner(threading.Thread):
     def __init__(self, objects_parsed, detected_objects):
@@ -34,7 +22,6 @@ class ObjectCleaner(threading.Thread):
         # (newest objects are appended to the end)
         detected_objects = self._detected_objects.copy()
 
-        #print([round(now-obj['frame_time'],2) for obj in detected_objects])
         num_to_delete = 0
         for obj in detected_objects:
             if now-obj['frame_time']<2:
@@ -69,8 +56,6 @@ class BestPersonFrame(threading.Thread):
             # make a copy of detected objects
             detected_objects = self.detected_objects.copy()
             detected_people = [obj for obj in detected_objects if obj['name'] == 'person']
-            # make a copy of the recent frames
-            recent_frames = self.recent_frames.copy()
 
             # get the highest scoring person
             new_best_person = max(detected_people, key=lambda x:x['score'], default=self.best_person)
@@ -89,7 +74,10 @@
             # or the current person is more than 1 minute old, use the new best person
             if new_best_person['score'] > self.best_person['score'] or (now - self.best_person['frame_time']) > 60:
                 self.best_person = new_best_person
-
+
+            # make a copy of the recent frames
+            recent_frames = self.recent_frames.copy()
+
             if not self.best_person is None and self.best_person['frame_time'] in recent_frames:
                 best_frame = recent_frames[self.best_person['frame_time']]
                 best_frame = cv2.cvtColor(best_frame, cv2.COLOR_BGR2RGB)
diff --git a/frigate/video.py b/frigate/video.py
index 903064cc1..41b77f9ba 100644
--- a/frigate/video.py
+++ b/frigate/video.py
@@ -8,11 +8,10 @@ import multiprocessing as mp
 from object_detection.utils import visualization_utils as vis_util
 from . util import tonumpyarray
 from . object_detection import FramePrepper
-from . objects import ObjectCleaner, ObjectParser, BestPersonFrame
+from . objects import ObjectCleaner, BestPersonFrame
 from . mqtt import MqttObjectPublisher
 
-# fetch the frames as fast a possible, only decoding the frames when the
-# detection_process has consumed the current frame
+# fetch the frames as fast as possible and store the current frame in a shared memory array
 def fetch_frames(shared_arr, shared_frame_time, frame_lock, frame_ready, frame_shape, rtsp_url):
     # convert shared memory array into numpy and shape into image array
     arr = tonumpyarray(shared_arr).reshape(frame_shape)
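The `fetch_frames` signature at the end of the video.py hunk shows the capture side of the shared-memory handoff. Below is a condensed, simplified sketch of that pattern; it uses `np.frombuffer` directly instead of frigate's `tonumpyarray` helper, assumes a 1920x1080 camera, and omits error handling.

```python
# Simplified sketch of the shared-memory frame capture behind fetch_frames.
import ctypes
import datetime
import multiprocessing as mp

import cv2
import numpy as np

def capture(shared_arr, shared_frame_time, frame_lock, frame_ready, frame_shape, rtsp_url):
    frame = np.frombuffer(shared_arr, dtype=np.uint8).reshape(frame_shape)
    video = cv2.VideoCapture(rtsp_url)
    while True:
        ret, image = video.read()
        if not ret:
            continue
        with frame_lock:
            frame[:] = image  # overwrite the single shared buffer in place
            shared_frame_time.value = datetime.datetime.now().timestamp()
        with frame_ready:
            frame_ready.notify_all()  # wake consumers waiting on a fresh frame

if __name__ == '__main__':
    frame_shape = (1080, 1920, 3)  # assumed camera resolution
    shared_arr = mp.Array(ctypes.c_uint8, frame_shape[0] * frame_shape[1] * frame_shape[2], lock=False)
    shared_frame_time = mp.Value('d', 0.0)
    frame_lock = mp.Lock()
    frame_ready = mp.Condition()
    rtsp_url = 'rtsp://viewer:password@10.0.10.10:554/cam/realmonitor?channel=1&subtype=2'
    mp.Process(target=capture, args=(shared_arr, shared_frame_time, frame_lock,
                                     frame_ready, frame_shape, rtsp_url)).start()
```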