cleanup and update readme

blakeblackshear 2019-03-30 07:58:31 -05:00
parent 4476bd8a13
commit e0b9b616ce
12 changed files with 74 additions and 278 deletions


@@ -92,6 +92,10 @@ RUN tar xzf edgetpu_api.tar.gz \
RUN (apt-get autoremove -y; \
apt-get autoclean -y)
# symlink the model and labels
RUN ln -s /python-tflite-source/edgetpu/test_data/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite /frozen_inference_graph.pb
RUN ln -s /python-tflite-source/edgetpu/test_data/coco_labels.txt /label_map.pbtext
# Make the TF object detection API available
ENV PYTHONPATH "$PYTHONPATH:/usr/local/lib/python3.5/dist-packages/tensorflow/models/research:/usr/local/lib/python3.5/dist-packages/tensorflow/models/research/slim"
RUN cd /usr/local/lib/python3.5/dist-packages/tensorflow/models/research && protoc object_detection/protos/*.proto --python_out=.
@@ -101,6 +105,3 @@ ADD frigate frigate/
COPY detect_objects.py .
CMD ["python3", "-u", "detect_objects.py"]
# WORKDIR /python-tflite-source/edgetpu/
# CMD ["python3", "-u", "demo/classify_image.py", "--model", "test_data/mobilenet_v2_1.0_224_inat_bird_quant_edgetpu.tflite", "--label", "test_data/inat_bird_labels.txt", "--image", "test_data/parrot.jpg"]

README.md

@@ -1,18 +1,18 @@
# Frigate - Realtime Object Detection for RTSP Cameras
**Note:** This version requires the use of a [Google Coral USB Accelerator](https://coral.withgoogle.com/products/accelerator/)
Uses OpenCV and Tensorflow to perform realtime object detection locally for RTSP cameras. Designed for integration with HomeAssistant or others via MQTT.
- Leverages multiprocessing and threads heavily with an emphasis on realtime over processing every frame
- Allows you to define specific regions (squares) in the image to look for motion/objects
- Motion detection runs in a separate process per region and signals to object detection to avoid wasting CPU cycles looking for objects when there is no motion
- Object detection with Tensorflow runs in a separate process per region
- Detected objects are placed on a shared mp.Queue and aggregated into a list of recently detected objects in a separate thread
- A person score is calculated as the sum of all scores divided by 5
- Motion and object info is published over MQTT for integration into HomeAssistant or others
- Allows you to define specific regions (squares) in the image to look for objects
- No motion detection (for now)
- Object detection with Tensorflow runs in a separate thread
- Object info is published over MQTT for integration into HomeAssistant as a binary sensor
- An endpoint is available to view an MJPEG stream for debugging
![Diagram](diagram.png)
## Example video
## Example video (from older version)
You see multiple bounding boxes because boxes are drawn from all frames in the past second where a person was detected. Not all of the bounding boxes are from the current frame.
[![](http://img.youtube.com/vi/nqHbCtyo4dY/0.jpg)](http://www.youtube.com/watch?v=nqHbCtyo4dY "Frigate")
@@ -22,24 +22,16 @@ Build the container with
docker build -t frigate .
```
Download a model from the [zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md).
Download the corresponding label map from [here](https://github.com/tensorflow/models/tree/master/research/object_detection/data).
The `mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite` model is included and used by default. You can use your own model and labels by mounting files in the container at `/frozen_inference_graph.pb` and `/label_map.pbtext`. Models must be compatible with the Coral according to [this](https://coral.withgoogle.com/models/).
Run the container with
```
docker run --rm \
-v <path_to_frozen_detection_graph.pb>:/frozen_inference_graph.pb:ro \
-v <path_to_labelmap.pbtext>:/label_map.pbtext:ro \
--privileged \
-v /dev/bus/usb:/dev/bus/usb \
-v <path_to_config_dir>:/config:ro \
-p 5000:5000 \
-e RTSP_URL='<rtsp_url>' \
-e REGIONS='<box_size_1>,<x_offset_1>,<y_offset_1>,<min_person_size_1>,<min_motion_size_1>,<mask_file_1>:<box_size_2>,<x_offset_2>,<y_offset_2>,<min_person_size_2>,<min_motion_size_2>,<mask_file_2>' \
-e MQTT_HOST='your.mqtthost.com' \
-e MQTT_USER='username' \
-e MQTT_PASS='password' \
-e MQTT_TOPIC_PREFIX='cameras/1' \
-e DEBUG='0' \
-e RTSP_PASSWORD='password' \
frigate:latest
```
@@ -48,107 +40,59 @@ Example docker-compose:
frigate:
container_name: frigate
restart: unless-stopped
privileged: true
image: frigate:latest
volumes:
- <path_to_frozen_detection_graph.pb>:/frozen_inference_graph.pb:ro
- <path_to_labelmap.pbtext>:/label_map.pbtext:ro
- /dev/bus/usb:/dev/bus/usb
- <path_to_config>:/config
ports:
- "127.0.0.1:5000:5000"
- "5000:5000"
environment:
RTSP_URL: "<rtsp_url>"
REGIONS: "<box_size_1>,<x_offset_1>,<y_offset_1>,<min_person_size_1>,<min_motion_size_1>,<mask_file_1>:<box_size_2>,<x_offset_2>,<y_offset_2>,<min_person_size_2>,<min_motion_size_2>,<mask_file_2>"
MQTT_HOST: "your.mqtthost.com"
MQTT_USER: "username" #optional
MQTT_PASS: "password" #optional
MQTT_TOPIC_PREFIX: "cameras/1"
DEBUG: "0"
RTSP_PASSWORD: "password"
```
Here is an example `REGIONS` env variable:
`350,0,300,5000,200,mask-0-300.bmp:400,350,250,2000,200,mask-350-250.bmp:400,750,250,2000,200,mask-750-250.bmp`
A `config.yml` file must exist in the `config` directory. See example [here](config/config.yml).
First region broken down (all are required):
- `350` - size of the square (350px by 350px)
- `0` - x coordinate of upper left corner (top left of image is 0,0)
- `300` - y coordinate of upper left corner (top left of image is 0,0)
- `5000` - minimum person bounding box size (width*height for bounding box of identified person)
- `200` - minimum number of changed pixels to trigger motion
- `mask-0-300.bmp` - a bmp file with the masked regions in pure black; it must be the same size as the region
Mask files go in the `/config` directory.
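For intuition, each region is just a square crop of the full frame taken before detection. A minimal numpy sketch of the first region above (the 1080p frame shape is a made-up assumption):
```
import numpy as np

# made-up 1080p frame; each region crops a square before detection
frame = np.zeros((1080, 1920, 3), dtype=np.uint8)

size, x_offset, y_offset = 350, 0, 300
region = frame[y_offset:y_offset + size, x_offset:x_offset + size]
print(region.shape)  # (350, 350, 3)
```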
Access the mjpeg stream at http://localhost:5000
Access the mjpeg stream at `http://localhost:5000/<camera_name>` and the best person snapshot at `http://localhost:5000/<camera_name>/best_person.jpg`
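These endpoints are plain Flask routes; a hedged sketch of what serving the snapshot might look like (the frame lookup is a hypothetical stand-in, not the actual frigate code):
```
import cv2
import numpy as np
from flask import Flask, make_response

app = Flask(__name__)

def best_person_frame(camera_name):
    # hypothetical stand-in: the real app returns the highest scoring
    # recent person frame for this camera
    return np.zeros((300, 300, 3), dtype=np.uint8)

@app.route('/<camera_name>/best_person.jpg')
def best_person(camera_name):
    ret, jpg = cv2.imencode('.jpg', best_person_frame(camera_name))
    response = make_response(jpg.tobytes())
    response.headers['Content-Type'] = 'image/jpg'
    return response
```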
## Integration with HomeAssistant
```
camera:
- name: Camera Last Person
platform: generic
still_image_url: http://<ip>:5000/best_person.jpg
binary_sensor:
- name: Camera Motion
platform: mqtt
state_topic: "cameras/1/motion"
device_class: motion
availability_topic: "cameras/1/available"
still_image_url: http://<ip>:5000/<camera_name>/best_person.jpg
sensor:
- name: Camera Person Score
- name: Camera Person
platform: mqtt
state_topic: "cameras/1/objects"
state_topic: "frigate/<camera_name>/objects"
value_template: '{{ value_json.person }}'
unit_of_measurement: '%'
availability_topic: "cameras/1/available"
device_class: moving
availability_topic: "frigate/available"
```
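The `value_template` above assumes the payload on the objects topic is JSON with a top-level `person` score. A minimal sketch of a compatible publish (the topic and score are illustrative values):
```
import json
import paho.mqtt.client as mqtt

client = mqtt.Client()
client.connect('your.mqtthost.com', 1883, 60)
# shape expected by the value_template above; score as a percentage
client.publish('frigate/back/objects', json.dumps({'person': 91}), retain=False)
```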
## Tips
- Lower the framerate of the RTSP feed on the camera to reduce the CPU usage for capturing the feed
- Use SSDLite models to reduce CPU usage
## Future improvements
- [x] Remove motion detection for now
- [ ] Try running object detection in a thread rather than a process
- [x] Try running object detection in a thread rather than a process
- [x] Implement min person size again
- [ ] Switch to a config file
- [ ] Handle multiple cameras in the same container
- [ ] Simplify motion detection (check entire image against mask)
- [x] Switch to a config file
- [x] Handle multiple cameras in the same container
- [ ] Attempt to figure out coral symlinking
- [ ] Add object list to config with min scores for mqtt
- [ ] Move mjpeg encoding to a separate process
- [ ] Simplify motion detection (check entire image against mask, resize instead of gaussian blur)
- [ ] See if motion detection is even worth running
- [ ] Scan for people across entire image rather than specific regions
- [ ] Dynamically resize detection area and follow people
- [ ] Add ability to turn detection on and off via MQTT
- [ ] MQTT motion occasionally gets stuck ON
- [ ] Output movie clips of people for notifications, etc.
- [ ] Integrate with homeassistant push camera
- [ ] Merge bounding boxes that span multiple regions
- [ ] Allow motion regions to be different than object detection regions
- [ ] Implement mode to save labeled objects for training
- [ ] Try and reduce CPU usage by simplifying the tensorflow model to just include the objects we care about
- [ ] Look into GPU accelerated decoding of RTSP stream
- [ ] Send video over a socket and use JSMPEG
- [x] Look into neural compute stick
## Building Tensorflow from source for CPU optimizations
https://www.tensorflow.org/install/source#docker_linux_builds
Used the `tensorflow/tensorflow:1.12.0-devel-py3` image.
## Optimizing the graph (can't say I saw much difference in CPU usage)
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/graph_transforms/README.md#optimizing-for-deployment
```
docker run -it -v ${PWD}:/lab -v ${PWD}/../back_camera_model/models/ssd_mobilenet_v2_coco_2018_03_29/frozen_inference_graph.pb:/frozen_inference_graph.pb:ro tensorflow/tensorflow:1.12.0-devel-py3 bash
bazel build tensorflow/tools/graph_transforms:transform_graph
bazel-bin/tensorflow/tools/graph_transforms/transform_graph \
--in_graph=/frozen_inference_graph.pb \
--out_graph=/lab/optimized_inception_graph.pb \
--inputs='image_tensor' \
--outputs='num_detections,detection_scores,detection_boxes,detection_classes' \
--transforms='
strip_unused_nodes(type=float, shape="1,300,300,3")
remove_nodes(op=Identity, op=CheckNumerics)
fold_constants(ignore_errors=true)
fold_batch_norms
fold_old_batch_norms'
```


@@ -1,8 +1,8 @@
web_port: 5000
mqtt:
host: mqtt.blakeshome.com
topic_prefix: cameras
host: mqtt.server.com
topic_prefix: frigate
cameras:
back:
@@ -10,18 +10,40 @@ cameras:
user: viewer
host: 10.0.10.10
port: 554
# values that begin with a "$" will be replaced with an environment variable
password: $RTSP_PASSWORD
path: /cam/realmonitor?channel=1&subtype=2
regions:
- size: 350
x_offset: 0
y_offset: 300
min_person_size: 5000
min_person_area: 5000
- size: 400
x_offset: 350
y_offset: 250
min_person_size: 2000
min_person_area: 2000
- size: 400
x_offset: 750
y_offset: 250
min_person_size: 2000
min_person_area: 2000
back2:
rtsp:
user: viewer
host: 10.0.10.10
port: 554
# values that begin with a "$" will be replaced with an environment variable
password: $RTSP_PASSWORD
path: /cam/realmonitor?channel=1&subtype=2
regions:
- size: 350
x_offset: 0
y_offset: 300
min_person_area: 5000
- size: 400
x_offset: 350
y_offset: 250
min_person_area: 2000
- size: 400
x_offset: 750
y_offset: 250
min_person_area: 2000
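A minimal sketch of how the `$` substitution noted in the comments above could be implemented when the config is loaded (an assumption about the mechanism, not necessarily the actual implementation):
```
import os
import yaml

def resolve_env(value):
    # values that begin with "$" are looked up in the environment
    if isinstance(value, str) and value.startswith('$'):
        return os.environ.get(value[1:], value)
    if isinstance(value, dict):
        return {k: resolve_env(v) for k, v in value.items()}
    if isinstance(value, list):
        return [resolve_env(v) for v in value]
    return value

with open('/config/config.yml') as f:
    config = resolve_env(yaml.safe_load(f))
```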

Three binary image files deleted (239 KiB, 313 KiB, 313 KiB); contents not shown.


@@ -1,30 +1,15 @@
import os
import cv2
import imutils
import time
import datetime
import ctypes
import logging
import multiprocessing as mp
import queue
import threading
import json
import yaml
from contextlib import closing
import numpy as np
from object_detection.utils import visualization_utils as vis_util
from flask import Flask, Response, make_response, send_file
from flask import Flask, Response, make_response
import paho.mqtt.client as mqtt
from frigate.util import tonumpyarray
from frigate.mqtt import MqttMotionPublisher, MqttObjectPublisher
from frigate.objects import ObjectParser, ObjectCleaner, BestPersonFrame
from frigate.motion import detect_motion
from frigate.video import fetch_frames, FrameTracker, Camera
from frigate.object_detection import FramePrepper, PreppedQueueProcessor
from frigate.video import Camera
from frigate.object_detection import PreppedQueueProcessor
with open('/config/config.yml') as f:
# use safe_load instead of load
CONFIG = yaml.safe_load(f)
MQTT_HOST = CONFIG['mqtt']['host']
@@ -50,9 +35,9 @@ def main():
client.connect(MQTT_HOST, MQTT_PORT, 60)
client.loop_start()
# Queue for prepped frames
# TODO: set length to 1.5x the number of total regions
prepped_frame_queue = queue.Queue(6)
# Queue for prepped frames, max size set to (number of cameras * 5)
max_queue_size = len(CONFIG['cameras'].items())*5
prepped_frame_queue = queue.Queue(max_queue_size)
cameras = {}
for name, config in CONFIG['cameras'].items():
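The bounded queue above reflects the project's realtime emphasis: when detection falls behind, producers drop frames instead of blocking. A small sketch of that pattern (the camera count is an illustrative value):
```
import queue

# illustrative sizing: mirrors the (number of cameras * 5) comment above
num_cameras = 2
prepped_frame_queue = queue.Queue(num_cameras * 5)

def offer(frame):
    # never block the capture loop; skip the frame if detection is behind
    if not prepped_frame_queue.full():
        prepped_frame_queue.put(frame)
```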


@@ -1,116 +0,0 @@
import datetime
import numpy as np
import cv2
import imutils
from . util import tonumpyarray
# do the actual motion detection
def detect_motion(shared_arr, shared_frame_time, frame_lock, frame_ready, motion_detected, motion_changed,
frame_shape, region_size, region_x_offset, region_y_offset, min_motion_area, mask, debug):
# shape shared input array into frame for processing
arr = tonumpyarray(shared_arr).reshape(frame_shape)
avg_frame = None
avg_delta = None
last_motion = -1
frame_time = 0.0
motion_frames = 0
while True:
now = datetime.datetime.now().timestamp()
# if it has been long enough since the last motion, clear the flag
if last_motion > 0 and (now - last_motion) > 5:
last_motion = -1
if motion_detected.is_set():
motion_detected.clear()
with motion_changed:
motion_changed.notify_all()
with frame_ready:
# if there isn't a frame ready for processing or it is old, wait for a signal
if shared_frame_time.value == frame_time or (now - shared_frame_time.value) > 0.5:
frame_ready.wait()
# lock and make a copy of the cropped frame
with frame_lock:
cropped_frame = arr[region_y_offset:region_y_offset+region_size, region_x_offset:region_x_offset+region_size].copy()
frame_time = shared_frame_time.value
# convert to grayscale
gray = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2GRAY)
# apply image mask to remove areas from motion detection
gray[mask] = [255]
# apply gaussian blur
gray = cv2.GaussianBlur(gray, (21, 21), 0)
if avg_frame is None:
avg_frame = gray.copy().astype("float")
continue
# look at the delta from the avg_frame
frameDelta = cv2.absdiff(gray, cv2.convertScaleAbs(avg_frame))
if avg_delta is None:
avg_delta = frameDelta.copy().astype("float")
# compute the average delta over the past few frames
# the alpha value can be modified to configure how sensitive the motion detection is.
# higher values mean the current frame impacts the delta a lot, and a single raindrop may
# register as motion; too low, and a fast-moving person won't be detected as motion.
# this also assumes that a person is in the same location across more than a single frame
cv2.accumulateWeighted(frameDelta, avg_delta, 0.2)
# compute the threshold image for the current frame
current_thresh = cv2.threshold(frameDelta, 25, 255, cv2.THRESH_BINARY)[1]
# black out everything in the avg_delta where there isn't motion in the current frame
avg_delta_image = cv2.convertScaleAbs(avg_delta)
avg_delta_image[np.where(current_thresh==[0])] = [0]
# then look for deltas above the threshold, but only in areas where there is a delta
# in the current frame. this prevents deltas from previous frames from being included
thresh = cv2.threshold(avg_delta_image, 25, 255, cv2.THRESH_BINARY)[1]
# dilate the thresholded image to fill in holes, then find contours
# on thresholded image
thresh = cv2.dilate(thresh, None, iterations=2)
cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
motion_found = False
# loop over the contours
for c in cnts:
# if the contour is big enough, count it as motion
contour_area = cv2.contourArea(c)
if contour_area > min_motion_area:
motion_found = True
if debug:
cv2.drawContours(cropped_frame, [c], -1, (0, 255, 0), 2)
x, y, w, h = cv2.boundingRect(c)
cv2.putText(cropped_frame, str(contour_area), (x, y),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 100, 0), 2)
else:
break
if motion_found:
motion_frames += 1
# if there have been enough consecutive motion frames, report motion
if motion_frames >= 3:
# only average in the current frame if the difference persists for at least 3 frames
cv2.accumulateWeighted(gray, avg_frame, 0.01)
motion_detected.set()
with motion_changed:
motion_changed.notify_all()
last_motion = now
else:
# when no motion, just keep averaging the frames together
cv2.accumulateWeighted(gray, avg_frame, 0.01)
motion_frames = 0
if debug and motion_frames == 3:
cv2.imwrite("/lab/debug/motion-{}-{}-{}.jpg".format(region_x_offset, region_y_offset, datetime.datetime.now().timestamp()), cropped_frame)
cv2.imwrite("/lab/debug/avg_delta-{}-{}-{}.jpg".format(region_x_offset, region_y_offset, datetime.datetime.now().timestamp()), avg_delta_image)


@@ -1,29 +1,6 @@
import json
import threading
class MqttMotionPublisher(threading.Thread):
def __init__(self, client, topic_prefix, motion_changed, motion_flags):
threading.Thread.__init__(self)
self.client = client
self.topic_prefix = topic_prefix
self.motion_changed = motion_changed
self.motion_flags = motion_flags
def run(self):
last_sent_motion = ""
while True:
with self.motion_changed:
self.motion_changed.wait()
# send message for motion
motion_status = 'OFF'
if any(obj.is_set() for obj in self.motion_flags):
motion_status = 'ON'
if last_sent_motion != motion_status:
last_sent_motion = motion_status
self.client.publish(self.topic_prefix+'/motion', motion_status, retain=False)
class MqttObjectPublisher(threading.Thread):
def __init__(self, client, topic_prefix, objects_parsed, detected_objects):
threading.Thread.__init__(self)


@@ -36,13 +36,10 @@ class PreppedQueueProcessor(threading.Thread):
# process queue...
while True:
frame = self.prepped_frame_queue.get()
# print(self.prepped_frame_queue.qsize())
# Actual detection.
objects = self.engine.DetectWithInputTensor(frame['frame'], threshold=0.5, top_k=3)
# time.sleep(0.1)
# objects = []
# print(self.engine.get_inference_time())
# put detected objects in the queue
# parse and pass detected objects back to the camera
parsed_objects = []
for obj in objects:
box = obj.bounding_box.flatten().tolist()
@@ -99,7 +96,6 @@ class FramePrepper(threading.Thread):
# Expand dimensions since the model expects images to have shape: [1, 300, 300, 3]
frame_expanded = np.expand_dims(cropped_frame_rgb, axis=0)
# print("Prepped frame at " + str(self.region_x_offset) + "," + str(self.region_y_offset))
# add the frame to the queue
if not self.prepped_frame_queue.full():
self.prepped_frame_queue.put({


@@ -3,18 +3,6 @@ import datetime
import threading
import cv2
from object_detection.utils import visualization_utils as vis_util
class ObjectParser(threading.Thread):
def __init__(self, cameras, object_queue, detected_objects, regions):
threading.Thread.__init__(self)
self.cameras = cameras
self.object_queue = object_queue
self.regions = regions
def run(self):
# frame_times = {}
while True:
obj = self.object_queue.get()
self.cameras[obj['camera_name']].add_object(obj)
class ObjectCleaner(threading.Thread):
def __init__(self, objects_parsed, detected_objects):
@@ -34,7 +22,6 @@ class ObjectCleaner(threading.Thread):
# (newest objects are appended to the end)
detected_objects = self._detected_objects.copy()
#print([round(now-obj['frame_time'],2) for obj in detected_objects])
num_to_delete = 0
for obj in detected_objects:
if now-obj['frame_time']<2:
@@ -69,8 +56,6 @@ class BestPersonFrame(threading.Thread):
# make a copy of detected objects
detected_objects = self.detected_objects.copy()
detected_people = [obj for obj in detected_objects if obj['name'] == 'person']
# make a copy of the recent frames
recent_frames = self.recent_frames.copy()
# get the highest scoring person
new_best_person = max(detected_people, key=lambda x:x['score'], default=self.best_person)
@@ -89,7 +74,10 @@
# or the current person is more than 1 minute old, use the new best person
if new_best_person['score'] > self.best_person['score'] or (now - self.best_person['frame_time']) > 60:
self.best_person = new_best_person
# make a copy of the recent frames
recent_frames = self.recent_frames.copy()
if self.best_person is not None and self.best_person['frame_time'] in recent_frames:
best_frame = recent_frames[self.best_person['frame_time']]
best_frame = cv2.cvtColor(best_frame, cv2.COLOR_BGR2RGB)


@@ -8,11 +8,10 @@ import multiprocessing as mp
from object_detection.utils import visualization_utils as vis_util
from . util import tonumpyarray
from . object_detection import FramePrepper
from . objects import ObjectCleaner, ObjectParser, BestPersonFrame
from . objects import ObjectCleaner, BestPersonFrame
from . mqtt import MqttObjectPublisher
# fetch the frames as fast as possible, only decoding the frames when the
# detection_process has consumed the current frame
# fetch the frames as fast as possible and store the current frame in a shared memory array
def fetch_frames(shared_arr, shared_frame_time, frame_lock, frame_ready, frame_shape, rtsp_url):
# convert shared memory array into numpy and shape into image array
arr = tonumpyarray(shared_arr).reshape(frame_shape)