diff --git a/Dockerfile b/Dockerfile
index 9478692d4..86eeca94c 100755
--- a/Dockerfile
+++ b/Dockerfile
@@ -55,5 +55,6 @@ RUN wget -q https://storage.googleapis.com/download.tensorflow.org/models/tflite
 WORKDIR /opt/frigate/
 ADD frigate frigate/
 COPY detect_objects.py .
+COPY benchmark.py .
 
 CMD ["python3.7", "-u", "detect_objects.py"]
diff --git a/README.md b/README.md
index 63d522054..a87f558f1 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,13 @@
 # Frigate - Realtime Object Detection for IP Cameras
-**Note:** This version requires the use of a [Google Coral USB Accelerator](https://coral.withgoogle.com/products/accelerator/)
-
 Uses OpenCV and Tensorflow to perform realtime object detection locally for IP cameras. Designed for integration with HomeAssistant or others via MQTT.
 
-- Leverages multiprocessing and threads heavily with an emphasis on realtime over processing every frame
-- Allows you to define specific regions (squares) in the image to look for objects
-- No motion detection (for now)
-- Object detection with Tensorflow runs in a separate thread
+Use of a [Google Coral USB Accelerator](https://coral.withgoogle.com/products/accelerator/) is optional, but highly recommended. On my Intel i7 processor, I can process 2-3 FPS with the CPU. The Coral can process 100+ FPS with very low CPU load.
+
+- Leverages multiprocessing heavily with an emphasis on realtime over processing every frame
+- Uses a very low overhead motion detection to determine where to run object detection
+- Object detection with Tensorflow runs in a separate process
 - Object info is published over MQTT for integration into HomeAssistant as a binary sensor
-- An endpoint is available to view an MJPEG stream for debugging
+- An endpoint is available to view an MJPEG stream for debugging, but should not be used continuously
 
 ![Diagram](diagram.png)
 
@@ -22,12 +21,16 @@ Build the container with
 docker build -t frigate .
 ```
 
-The `mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite` model is included and used by default. You can use your own model and labels by mounting files in the container at `/frozen_inference_graph.pb` and `/label_map.pbtext`. Models must be compatible with the Coral according to [this](https://coral.withgoogle.com/models/).
+Models for both CPU and EdgeTPU (Coral) are bundled in the image. You can use your own models with volume mounts:
+- CPU Model: `/cpu_model.tflite`
+- EdgeTPU Model: `/edgetpu_model.tflite`
+- Labels: `/labelmap.txt`
 
 Run the container with
-```
+```bash
 docker run --rm \
 --privileged \
+--shm-size=512m \ # should work for a 2-3 cameras
 -v /dev/bus/usb:/dev/bus/usb \
 -v <path_to_config_dir>:/config:ro \
 -v /etc/localtime:/etc/localtime:ro \
@@ -37,11 +40,12 @@ frigate:latest
 ```
 
 Example docker-compose:
-```
+```yaml
   frigate:
     container_name: frigate
     restart: unless-stopped
     privileged: true
+    shm_size: '1g' # should work for 5-7 cameras
     image: frigate:latest
     volumes:
       - /dev/bus/usb:/dev/bus/usb
@@ -57,6 +61,8 @@ A `config.yml` file must exist in the `config` directory. See example [here](con
 
 Access the mjpeg stream at `http://localhost:5000/<camera_name>` and the best snapshot for any object type with at `http://localhost:5000/<camera_name>/<object_name>/best.jpg`
 
+Debug info is available at `http://localhost:5000/debug/stats`
+
 ## Integration with HomeAssistant
 ```
 camera:
@@ -93,30 +99,34 @@ automation:
             photo:
               - url: http://<ip>:5000/<camera_name>/person/best.jpg
                 caption: A person was detected.
+
+sensor:
+  - platform: rest
+    name: Frigate Debug
+    resource: http://localhost:5000/debug/stats
+    scan_interval: 5
+    json_attributes:
+      - back
+      - coral
+    value_template: 'OK'
+  - platform: template
+    sensors:
+      back_fps:
+        value_template: '{{ states.sensor.frigate_debug.attributes["back"]["fps"] }}'
+        unit_of_measurement: 'FPS'
+      back_skipped_fps:
+        value_template: '{{ states.sensor.frigate_debug.attributes["back"]["skipped_fps"] }}'
+        unit_of_measurement: 'FPS'
+      back_detection_fps:
+        value_template: '{{ states.sensor.frigate_debug.attributes["back"]["detection_fps"] }}'
+        unit_of_measurement: 'FPS'
+      frigate_coral_fps:
+        value_template: '{{ states.sensor.frigate_debug.attributes["coral"]["fps"] }}'
+        unit_of_measurement: 'FPS'
+      frigate_coral_inference:
+        value_template: '{{ states.sensor.frigate_debug.attributes["coral"]["inference_speed"] }}'
+        unit_of_measurement: 'ms'
 ```
 
 ## Tips
 - Lower the framerate of the video feed on the camera to reduce the CPU usage for capturing the feed
-
-## Future improvements
-- [x] Remove motion detection for now
-- [x] Try running object detection in a thread rather than a process
-- [x] Implement min person size again
-- [x] Switch to a config file
-- [x] Handle multiple cameras in the same container
-- [ ] Attempt to figure out coral symlinking
-- [ ] Add object list to config with min scores for mqtt
-- [ ] Move mjpeg encoding to a separate process
-- [ ] Simplify motion detection (check entire image against mask, resize instead of gaussian blur)
-- [ ] See if motion detection is even worth running
-- [ ] Scan for people across entire image rather than specfic regions
-- [ ] Dynamically resize detection area and follow people
-- [ ] Add ability to turn detection on and off via MQTT
-- [ ] Output movie clips of people for notifications, etc.
-- [ ] Integrate with homeassistant push camera
-- [ ] Merge bounding boxes that span multiple regions
-- [ ] Implement mode to save labeled objects for training
-- [ ] Try and reduce CPU usage by simplifying the tensorflow model to just include the objects we care about
-- [ ] Look into GPU accelerated decoding of RTSP stream
-- [ ] Send video over a socket and use JSMPEG
-- [x] Look into neural compute stick
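The REST sensor added to the README above polls the new `/debug/stats` endpoint. For reference, a minimal sketch of reading the same stats outside HomeAssistant, assuming the endpoint returns a JSON object with a per-camera section (`back`) and a `coral` section as the sensor templates imply; the script and field handling are illustrative, not part of this change:

```python
import json
import time
import urllib.request

STATS_URL = "http://localhost:5000/debug/stats"  # debug endpoint added in this change

def fetch_stats(url=STATS_URL):
    # Fetch and decode the debug stats JSON.
    with urllib.request.urlopen(url, timeout=5) as resp:
        return json.loads(resp.read().decode("utf-8"))

if __name__ == "__main__":
    while True:
        stats = fetch_stats()
        camera = stats.get("back", {})  # per-camera stats (camera name assumed to be "back")
        coral = stats.get("coral", {})  # detector stats
        print(
            f"back: {camera.get('fps')} fps, "
            f"{camera.get('skipped_fps')} skipped fps, "
            f"{camera.get('detection_fps')} detection fps | "
            f"coral: {coral.get('fps')} fps, "
            f"{coral.get('inference_speed')} ms inference"
        )
        time.sleep(5)  # matches the scan_interval used by the REST sensor
```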
diff --git a/benchmark.py b/benchmark.py
old mode 100644
new mode 100755
index 81f935005..a8c6ff2cb
--- a/benchmark.py
+++ b/benchmark.py
@@ -1,20 +1,18 @@
 import statistics
 import numpy as np
-from edgetpu.detection.engine import DetectionEngine
+import time
+from frigate.edgetpu import ObjectDetector
 
-# Path to frozen detection graph. This is the actual model that is used for the object detection.
-PATH_TO_CKPT = '/frozen_inference_graph.pb'
-
-# Load the edgetpu engine and labels
-engine = DetectionEngine(PATH_TO_CKPT)
+object_detector = ObjectDetector()
 
 frame = np.zeros((300,300,3), np.uint8)
-flattened_frame = np.expand_dims(frame, axis=0).flatten()
+input_frame = np.expand_dims(frame, axis=0)
 
 detection_times = []
 
-for x in range(0, 1000):
-    objects = engine.detect_with_input_tensor(flattened_frame, threshold=0.1, top_k=3)
-    detection_times.append(engine.get_inference_time())
+for x in range(0, 100):
+    start = time.monotonic()
+    object_detector.detect_raw(input_frame)
+    detection_times.append(time.monotonic()-start)
 
-print("Average inference time: " + str(statistics.mean(detection_times)))
\ No newline at end of file
+print(f"Average inference time: {statistics.mean(detection_times)*1000:.2f}ms")
\ No newline at end of file
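The updated benchmark times `ObjectDetector.detect_raw()` by wrapping each call in `time.monotonic()`. The same pattern can be factored into a small helper that times any detector callable; a sketch under that assumption (the `time_detector` helper and the stand-in detector are illustrative, not part of the codebase):

```python
import statistics
import time

import numpy as np

def time_detector(detect, input_frame, iterations=100):
    # Time repeated calls to `detect` with time.monotonic(), as benchmark.py does,
    # and return the mean duration per call in milliseconds.
    durations = []
    for _ in range(iterations):
        start = time.monotonic()
        detect(input_frame)
        durations.append(time.monotonic() - start)
    return statistics.mean(durations) * 1000

if __name__ == "__main__":
    frame = np.zeros((300, 300, 3), np.uint8)
    input_frame = np.expand_dims(frame, axis=0)

    def stand_in(tensor):
        # Stand-in detector so the sketch runs without a Coral or model;
        # inside the container, pass ObjectDetector().detect_raw instead.
        return tensor.sum()

    print(f"Average inference time: {time_detector(stand_in, input_frame):.2f}ms")
```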
diff --git a/config/config.example.yml b/config/config.example.yml
index 162df2f74..ec50dbb33 100644
--- a/config/config.example.yml
+++ b/config/config.example.yml
@@ -39,8 +39,6 @@ mqtt:
 #     - -use_wallclock_as_timestamps
 #     - '1'
 #   output_args:
-#     - -vf
-#     - mpdecimate
 #     - -f
 #     - rawvideo
 #     - -pix_fmt
@@ -89,12 +87,15 @@ cameras:
 #    width: 720
 
     ################
-    ## Optional mask. Must be the same dimensions as your video feed.
+    ## Optional mask. Must be the same aspect ratio as your video feed.
+    ##
     ## The mask works by looking at the bottom center of the bounding box for the detected
     ## person in the image. If that pixel in the mask is a black pixel, it ignores it as a
     ## false positive. In my mask, the grass and driveway visible from my backdoor camera
     ## are white. The garage doors, sky, and trees (anywhere it would be impossible for a
     ## person to stand) are black.
+    ##
+    ## Masked areas are also ignored for motion detection.
     ################
     # mask: back-mask.bmp
 
@@ -106,13 +107,14 @@ cameras:
     take_frame: 1
 
     ################
-    # The number of seconds frigate will allow a camera to go without sending a frame before
-    # assuming the ffmpeg process has a problem and restarting.
+    # The expected framerate for the camera. Frigate will try and ensure it maintains this framerate
+    # by dropping frames as necessary. Setting this lower than the actual framerate will allow frigate
+    # to process every frame at the expense of realtime processing.
     ################
-    # watchdog_timeout: 300
+    fps: 5
 
     ################
-    # Configuration for the snapshot sent over mqtt
+    # Configuration for the snapshots in the debug view and mqtt
     ################
     snapshots:
       show_timestamp: True
@@ -128,21 +130,3 @@ cameras:
           min_area: 5000
           max_area: 100000
           threshold: 0.5
-
-    ################
-    # size: size of the region in pixels
-    # x_offset/y_offset: position of the upper left corner of your region (top left of image is 0,0)
-    # Tips: All regions are resized to 300x300 before detection because the model is trained on that size.
-    #       Resizing regions takes CPU power. Ideally, all regions should be as close to 300x300 as possible.
-    #       Defining a region that goes outside the bounds of the image will result in errors.
-    ################
-    regions:
-      - size: 350
-        x_offset: 0
-        y_offset: 300
-      - size: 400
-        x_offset: 350
-        y_offset: 250
-      - size: 400
-        x_offset: 750
-        y_offset: 250
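The mask comments above describe the false-positive check: look at the bottom center of a detected person's bounding box and ignore the detection if that pixel in the mask is black. A minimal sketch of that check, assuming a grayscale mask loaded with OpenCV (the file name, box coordinates, and threshold are illustrative):

```python
import cv2

def is_masked_false_positive(box, mask, threshold=127):
    # box is (x_min, y_min, x_max, y_max) in the same coordinates as the mask.
    x_min, y_min, x_max, y_max = box
    # Sample the bottom center of the bounding box (roughly where a person's feet are).
    x = int((x_min + x_max) / 2)
    y = int(y_max)
    # Clamp to the mask bounds to avoid indexing errors at the frame edge.
    x = min(max(x, 0), mask.shape[1] - 1)
    y = min(max(y, 0), mask.shape[0] - 1)
    # A dark (black) pixel means the area is masked out, so treat the detection as a false positive.
    return mask[y, x] < threshold

if __name__ == "__main__":
    # Load the mask as a single-channel image; black areas mark ignored regions.
    mask = cv2.imread("back-mask.bmp", cv2.IMREAD_GRAYSCALE)
    print(is_masked_false_positive((420, 180, 560, 470), mask))
```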
diff --git a/diagram.png b/diagram.png
index 4fd2d03a2..1ee8659e5 100644
Binary files a/diagram.png and b/diagram.png differ
diff --git a/frigate/video.py b/frigate/video.py
index 39c700194..e3b2e48bd 100755
--- a/frigate/video.py
+++ b/frigate/video.py
@@ -350,38 +350,3 @@ def track_camera(name, config, ffmpeg_global_config, global_objects_config, dete
         plasma_client.put(frame, plasma.ObjectID(object_id))
         # add to the queue
         detected_objects_queue.put((name, frame_time, object_tracker.tracked_objects))
-
-        # if (frames >= 700 and frames <= 1635) or (frames >= 2500):
-        # if (frames >= 300 and frames <= 600):
-        # if (frames >= 0):
-        #     row1 = cv2.hconcat([gray, cv2.convertScaleAbs(avg_frame)])
-        #     row2 = cv2.hconcat([frameDelta, thresh])
-        #     cv2.imwrite(f"/lab/debug/output/{frames}.jpg", cv2.vconcat([row1, row2]))
-        #     # cv2.imwrite(f"/lab/debug/output/resized-frame-{frames}.jpg", resized_frame)
-        #     for region in motion_regions:
-        #         cv2.rectangle(frame, (region[0], region[1]), (region[2], region[3]), (255,128,0), 2)
-        #     for region in object_regions:
-        #         cv2.rectangle(frame, (region[0], region[1]), (region[2], region[3]), (0,128,255), 2)
-        #     for region in merged_regions:
-        #         cv2.rectangle(frame, (region[0], region[1]), (region[2], region[3]), (0,255,0), 2)
-        #     for box in motion_boxes:
-        #         cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (255,0,0), 2)
-        #     for detection in detections:
-        #         box = detection[2]
-        #         draw_box_with_label(frame, box[0], box[1], box[2], box[3], detection[0], f"{detection[1]*100}%")
-        #     for obj in object_tracker.tracked_objects.values():
-        #         box = obj['box']
-        #         draw_box_with_label(frame, box[0], box[1], box[2], box[3], obj['label'], obj['id'], thickness=1, color=(0,0,255), position='bl')
-        #     cv2.putText(frame, str(total_detections), (10, 10), cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.5, color=(0, 0, 0), thickness=2)
-        #     cv2.putText(frame, str(frame_detections), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.5, color=(0, 0, 0), thickness=2)
-        #     cv2.imwrite(f"/lab/debug/output/frame-{frames}.jpg", frame)
-        #     break
-
-    # start a thread to publish object scores
-    # mqtt_publisher = MqttObjectPublisher(self.mqtt_client, self.mqtt_topic_prefix, self)
-    # mqtt_publisher.start()
-
-    # create a watchdog thread for capture process
-    # self.watchdog = CameraWatchdog(self)
-
-