store the best recent person image and reconnect the RTSP stream if unable to grab several consecutive frames

2025-07-06 01:17:03 +02:00 · 2019-02-27 20:55:07 -06:00 · 2019-02-27 20:55:07 -06:00 · df7b90e367
commit df7b90e367
parent 2e3c9da650
4 changed files with 109 additions and 8 deletions
--- a/README.md
+++ b/README.md
@ -2,6 +2,7 @@
 This results in a MJPEG stream with objects identified that has a lower latency than directly viewing the RTSP feed with VLC.
 - Prioritizes realtime processing over frames per second. Dropping frames is fine.
 - OpenCV runs in a separate process so it can grab frames as quickly as possible to ensure there aren't old frames in the buffer
 - Allows you to define specific regions (squares) in the image to look for motion/objects
 - Motion detection runs in a separate process per region and signals to object detection to avoid wasting CPU cycles to look for objects when there is no motion
 - Object detection with Tensorflow runs in a separate process per region and ignores frames that are more than 0.5 seconds old
 - Uses shared memory arrays for handing frames between processes
@ -45,16 +46,17 @@ Access the mjpeg stream at http://localhost:5000
 - [x] Add last will and availability for MQTT
 - [ ] Build tensorflow from source for CPU optimizations
 - [ ] Add ability to turn detection on and off via MQTT
- [ ] MQTT reconnect if disconnected
+- [ ] MQTT reconnect if disconnected (and resend availability message)
 - [ ] MQTT motion occasionally gets stuck ON
 - [ ] Output movie clips of people for notifications, etc.
 - [x] Store highest scoring person frame from most recent event
 - [x] Add a max size for motion and objects (height/width > 1.5, total area > 1500 and < 100,000)
 - [x] Make motion less sensitive to rain
 - [x] Use Events or Conditions to signal between threads rather than polling a value
 - [x] Implement a debug option to save images with detected objects
 - [x] Only report if x% of the recent frames have a person to avoid single frame false positives (maybe take an average of the person scores in the past x frames?)
 - [x] Filter out detected objects that are not the right size
- [ ] Make resilient to network drop outs
+- [x] Make RTSP resilient to network drop outs
 - [ ] Merge bounding boxes that span multiple regions
 - [ ] Switch to a config file
 - [ ] Allow motion regions to be different than object detection regions
--- a/detect_objects.py
+++ b/detect_objects.py
@ -11,12 +11,12 @@ import json
 from contextlib import closing
 import numpy as np
 from object_detection.utils import visualization_utils as vis_util
-from flask import Flask, Response, make_response
+from flask import Flask, Response, make_response, send_file
 import paho.mqtt.client as mqtt
 from frigate.util import tonumpyarray
 from frigate.mqtt import MqttMotionPublisher, MqttObjectPublisher
-from frigate.objects import ObjectParser, ObjectCleaner
+from frigate.objects import ObjectParser, ObjectCleaner, BestPersonFrame
 from frigate.motion import detect_motion
 from frigate.video import fetch_frames, FrameTracker
 from frigate.object_detection import detect_objects
@ -126,6 +126,11 @@ def main():
        recent_motion_frames, motion_changed, [region['motion_detected'] for region in regions])
    frame_tracker.start()
    # start a thread to store the highest scoring recent person frame
    best_person_frame = BestPersonFrame(objects_parsed, recent_motion_frames, DETECTED_OBJECTS, 
        motion_changed, [region['motion_detected'] for region in regions])
    best_person_frame.start()
    # start a thread to parse objects from the queue
    object_parser = ObjectParser(object_queue, objects_parsed, DETECTED_OBJECTS)
    object_parser.start()
@ -168,6 +173,14 @@ def main():
    # create a flask app that encodes frames a mjpeg on demand
    app = Flask(__name__)
    @app.route('/best_person.jpg')
    def best_person():
        frame = np.zeros(frame_shape, np.uint8) if best_person_frame.best_frame is None else best_person_frame.best_frame
        ret, jpg = cv2.imencode('.jpg', frame)
        response = make_response(jpg.tobytes())
        response.headers['Content-Type'] = 'image/jpg'
        return response
    @app.route('/')
    def index():
        # return a multipart response
@ -219,6 +232,7 @@ def main():
    for motion_process in motion_processes:
        motion_process.join()
    frame_tracker.join()
    best_person_frame.join()
    object_parser.join()
    object_cleaner.join()
    mqtt_publisher.join()
--- a/frigate/objects.py
+++ b/frigate/objects.py
@ -1,7 +1,8 @@
 import time
 import datetime
 import threading
-
+import cv2
 from object_detection.utils import visualization_utils as vis_util
 class ObjectParser(threading.Thread):
    def __init__(self, object_queue, objects_parsed, detected_objects):
        threading.Thread.__init__(self)
@ -46,3 +47,77 @@ class ObjectCleaner(threading.Thread):
            # wait a bit before checking for more expired frames
            time.sleep(0.2)
 # Maintains the frame and person with the highest score from the most recent
 # motion event
 class BestPersonFrame(threading.Thread):
    def __init__(self, objects_parsed, recent_frames, detected_objects, motion_changed, motion_regions):
        threading.Thread.__init__(self)
        self.objects_parsed = objects_parsed
        self.recent_frames = recent_frames
        self.detected_objects = detected_objects
        self.motion_changed = motion_changed
        self.motion_regions = motion_regions
        self.best_person = None
        self.best_frame = None
    def run(self):
        motion_start = 0.0
        motion_end = 0.0
        while True:
             # while there is motion
            while len([r for r in self.motion_regions if r.is_set()]) > 0:
                # wait until objects have been parsed
                with self.objects_parsed:
                    self.objects_parsed.wait()
                # make a copy of detected objects
                detected_objects = self.detected_objects.copy()
                detected_people = [obj for obj in detected_objects if obj['name'] == 'person']
                # make a copy of the recent frames
                recent_frames = self.recent_frames.copy()
                # get the highest scoring person
                new_best_person = max(detected_people, key=lambda x:x['score'], default=self.best_person)
                # if there isnt a person, continue
                if new_best_person is None:
                    continue
                # if there is no current best_person
                if self.best_person is None:
                    self.best_person = new_best_person
                # if there is already a best_person
                else:
                    now = datetime.datetime.now().timestamp()
                    # if the new best person is a higher score than the current best person 
                    # or the current person is more than 1 minute old, use the new best person
                    if new_best_person['score'] > self.best_person['score'] or (now - self.best_person['frame_time']) > 60:
                        self.best_person = new_best_person
                if not self.best_person is None and self.best_person['frame_time'] in recent_frames:
                    best_frame = recent_frames[self.best_person['frame_time']]
                    best_frame = cv2.cvtColor(best_frame, cv2.COLOR_BGR2RGB)
                    # draw the bounding box on the frame
                    vis_util.draw_bounding_box_on_image_array(best_frame,
                        self.best_person['ymin'],
                        self.best_person['xmin'],
                        self.best_person['ymax'],
                        self.best_person['xmax'],
                        color='red',
                        thickness=2,
                        display_str_list=["{}: {}%".format(self.best_person['name'],int(self.best_person['score']*100))],
                        use_normalized_coordinates=False)
                    # convert back to BGR
                    self.best_frame = cv2.cvtColor(best_frame, cv2.COLOR_RGB2BGR)
            motion_end = datetime.datetime.now().timestamp()
            # wait for the global motion flag to change
            with self.motion_changed:
                self.motion_changed.wait()
            motion_start = datetime.datetime.now().timestamp()
--- a/frigate/video.py
+++ b/frigate/video.py
@ -16,6 +16,7 @@ def fetch_frames(shared_arr, shared_frame_time, frame_lock, frame_ready, frame_s
    # keep the buffer small so we minimize old data
    video.set(cv2.CAP_PROP_BUFFERSIZE,1)
    bad_frame_counter = 0
    while True:
        # check if the video stream is still open, and reopen if needed
        if not video.isOpened():
@ -38,9 +39,20 @@ def fetch_frames(shared_arr, shared_frame_time, frame_lock, frame_ready, frame_s
                # Notify with the condition that a new frame is ready
                with frame_ready:
                    frame_ready.notify_all()
                bad_frame_counter = 0
            else:
                print("Unable to decode frame")
                bad_frame_counter += 1
        else:
            print("Unable to grab a frame")
            bad_frame_counter += 1
        if bad_frame_counter > 100:
            video.release()
    video.release()
 # Stores 2 seconds worth of frames when motion is detected so they can be used for other threads
 class FrameTracker(threading.Thread):
    def __init__(self, shared_frame, frame_time, frame_ready, frame_lock, recent_frames, motion_changed, motion_regions):
        threading.Thread.__init__(self)
@ -78,8 +90,6 @@ class FrameTracker(threading.Thread):
                    if (now - k) > 2:
                        del self.recent_frames[k]
                print(stored_frame_times)
            # wait for the global motion flag to change
            with self.motion_changed:
                self.motion_changed.wait()