blakeblackshear.frigate/frigate/events/audio.py
Nicolas Mowen f37f034b6a
Only run audio detection when audio is above noise threshold (#7078)
* Only run audio detection when audio is above noise threshold

* Formatting

* Fix event parsing

* Update frigate/events/audio.py

Co-authored-by: Blake Blackshear <blakeb@blakeshome.com>

---------

Co-authored-by: Blake Blackshear <blakeb@blakeshome.com>
2023-07-08 07:16:24 -05:00

261 lines
8.4 KiB
Python

"""Handle creating audio events."""
import datetime
import logging
import multiprocessing as mp
import os
import signal
import threading
from types import FrameType
from typing import Optional
import numpy as np
import requests
from setproctitle import setproctitle
from frigate.config import CameraConfig, FrigateConfig
from frigate.const import (
AUDIO_DURATION,
AUDIO_FORMAT,
AUDIO_MAX_BIT_RANGE,
AUDIO_SAMPLE_RATE,
CACHE_DIR,
FRIGATE_LOCALHOST,
)
from frigate.ffmpeg_presets import parse_preset_input
from frigate.log import LogPipe
from frigate.object_detection import load_labels
from frigate.types import FeatureMetricsTypes
from frigate.util.builtin import get_ffmpeg_arg_list
from frigate.util.services import listen
from frigate.video import start_or_restart_ffmpeg, stop_ffmpeg
try:
from tflite_runtime.interpreter import Interpreter
except ModuleNotFoundError:
from tensorflow.lite.python.interpreter import Interpreter
logger = logging.getLogger(__name__)
def get_ffmpeg_command(input_args: list[str], input_path: str, pipe: str) -> list[str]:
return get_ffmpeg_arg_list(
f"ffmpeg {{}} -i {{}} -f {AUDIO_FORMAT} -ar {AUDIO_SAMPLE_RATE} -ac 1 -y {{}}".format(
" ".join(input_args),
input_path,
pipe,
)
)
def listen_to_audio(
config: FrigateConfig,
process_info: dict[str, FeatureMetricsTypes],
) -> None:
stop_event = mp.Event()
audio_threads: list[threading.Thread] = []
def exit_process() -> None:
for thread in audio_threads:
thread.join()
logger.info("Exiting audio detector...")
def receiveSignal(signalNumber: int, frame: Optional[FrameType]) -> None:
stop_event.set()
exit_process()
signal.signal(signal.SIGTERM, receiveSignal)
signal.signal(signal.SIGINT, receiveSignal)
threading.current_thread().name = "process:audio_manager"
setproctitle("frigate.audio_manager")
listen()
for camera in config.cameras.values():
if camera.enabled and camera.audio.enabled_in_config:
audio = AudioEventMaintainer(camera, process_info, stop_event)
audio_threads.append(audio)
audio.start()
class AudioTfl:
def __init__(self, stop_event: mp.Event):
self.stop_event = stop_event
self.labels = load_labels("/audio-labelmap.txt")
self.interpreter = Interpreter(
model_path="/cpu_audio_model.tflite",
num_threads=2,
)
self.interpreter.allocate_tensors()
self.tensor_input_details = self.interpreter.get_input_details()
self.tensor_output_details = self.interpreter.get_output_details()
def _detect_raw(self, tensor_input):
self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input)
self.interpreter.invoke()
detections = np.zeros((20, 6), np.float32)
res = self.interpreter.get_tensor(self.tensor_output_details[0]["index"])[0]
non_zero_indices = res > 0
class_ids = np.argpartition(-res, 20)[:20]
class_ids = class_ids[np.argsort(-res[class_ids])]
class_ids = class_ids[non_zero_indices[class_ids]]
scores = res[class_ids]
boxes = np.full((scores.shape[0], 4), -1, np.float32)
count = len(scores)
for i in range(count):
if scores[i] < 0.4 or i == 20:
break
detections[i] = [
class_ids[i],
float(scores[i]),
boxes[i][0],
boxes[i][1],
boxes[i][2],
boxes[i][3],
]
return detections
def detect(self, tensor_input, threshold=0.8):
detections = []
if self.stop_event.is_set():
return detections
raw_detections = self._detect_raw(tensor_input)
for d in raw_detections:
if d[1] < threshold:
break
detections.append(
(self.labels[int(d[0])], float(d[1]), (d[2], d[3], d[4], d[5]))
)
return detections
class AudioEventMaintainer(threading.Thread):
def __init__(
self,
camera: CameraConfig,
feature_metrics: dict[str, FeatureMetricsTypes],
stop_event: mp.Event,
) -> None:
threading.Thread.__init__(self)
self.name = f"{camera.name}_audio_event_processor"
self.config = camera
self.feature_metrics = feature_metrics
self.detections: dict[dict[str, any]] = feature_metrics
self.stop_event = stop_event
self.detector = AudioTfl(stop_event)
self.shape = (int(round(AUDIO_DURATION * AUDIO_SAMPLE_RATE)),)
self.chunk_size = int(round(AUDIO_DURATION * AUDIO_SAMPLE_RATE * 2))
self.pipe = f"{CACHE_DIR}/{self.config.name}-audio"
self.ffmpeg_cmd = get_ffmpeg_command(
get_ffmpeg_arg_list(self.config.ffmpeg.global_args)
+ parse_preset_input("preset-rtsp-audio-only", 1),
[i.path for i in self.config.ffmpeg.inputs if "audio" in i.roles][0],
self.pipe,
)
self.pipe_file = None
self.logpipe = LogPipe(f"ffmpeg.{self.config.name}.audio")
self.audio_listener = None
def detect_audio(self, audio) -> None:
if not self.feature_metrics[self.config.name]["audio_enabled"].value:
return
rms = np.sqrt(np.mean(np.absolute(np.square(audio.astype(np.float32)))))
# only run audio detection when volume is above min_volume
if rms >= self.config.audio.min_volume:
waveform = (audio / AUDIO_MAX_BIT_RANGE).astype(np.float32)
model_detections = self.detector.detect(waveform)
for label, score, _ in model_detections:
if label not in self.config.audio.listen:
continue
self.handle_detection(label, score)
self.expire_detections()
def handle_detection(self, label: str, score: float) -> None:
if self.detections.get(label):
self.detections[label][
"last_detection"
] = datetime.datetime.now().timestamp()
else:
resp = requests.post(
f"{FRIGATE_LOCALHOST}/api/events/{self.config.name}/{label}/create",
json={"duration": None, "source_type": "audio"},
)
if resp.status_code == 200:
event_id = resp.json()["event_id"]
self.detections[label] = {
"id": event_id,
"label": label,
"last_detection": datetime.datetime.now().timestamp(),
}
def expire_detections(self) -> None:
now = datetime.datetime.now().timestamp()
for detection in self.detections.values():
if not detection:
continue
if (
now - detection.get("last_detection", now)
> self.config.audio.max_not_heard
):
resp = requests.put(
f"{FRIGATE_LOCALHOST}/api/events/{detection['id']}/end",
json={
"end_time": detection["last_detection"]
+ self.config.record.events.post_capture
},
)
if resp.status_code == 200:
self.detections[detection["label"]] = None
else:
logger.warn(
f"Failed to end audio event {detection['id']} with status code {resp.status_code}"
)
def restart_audio_pipe(self) -> None:
try:
os.mkfifo(self.pipe)
except FileExistsError:
pass
self.audio_listener = start_or_restart_ffmpeg(
self.ffmpeg_cmd, logger, self.logpipe, None, self.audio_listener
)
def read_audio(self) -> None:
if self.pipe_file is None:
self.pipe_file = open(self.pipe, "rb")
try:
audio = np.frombuffer(self.pipe_file.read(self.chunk_size), dtype=np.int16)
self.detect_audio(audio)
except BrokenPipeError:
self.logpipe.dump()
self.restart_audio_pipe()
def run(self) -> None:
self.restart_audio_pipe()
while not self.stop_event.is_set():
self.read_audio()
self.pipe_file.close()
stop_ffmpeg(self.audio_listener, logger)
self.logpipe.close()