Audio events (#6848)

* Initial audio classification model implementation

* fix mypy

* Keep audio labelmap local

* Cleanup

* Start adding config for audio

* Add the detector

* Add audio detection process keypoints

* Build out base config

* Load labelmap correctly

* Fix config bugs

* Start audio process

* Fix startup issues

* Try to cleanup restarting

* Add ffmpeg input args

* Get audio detection working

* Save event to db

* End events if not heard for 30 seconds

* Use not heard config

* Stop ffmpeg when shutting down

* Fixes

* End events correctly

* Use api instead of event queue to save audio events

* Get events working

* Close threads when stop event is sent

* remove unused

* Only start audio process if at least one camera is enabled

* Add const for float

* Cleanup labelmap

* Add audio icon in frontend

* Add ability to toggle audio with mqtt

* Set initial audio value

* Fix audio enabling

* Close logpipe

* Isort

* Formatting

* Fix web tests

* Fix web tests

* Handle cases where args are a string

* Remove log

* Cleanup process close

* Use correct field

* Simplify if statement

* Use var for localhost

* Add audio detectors docs

* Add restream docs to mention audio detection

* Add full config docs

* Fix links to other docs

---------

Co-authored-by: Jason Hunter <hunterjm@gmail.com>
Nicolas Mowen 2023-07-01 07:18:33 -06:00 committed by GitHub
parent f1dc3a639c
commit c3b313a70d
28 changed files with 1090 additions and 69 deletions


@ -98,7 +98,9 @@ COPY labelmap.txt .
COPY --from=ov-converter /models/public/ssdlite_mobilenet_v2/FP16 openvino-model
RUN wget -q https://github.com/openvinotoolkit/open_model_zoo/raw/master/data/dataset_classes/coco_91cl_bkgr.txt -O openvino-model/coco_91cl_bkgr.txt && \
sed -i 's/truck/car/g' openvino-model/coco_91cl_bkgr.txt
# Get Audio Model and labels
RUN wget -qO cpu_audio_model.tflite https://tfhub.dev/google/lite-model/yamnet/classification/tflite/1?lite-format=tflite
COPY audio-labelmap.txt .
FROM wget AS s6-overlay

audio-labelmap.txt (new file, 521 lines)

@ -0,0 +1,521 @@
speech
speech
speech
speech
babbling
speech
yell
bellow
whoop
yell
yell
yell
whispering
laughter
laughter
laughter
snicker
laughter
laughter
crying
crying
crying
yell
sigh
singing
choir
yodeling
chant
mantra
child_singing
synthetic_singing
rapping
humming
groan
grunt
whistling
breathing
wheeze
snoring
gasp
pant
snort
cough
throat_clearing
sneeze
sniff
run
shuffle
footsteps
chewing
biting
gargling
stomach_rumble
burping
hiccup
fart
hands
finger_snapping
clapping
heartbeat
heart_murmur
cheering
applause
chatter
crowd
speech
children_playing
animal
pets
dog
bark
yip
howl
bow-wow
growling
whimper_dog
cat
purr
meow
hiss
caterwaul
livestock
horse
clip-clop
neigh
cattle
moo
cowbell
pig
oink
goat
bleat
sheep
fowl
chicken
cluck
cock-a-doodle-doo
turkey
gobble
duck
quack
goose
honk
wild_animals
roaring_cats
roar
bird
bird
chirp
squawk
pigeon
coo
crow
caw
owl
hoot
flapping_wings
dogs
rats
mouse
patter
insect
cricket
mosquito
fly
buzz
buzz
frog
croak
snake
rattle
whale_vocalization
music
musical_instrument
plucked_string_instrument
guitar
electric_guitar
bass_guitar
acoustic_guitar
steel_guitar
tapping
strum
banjo
sitar
mandolin
zither
ukulele
keyboard
piano
electric_piano
organ
electronic_organ
hammond_organ
synthesizer
sampler
harpsichord
percussion
drum_kit
drum_machine
drum
snare_drum
rimshot
drum_roll
bass_drum
timpani
tabla
cymbal
hi-hat
wood_block
tambourine
rattle
maraca
gong
tubular_bells
mallet_percussion
marimba
glockenspiel
vibraphone
steelpan
orchestra
brass_instrument
french_horn
trumpet
trombone
bowed_string_instrument
string_section
violin
pizzicato
cello
double_bass
wind_instrument
flute
saxophone
clarinet
harp
bell
church_bell
jingle_bell
bicycle_bell
tuning_fork
chime
wind_chime
change_ringing
harmonica
accordion
bagpipes
didgeridoo
shofar
theremin
singing_bowl
scratching
pop_music
hip_hop_music
beatboxing
rock_music
heavy_metal
punk_rock
grunge
progressive_rock
rock_and_roll
psychedelic_rock
rhythm_and_blues
soul_music
reggae
country
swing_music
bluegrass
funk
folk_music
middle_eastern_music
jazz
disco
classical_music
opera
electronic_music
house_music
techno
dubstep
drum_and_bass
electronica
electronic_dance_music
ambient_music
trance_music
music_of_latin_america
salsa_music
flamenco
blues
music_for_children
new-age_music
vocal_music
a_capella
music_of_africa
afrobeat
christian_music
gospel_music
music_of_asia
carnatic_music
music_of_bollywood
ska
traditional_music
independent_music
song
background_music
theme_music
jingle
soundtrack_music
lullaby
video_game_music
christmas_music
dance_music
wedding_music
happy_music
sad_music
tender_music
exciting_music
angry_music
scary_music
wind
rustling_leaves
wind_noise
thunderstorm
thunder
water
rain
raindrop
rain_on_surface
stream
waterfall
ocean
waves
steam
gurgling
fire
crackle
vehicle
boat
sailboat
rowboat
motorboat
ship
motor_vehicle
car
honk
toot
car_alarm
power_windows
skidding
tire_squeal
car_passing_by
race_car
truck
air_brake
air_horn
reversing_beeps
ice_cream_truck
bus
emergency_vehicle
police_car
ambulance
fire_engine
motorcycle
traffic_noise
rail_transport
train
train_whistle
train_horn
railroad_car
train_wheels_squealing
subway
aircraft
aircraft_engine
jet_engine
propeller
helicopter
fixed-wing_aircraft
bicycle
skateboard
engine
light_engine
dental_drill's_drill
lawn_mower
chainsaw
medium_engine
heavy_engine
engine_knocking
engine_starting
idling
accelerating
door
doorbell
ding-dong
sliding_door
slam
knock
tap
squeak
cupboard_open_or_close
drawer_open_or_close
dishes
cutlery
chopping
frying
microwave_oven
blender
water_tap
sink
bathtub
hair_dryer
toilet_flush
toothbrush
electric_toothbrush
vacuum_cleaner
zipper
keys_jangling
coin
scissors
electric_shaver
shuffling_cards
typing
typewriter
computer_keyboard
writing
alarm
telephone
telephone_bell_ringing
ringtone
telephone_dialing
dial_tone
busy_signal
alarm_clock
siren
civil_defense_siren
buzzer
smoke_detector
fire_alarm
foghorn
whistle
steam_whistle
mechanisms
ratchet
clock
tick
tick-tock
gears
pulleys
sewing_machine
mechanical_fan
air_conditioning
cash_register
printer
camera
single-lens_reflex_camera
tools
hammer
jackhammer
sawing
filing
sanding
power_tool
drill
explosion
gunshot
machine_gun
fusillade
artillery_fire
cap_gun
fireworks
firecracker
burst
eruption
boom
wood
chop
splinter
crack
glass
chink
shatter
liquid
splash
slosh
squish
drip
pour
trickle
gush
fill
spray
pump
stir
boiling
sonar
arrow
whoosh
thump
thunk
electronic_tuner
effects_unit
chorus_effect
basketball_bounce
bang
slap
whack
smash
breaking
bouncing
whip
flap
scratch
scrape
rub
roll
crushing
crumpling
tearing
beep
ping
ding
clang
squeal
creak
rustle
whir
clatter
sizzle
clicking
clickety-clack
rumble
plop
jingle
hum
zing
boing
crunch
silence
sine_wave
harmonic
chirp_tone
sound_effect
pulse
inside
inside
inside
outside
outside
reverberation
echo
noise
environmental_noise
static
mains_hum
distortion
sidetone
cacophony
white_noise
pink_noise
throbbing
vibration
television
radio
field_recording


@ -0,0 +1,63 @@
---
id: audio_detectors
title: Audio Detectors
---
Frigate provides a built-in audio detector which runs on the CPU. Compared to object detection in images, audio detection is a relatively lightweight operation, so running it on the CPU is the only option offered.
## Configuration
Audio events work by detecting a type of audio and creating an event; the event ends once that type of audio has not been heard for the configured amount of time. Audio events save a snapshot at the beginning of the event as well as recordings throughout the event, and the recordings are retained according to the configured recording retention.
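In simplified form, the lifecycle looks roughly like the sketch below (illustrative only, not Frigate's actual implementation; `create_event` and `end_event` stand in for the internal event API):

```python
import time

MAX_NOT_HEARD = 30  # seconds, from audio.max_not_heard


def create_event(label):
    # placeholder for POST /api/events/<camera>/<label>/create
    return f"{label}-event-id"


def end_event(event_id, end_time):
    # placeholder for PUT /api/events/<event_id>/end
    pass


active = {}  # label -> {"id": ..., "last_heard": ...}


def on_audio_detected(label):
    now = time.time()
    if label in active:
        # the label is still being heard, keep the existing event open
        active[label]["last_heard"] = now
    else:
        # first detection of this label starts a new event (and saves a snapshot)
        active[label] = {"id": create_event(label), "last_heard": now}


def expire_stale_events():
    now = time.time()
    for label, detection in list(active.items()):
        if now - detection["last_heard"] > MAX_NOT_HEARD:
            end_event(detection["id"], detection["last_heard"])
            del active[label]
```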
### Enabling Audio Events
Audio events can be enabled for all cameras or only for specific cameras.
```yaml
audio: # <- enable audio events for all cameras
  enabled: True

cameras:
  front_camera:
    ffmpeg:
      ...
    audio:
      enabled: True # <- enable audio events for the front_camera
```
If you are using multiple streams, then the `audio` role must be set on the stream that will be used for audio detection. This can be any stream, but the stream must include audio.
:::note
The ffmpeg process for capturing audio is a separate connection to the camera, in addition to the connections used for the other roles assigned to the camera. For this reason it is recommended to use the go2rtc restream for this purpose. See [the restream docs](/configuration/restream.md) for more information.
:::
```yaml
cameras:
  front_camera:
    ffmpeg:
      inputs:
        - path: rtsp://.../main_stream
          roles:
            - record
        - path: rtsp://.../sub_stream # <- this stream must have audio enabled
          roles:
            - audio
            - detect
```
### Configuring Audio Events
The included audio model can detect over 500 different types of audio, many of which are not practical for most use cases. By default `bark`, `speech`, `yell`, and `scream` are enabled, but the list can be customized.
```yaml
audio:
  enabled: True
  listen:
    - bark
    - scream
    - speech
    - yell
```


@ -138,6 +138,20 @@ model:
  labelmap:
    2: vehicle

# Optional: Audio Events Configuration
# NOTE: Can be overridden at the camera level
audio:
  # Optional: Enable audio events (default: shown below)
  enabled: False
  # Optional: Configure the amount of seconds without detected audio to end the event (default: shown below)
  max_not_heard: 30
  # Optional: Types of audio to listen for (default: shown below)
  listen:
    - bark
    - scream
    - speech
    - yell

# Optional: logger verbosity settings
logger:
  # Optional: Default log verbosity (default: shown below)


@ -1,6 +1,6 @@
---
id: detectors
title: Detectors
id: object_detectors
title: Object Detectors
---
Frigate provides the following builtin detector types: `cpu`, `edgetpu`, `openvino`, and `tensorrt`. By default, Frigate will use a single CPU detector. Other detectors may require additional configuration as described below. When using multiple detectors they will run in dedicated processes, but pull from a common queue of detection requests from across all cameras.
@ -275,6 +275,6 @@ detectors:
api_timeout: 0.1 # seconds
```
Replace `<your_codeproject_ai_server_ip>` and `<port>` with the IP address and port of your CodeProject.AI server.
Replace `<your_codeproject_ai_server_ip>` and `<port>` with the IP address and port of your CodeProject.AI server.
To verify that the integration is working correctly, start Frigate and observe the logs for any error messages related to CodeProject.AI. Additionally, you can check the Frigate web interface to see if the objects detected by CodeProject.AI are being displayed and tracked properly.


@ -67,6 +67,7 @@ cameras:
roles:
- record
- detect
- audio # <- only necessary if audio detection is enabled
http_cam:
ffmpeg:
output_args:
@ -77,6 +78,7 @@ cameras:
roles:
- record
- detect
- audio # <- only necessary if audio detection is enabled
```
### With Sub Stream
@ -112,6 +114,7 @@ cameras:
- path: rtsp://127.0.0.1:8554/rtsp_cam_sub # <--- the name here must match the name of the camera_sub in restream
input_args: preset-rtsp-restream
roles:
- audio # <- only necessary if audio detection is enabled
- detect
http_cam:
ffmpeg:
@ -125,6 +128,7 @@ cameras:
- path: rtsp://127.0.0.1:8554/http_cam_sub # <--- the name here must match the name of the camera_sub in restream
input_args: preset-rtsp-restream
roles:
- audio # <- only necessary if audio detection is enabled
- detect
```


@ -50,7 +50,7 @@ The OpenVINO detector type is able to run on:
- 6th Gen Intel Platforms and newer that have an iGPU
- x86 & Arm64 hosts with VPU Hardware (ex: Intel NCS2)
More information is available [in the detector docs](/configuration/detectors#openvino-detector)
More information is available [in the detector docs](/configuration/object_detectors#openvino-detector)
Inference speeds vary greatly depending on the CPU, GPU, or VPU used, some known examples are below:
@ -72,7 +72,7 @@ Inference speeds vary greatly depending on the CPU, GPU, or VPU used, some known
### TensorRT
The TensorRT detector is able to run on x86 hosts that have an Nvidia GPU which supports the 11.x series of CUDA libraries. The minimum driver version on the host system must be `>=450.80.02`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer; check the [TensorRT docs for more info](/configuration/detectors#nvidia-tensorrt-detector).
The TensorRT detector is able to run on x86 hosts that have an Nvidia GPU which supports the 11.x series of CUDA libraries. The minimum driver version on the host system must be `>=450.80.02`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer; check the [TensorRT docs for more info](/configuration/object_detectors#nvidia-tensorrt-detector).
Inference speeds will vary greatly depending on the GPU and the model used.
`tiny` variants are faster than the equivalent non-tiny model, some known examples are below:


@ -71,7 +71,7 @@ cameras:
...
```
More details on available detectors can be found [here](../configuration/detectors.md).
More details on available detectors can be found [here](../configuration/object_detectors.md).
Restart Frigate and you should start seeing detections for `person`. If you want to track other objects, they will need to be added according to the [configuration file reference](../configuration/index.md#full-configuration-reference).


@ -16,7 +16,8 @@ module.exports = {
],
Configuration: [
"configuration/index",
"configuration/detectors",
"configuration/object_detectors",
"configuration/audio_detectors",
"configuration/cameras",
"configuration/masks",
"configuration/record",


@ -29,6 +29,7 @@ from frigate.const import (
MODEL_CACHE_DIR,
RECORD_DIR,
)
from frigate.events.audio import listen_to_audio
from frigate.events.cleanup import EventCleanup
from frigate.events.external import ExternalEventProcessor
from frigate.events.maintainer import EventProcessor
@ -44,7 +45,7 @@ from frigate.record.record import manage_recordings
from frigate.stats import StatsEmitter, stats_init
from frigate.storage import StorageMaintainer
from frigate.timeline import TimelineProcessor
from frigate.types import CameraMetricsTypes, RecordMetricsTypes
from frigate.types import CameraMetricsTypes, FeatureMetricsTypes
from frigate.version import VERSION
from frigate.video import capture_camera, track_camera
from frigate.watchdog import FrigateWatchdog
@ -62,7 +63,7 @@ class FrigateApp:
self.log_queue: Queue = mp.Queue()
self.plus_api = PlusApi()
self.camera_metrics: dict[str, CameraMetricsTypes] = {}
self.record_metrics: dict[str, RecordMetricsTypes] = {}
self.feature_metrics: dict[str, FeatureMetricsTypes] = {}
self.processes: dict[str, int] = {}
def set_environment_vars(self) -> None:
@ -104,7 +105,7 @@ class FrigateApp:
user_config = FrigateConfig.parse_file(config_file)
self.config = user_config.runtime_config(self.plus_api)
for camera_name in self.config.cameras.keys():
for camera_name, camera_config in self.config.cameras.items():
# create camera_metrics
self.camera_metrics[camera_name] = {
"camera_fps": mp.Value("d", 0.0), # type: ignore[typeddict-item]
@ -159,13 +160,19 @@ class FrigateApp:
"capture_process": None,
"process": None,
}
self.record_metrics[camera_name] = {
self.feature_metrics[camera_name] = {
"audio_enabled": mp.Value( # type: ignore[typeddict-item]
# issue https://github.com/python/typeshed/issues/8799
# from mypy 0.981 onwards
"i",
self.config.cameras[camera_name].audio.enabled,
),
"record_enabled": mp.Value( # type: ignore[typeddict-item]
# issue https://github.com/python/typeshed/issues/8799
# from mypy 0.981 onwards
"i",
self.config.cameras[camera_name].record.enabled,
)
),
}
def set_log_levels(self) -> None:
@ -253,7 +260,7 @@ class FrigateApp:
recording_process = mp.Process(
target=manage_recordings,
name="recording_manager",
args=(self.config, self.recordings_info_queue, self.record_metrics),
args=(self.config, self.recordings_info_queue, self.feature_metrics),
)
recording_process.daemon = True
self.recording_process = recording_process
@ -312,7 +319,7 @@ class FrigateApp:
self.config,
self.onvif_controller,
self.camera_metrics,
self.record_metrics,
self.feature_metrics,
comms,
)
@ -421,6 +428,17 @@ class FrigateApp:
capture_process.start()
logger.info(f"Capture process started for {name}: {capture_process.pid}")
def start_audio_processors(self) -> None:
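# a single process runs audio detection for every audio-enabled camera
# (one AudioEventMaintainer thread per camera, see frigate/events/audio.py)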
if len([c for c in self.config.cameras.values() if c.audio.enabled]) > 0:
audio_process = mp.Process(
target=listen_to_audio,
name="audio_capture",
args=(self.config, self.feature_metrics),
)
audio_process.daemon = True
audio_process.start()
logger.info(f"Audio process started: {audio_process.pid}")
def start_timeline_processor(self) -> None:
self.timeline_processor = TimelineProcessor(
self.config, self.timeline_queue, self.stop_event
@ -517,6 +535,7 @@ class FrigateApp:
self.start_detected_frames_processor()
self.start_camera_processors()
self.start_camera_capture_processes()
self.start_audio_processors()
self.start_storage_maintainer()
self.init_stats()
self.init_external_event_processor()


@ -6,7 +6,7 @@ from typing import Any, Callable
from frigate.config import FrigateConfig
from frigate.ptz import OnvifCommandEnum, OnvifController
from frigate.types import CameraMetricsTypes, RecordMetricsTypes
from frigate.types import CameraMetricsTypes, FeatureMetricsTypes
from frigate.util import restart_frigate
logger = logging.getLogger(__name__)
@ -39,19 +39,20 @@ class Dispatcher:
config: FrigateConfig,
onvif: OnvifController,
camera_metrics: dict[str, CameraMetricsTypes],
record_metrics: dict[str, RecordMetricsTypes],
feature_metrics: dict[str, FeatureMetricsTypes],
communicators: list[Communicator],
) -> None:
self.config = config
self.onvif = onvif
self.camera_metrics = camera_metrics
self.record_metrics = record_metrics
self.feature_metrics = feature_metrics
self.comms = communicators
for comm in self.comms:
comm.subscribe(self._receive)
self._camera_settings_handlers: dict[str, Callable] = {
"audio": self._on_audio_command,
"detect": self._on_detect_command,
"improve_contrast": self._on_motion_improve_contrast_command,
"motion": self._on_motion_command,
@ -186,6 +187,29 @@ class Dispatcher:
motion_settings.threshold = payload # type: ignore[union-attr]
self.publish(f"{camera_name}/motion_threshold/state", payload, retain=True)
def _on_audio_command(self, camera_name: str, payload: str) -> None:
"""Callback for audio topic."""
audio_settings = self.config.cameras[camera_name].audio
if payload == "ON":
if not self.config.cameras[camera_name].audio.enabled_in_config:
logger.error(
"Audio detection must be enabled in the config to be turned on via MQTT."
)
return
if not audio_settings.enabled:
logger.info(f"Turning on audio detection for {camera_name}")
audio_settings.enabled = True
self.feature_metrics[camera_name]["audio_enabled"].value = True
elif payload == "OFF":
if self.feature_metrics[camera_name]["audio_enabled"].value:
logger.info(f"Turning off audio detection for {camera_name}")
audio_settings.enabled = False
self.feature_metrics[camera_name]["audio_enabled"].value = False
self.publish(f"{camera_name}/audio/state", payload, retain=True)
def _on_recordings_command(self, camera_name: str, payload: str) -> None:
"""Callback for recordings topic."""
record_settings = self.config.cameras[camera_name].record
@ -200,12 +224,12 @@ class Dispatcher:
if not record_settings.enabled:
logger.info(f"Turning on recordings for {camera_name}")
record_settings.enabled = True
self.record_metrics[camera_name]["record_enabled"].value = True
self.feature_metrics[camera_name]["record_enabled"].value = True
elif payload == "OFF":
if self.record_metrics[camera_name]["record_enabled"].value:
if self.feature_metrics[camera_name]["record_enabled"].value:
logger.info(f"Turning off recordings for {camera_name}")
record_settings.enabled = False
self.record_metrics[camera_name]["record_enabled"].value = False
self.feature_metrics[camera_name]["record_enabled"].value = False
self.publish(f"{camera_name}/recordings/state", payload, retain=True)


@ -41,7 +41,7 @@ class MqttClient(Communicator): # type: ignore[misc]
for camera_name, camera in self.config.cameras.items():
self.publish(
f"{camera_name}/recordings/state",
"ON" if camera.record.enabled else "OFF",
"ON" if camera.record.enabled_in_config else "OFF",
retain=True,
)
self.publish(
@ -49,6 +49,11 @@ class MqttClient(Communicator): # type: ignore[misc]
"ON" if camera.snapshots.enabled else "OFF",
retain=True,
)
self.publish(
f"{camera_name}/audio/state",
"ON" if camera.audio.enabled_in_config else "OFF",
retain=True,
)
self.publish(
f"{camera_name}/detect/state",
"ON" if camera.detect.enabled else "OFF",


@ -40,6 +40,7 @@ DEFAULT_TIME_FORMAT = "%m/%d/%Y %H:%M:%S"
FRIGATE_ENV_VARS = {k: v for k, v in os.environ.items() if k.startswith("FRIGATE_")}
DEFAULT_TRACKED_OBJECTS = ["person"]
DEFAULT_LISTEN_AUDIO = ["bark", "speech", "yell", "scream"]
DEFAULT_DETECTORS = {"cpu": {"type": "cpu"}}
@ -387,6 +388,19 @@ class ObjectConfig(FrigateBaseModel):
mask: Union[str, List[str]] = Field(default="", title="Object mask.")
class AudioConfig(FrigateBaseModel):
enabled: bool = Field(default=False, title="Enable audio events.")
max_not_heard: int = Field(
default=30, title="Seconds of not hearing the type of audio to end the event."
)
listen: List[str] = Field(
default=DEFAULT_LISTEN_AUDIO, title="Audio to listen for."
)
enabled_in_config: Optional[bool] = Field(
title="Keep track of original state of audio detection."
)
class BirdseyeModeEnum(str, Enum):
objects = "objects"
motion = "motion"
@ -470,6 +484,7 @@ class FfmpegConfig(FrigateBaseModel):
class CameraRoleEnum(str, Enum):
audio = "audio"
record = "record"
rtmp = "rtmp"
detect = "detect"
@ -631,6 +646,9 @@ class CameraConfig(FrigateBaseModel):
objects: ObjectConfig = Field(
default_factory=ObjectConfig, title="Object configuration."
)
audio: AudioConfig = Field(
default_factory=AudioConfig, title="Audio events configuration."
)
motion: Optional[MotionConfig] = Field(title="Motion detection configuration.")
detect: DetectConfig = Field(
default_factory=DetectConfig, title="Object detection configuration."
@ -661,12 +679,16 @@ class CameraConfig(FrigateBaseModel):
# add roles to the input if there is only one
if len(config["ffmpeg"]["inputs"]) == 1:
has_rtmp = "rtmp" in config["ffmpeg"]["inputs"][0].get("roles", [])
has_audio = "audio" in config["ffmpeg"]["inputs"][0].get("roles", [])
config["ffmpeg"]["inputs"][0]["roles"] = [
"record",
"detect",
]
if has_audio:
config["ffmpeg"]["inputs"][0]["roles"].append("audio")
if has_rtmp:
config["ffmpeg"]["inputs"][0]["roles"].append("rtmp")
@ -799,6 +821,11 @@ def verify_config_roles(camera_config: CameraConfig) -> None:
f"Camera {camera_config.name} has rtmp enabled, but rtmp is not assigned to an input."
)
if camera_config.audio.enabled and "audio" not in assigned_roles:
raise ValueError(
f"Camera {camera_config.name} has audio events enabled, but audio is not assigned to an input."
)
def verify_valid_live_stream_name(
frigate_config: FrigateConfig, camera_config: CameraConfig
@ -911,6 +938,9 @@ class FrigateConfig(FrigateBaseModel):
objects: ObjectConfig = Field(
default_factory=ObjectConfig, title="Global object configuration."
)
audio: AudioConfig = Field(
default_factory=AudioConfig, title="Global Audio events configuration."
)
motion: Optional[MotionConfig] = Field(
title="Global motion detection configuration."
)
@ -935,6 +965,7 @@ class FrigateConfig(FrigateBaseModel):
# Global config to propagate down to camera level
global_config = config.dict(
include={
"audio": ...,
"birdseye": ...,
"record": ...,
"snapshots": ...,
@ -980,8 +1011,9 @@ class FrigateConfig(FrigateBaseModel):
camera_config.onvif.password = camera_config.onvif.password.format(
**FRIGATE_ENV_VARS
)
# set config recording value
# set config pre-value
camera_config.record.enabled_in_config = camera_config.record.enabled
camera_config.audio.enabled_in_config = camera_config.audio.enabled
# Add default filters
object_keys = camera_config.objects.track

View File

@ -8,6 +8,7 @@ EXPORT_DIR = f"{BASE_DIR}/exports"
BIRDSEYE_PIPE = "/tmp/cache/birdseye"
CACHE_DIR = "/tmp/cache"
YAML_EXT = (".yaml", ".yml")
FRIGATE_LOCALHOST = "http://127.0.0.1:5000"
PLUS_ENV_VAR = "PLUS_API_KEY"
PLUS_API_HOST = "https://api.frigate.video"
BTBN_PATH = "/usr/lib/btbn-ffmpeg"
@ -22,6 +23,13 @@ ALL_ATTRIBUTE_LABELS = [
item for sublist in ATTRIBUTE_LABEL_MAP.values() for item in sublist
]
# Audio Consts
AUDIO_DURATION = 0.975
AUDIO_FORMAT = "s16le"
AUDIO_MAX_BIT_RANGE = 32768.0
AUDIO_SAMPLE_RATE = 16000
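# (0.975 s of 16 kHz mono s16le audio is one YAMNet inference window; 32768.0
# is the magnitude of the s16le range used to normalize samples to [-1, 1])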
# Regex Consts
REGEX_CAMERA_NAME = r"^[a-zA-Z0-9_-]+$"

frigate/events/audio.py (new file, 247 lines)

@ -0,0 +1,247 @@
"""Handle creating audio events."""
import datetime
import logging
import multiprocessing as mp
import os
import signal
import threading
from types import FrameType
from typing import Optional
import numpy as np
import requests
from setproctitle import setproctitle
from frigate.config import CameraConfig, FrigateConfig
from frigate.const import (
AUDIO_DURATION,
AUDIO_FORMAT,
AUDIO_MAX_BIT_RANGE,
AUDIO_SAMPLE_RATE,
CACHE_DIR,
FRIGATE_LOCALHOST,
)
from frigate.ffmpeg_presets import parse_preset_input
from frigate.log import LogPipe
from frigate.object_detection import load_labels
from frigate.types import FeatureMetricsTypes
from frigate.util import get_ffmpeg_arg_list, listen
from frigate.video import start_or_restart_ffmpeg, stop_ffmpeg
try:
from tflite_runtime.interpreter import Interpreter
except ModuleNotFoundError:
from tensorflow.lite.python.interpreter import Interpreter
logger = logging.getLogger(__name__)
def get_ffmpeg_command(input_args: list[str], input_path: str, pipe: str) -> list[str]:
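# builds e.g. "ffmpeg <input args> -i <input path> -f s16le -ar 16000 -ac 1 -y <pipe>"
# so the camera's audio is written to the named pipe as raw 16 kHz mono PCM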
return get_ffmpeg_arg_list(
f"ffmpeg {{}} -i {{}} -f {AUDIO_FORMAT} -ar {AUDIO_SAMPLE_RATE} -ac 1 -y {{}}".format(
" ".join(input_args),
input_path,
pipe,
)
)
def listen_to_audio(
config: FrigateConfig,
process_info: dict[str, FeatureMetricsTypes],
) -> None:
stop_event = mp.Event()
audio_threads: list[threading.Thread] = []
def exit_process() -> None:
for thread in audio_threads:
thread.join()
logger.info("Exiting audio detector...")
def receiveSignal(signalNumber: int, frame: Optional[FrameType]) -> None:
stop_event.set()
exit_process()
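# SIGTERM/SIGINT set the shared stop_event so every camera thread can finish
# its read loop, then exit_process() joins them before the process exits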
signal.signal(signal.SIGTERM, receiveSignal)
signal.signal(signal.SIGINT, receiveSignal)
threading.current_thread().name = "process:audio_manager"
setproctitle("frigate.audio_manager")
listen()
for camera in config.cameras.values():
if camera.enabled and camera.audio.enabled_in_config:
audio = AudioEventMaintainer(camera, process_info, stop_event)
audio_threads.append(audio)
audio.start()
class AudioTfl:
def __init__(self, stop_event: mp.Event):
self.stop_event = stop_event
self.labels = load_labels("/audio-labelmap.txt")
self.interpreter = Interpreter(
model_path="/cpu_audio_model.tflite",
num_threads=2,
)
self.interpreter.allocate_tensors()
self.tensor_input_details = self.interpreter.get_input_details()
self.tensor_output_details = self.interpreter.get_output_details()
def _detect_raw(self, tensor_input):
self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input)
self.interpreter.invoke()
detections = np.zeros((20, 6), np.float32)
res = self.interpreter.get_tensor(self.tensor_output_details[0]["index"])[0]
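# keep the 20 highest-scoring classes, sorted by score, dropping zero scores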
non_zero_indices = res > 0
class_ids = np.argpartition(-res, 20)[:20]
class_ids = class_ids[np.argsort(-res[class_ids])]
class_ids = class_ids[non_zero_indices[class_ids]]
scores = res[class_ids]
boxes = np.full((scores.shape[0], 4), -1, np.float32)
count = len(scores)
for i in range(count):
if scores[i] < 0.4 or i == 20:
break
detections[i] = [
class_ids[i],
float(scores[i]),
boxes[i][0],
boxes[i][1],
boxes[i][2],
boxes[i][3],
]
return detections
def detect(self, tensor_input, threshold=0.8):
detections = []
if self.stop_event.is_set():
return detections
raw_detections = self._detect_raw(tensor_input)
for d in raw_detections:
if d[1] < threshold:
break
detections.append(
(self.labels[int(d[0])], float(d[1]), (d[2], d[3], d[4], d[5]))
)
return detections
class AudioEventMaintainer(threading.Thread):
def __init__(
self,
camera: CameraConfig,
feature_metrics: dict[str, FeatureMetricsTypes],
stop_event: mp.Event,
) -> None:
threading.Thread.__init__(self)
self.name = f"{camera.name}_audio_event_processor"
self.config = camera
self.feature_metrics = feature_metrics
self.detections: dict[str, dict[str, any]] = {}
self.stop_event = stop_event
self.detector = AudioTfl(stop_event)
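# YAMNet consumes 0.975 s of 16 kHz mono audio (15,600 samples); the pipe
# delivers s16le, so one chunk is 2 bytes per sample (31,200 bytes)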
self.shape = (int(round(AUDIO_DURATION * AUDIO_SAMPLE_RATE)),)
self.chunk_size = int(round(AUDIO_DURATION * AUDIO_SAMPLE_RATE * 2))
self.pipe = f"{CACHE_DIR}/{self.config.name}-audio"
self.ffmpeg_cmd = get_ffmpeg_command(
get_ffmpeg_arg_list(self.config.ffmpeg.global_args)
+ parse_preset_input("preset-rtsp-audio-only", 1),
[i.path for i in self.config.ffmpeg.inputs if "audio" in i.roles][0],
self.pipe,
)
self.pipe_file = None
self.logpipe = LogPipe(f"ffmpeg.{self.config.name}.audio")
self.audio_listener = None
def detect_audio(self, audio) -> None:
if not self.feature_metrics[self.config.name]["audio_enabled"].value:
return
waveform = (audio / AUDIO_MAX_BIT_RANGE).astype(np.float32)
model_detections = self.detector.detect(waveform)
for label, score, _ in model_detections:
if label not in self.config.audio.listen:
continue
self.handle_detection(label, score)
self.expire_detections()
def handle_detection(self, label: str, score: float) -> None:
if self.detections.get(label):
self.detections[label][
"last_detection"
] = datetime.datetime.now().timestamp()
else:
resp = requests.post(
f"{FRIGATE_LOCALHOST}/api/events/{self.config.name}/{label}/create",
json={"duration": None},
)
if resp.status_code == 200:
event_id = resp.json()[0]["event_id"]
self.detections[label] = {
"id": event_id,
"label": label,
"last_detection": datetime.datetime.now().timestamp(),
}
def expire_detections(self) -> None:
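# an event ends once its label has not been heard for max_not_heard seconds;
# the recorded end time is padded with the camera's post_capture setting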
now = datetime.datetime.now().timestamp()
for detection in self.detections.values():
if (
now - detection.get("last_detection", now)
> self.config.audio.max_not_heard
):
self.detections[detection["label"]] = None
requests.put(
f"{FRIGATE_LOCALHOST}/api/events/{detection['id']}/end",
json={
"end_time": detection["last_detection"]
+ self.config.record.events.post_capture
},
)
def restart_audio_pipe(self) -> None:
try:
os.mkfifo(self.pipe)
except FileExistsError:
pass
self.audio_listener = start_or_restart_ffmpeg(
self.ffmpeg_cmd, logger, self.logpipe, None, self.audio_listener
)
def read_audio(self) -> None:
if self.pipe_file is None:
self.pipe_file = open(self.pipe, "rb")
try:
audio = np.frombuffer(self.pipe_file.read(self.chunk_size), dtype=np.int16)
self.detect_audio(audio)
except BrokenPipeError:
self.logpipe.dump()
self.restart_audio_pipe()
def run(self) -> None:
self.restart_audio_pipe()
while not self.stop_event.is_set():
self.read_audio()
self.pipe_file.close()
stop_ffmpeg(self.audio_listener, logger)
self.logpipe.close()


@ -67,11 +67,10 @@ class ExternalEventProcessor:
return event_id
def finish_manual_event(self, event_id: str) -> None:
def finish_manual_event(self, event_id: str, end_time: float) -> None:
"""Finish external event with indeterminate duration."""
now = datetime.datetime.now().timestamp()
self.queue.put(
(EventTypeEnum.api, "end", None, {"id": event_id, "end_time": now})
(EventTypeEnum.api, "end", None, {"id": event_id, "end_time": end_time})
)
def _write_images(


@ -18,7 +18,6 @@ logger = logging.getLogger(__name__)
class EventTypeEnum(str, Enum):
api = "api"
# audio = "audio"
tracked_object = "tracked_object"
@ -73,19 +72,21 @@ class EventProcessor(threading.Thread):
except queue.Empty:
continue
logger.debug(f"Event received: {event_type} {camera} {event_data['id']}")
self.timeline_queue.put(
(
camera,
source_type,
event_type,
self.events_in_process.get(event_data["id"]),
event_data,
)
logger.debug(
f"Event received: {source_type} {event_type} {camera} {event_data['id']}"
)
if source_type == EventTypeEnum.tracked_object:
self.timeline_queue.put(
(
camera,
source_type,
event_type,
self.events_in_process.get(event_data["id"]),
event_data,
)
)
if event_type == "start":
self.events_in_process[event_data["id"]] = event_data
continue
@ -215,7 +216,7 @@ class EventProcessor(threading.Thread):
del self.events_in_process[event_data["id"]]
self.event_processed_queue.put((event_data["id"], camera))
def handle_external_detection(self, type: str, event_data: Event):
def handle_external_detection(self, type: str, event_data: Event) -> None:
if type == "new":
event = {
Event.id: event_data["id"],
@ -230,20 +231,14 @@ class EventProcessor(threading.Thread):
Event.zones: [],
Event.data: {},
}
Event.insert(event).execute()
elif type == "end":
event = {
Event.id: event_data["id"],
Event.end_time: event_data["end_time"],
}
try:
(
Event.insert(event)
.on_conflict(
conflict_target=[Event.id],
update=event,
)
.execute()
)
except Exception:
logger.warning(f"Failed to update manual event: {event_data['id']}")
try:
Event.update(event).execute()
except Exception:
logger.warning(f"Failed to update manual event: {event_data['id']}")


@ -282,6 +282,13 @@ PRESETS_INPUT = {
"-use_wallclock_as_timestamps",
"1",
],
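# audio-only RTSP capture: TCP transport, a 5 s (5,000,000 µs) timeout, and
# "-vn" to drop the video stream entirely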
"preset-rtsp-audio-only": [
"-rtsp_transport",
"tcp",
TIMEOUT_PARAM,
"5000000",
"-vn",
],
"preset-rtsp-restream": _user_agent_args
+ [
"-rtsp_transport",


@ -908,8 +908,11 @@ def create_event(camera_name, label):
@bp.route("/events/<event_id>/end", methods=["PUT"])
def end_event(event_id):
json: dict[str, any] = request.get_json(silent=True) or {}
try:
current_app.external_processor.finish_manual_event(event_id)
end_time = json.get("end_time", datetime.now().timestamp())
current_app.external_processor.finish_manual_event(event_id, end_time)
except Exception:
return jsonify(
{"success": False, "message": f"{event_id} must be set and valid."}, 404


@ -156,7 +156,12 @@ class BroadcastThread(threading.Thread):
class BirdsEyeFrameManager:
def __init__(self, config: FrigateConfig, frame_manager: SharedMemoryFrameManager):
def __init__(
self,
config: FrigateConfig,
frame_manager: SharedMemoryFrameManager,
stop_event: mp.Event,
):
self.config = config
self.mode = config.birdseye.mode
self.frame_manager = frame_manager
@ -165,6 +170,7 @@ class BirdsEyeFrameManager:
self.frame_shape = (height, width)
self.yuv_shape = (height * 3 // 2, width)
self.frame = np.ndarray(self.yuv_shape, dtype=np.uint8)
self.stop_event = stop_event
# initialize the frame as black and with the Frigate logo
self.blank_frame = np.zeros(self.yuv_shape, np.uint8)
@ -458,6 +464,9 @@ class BirdsEyeFrameManager:
# decrease scaling coefficient until height of all cameras can fit into the birdseye canvas
while calculating:
if self.stop_event.is_set():
return
layout_candidate = calculate_layout(
(canvas_width, canvas_height),
active_cameras_to_add,
@ -580,7 +589,7 @@ def output_frames(config: FrigateConfig, video_output_queue):
for t in broadcasters.values():
t.start()
birdseye_manager = BirdsEyeFrameManager(config, frame_manager)
birdseye_manager = BirdsEyeFrameManager(config, frame_manager, stop_event)
if config.birdseye.restream:
birdseye_buffer = frame_manager.create(


@ -20,7 +20,7 @@ import psutil
from frigate.config import FrigateConfig, RetainModeEnum
from frigate.const import CACHE_DIR, MAX_SEGMENT_DURATION, RECORD_DIR
from frigate.models import Event, Recordings
from frigate.types import RecordMetricsTypes
from frigate.types import FeatureMetricsTypes
from frigate.util import area, get_video_properties
logger = logging.getLogger(__name__)
@ -31,7 +31,7 @@ class RecordingMaintainer(threading.Thread):
self,
config: FrigateConfig,
recordings_info_queue: mp.Queue,
process_info: dict[str, RecordMetricsTypes],
process_info: dict[str, FeatureMetricsTypes],
stop_event: MpEvent,
):
threading.Thread.__init__(self)


@ -14,7 +14,7 @@ from frigate.config import FrigateConfig
from frigate.models import Event, Recordings, RecordingsToDelete, Timeline
from frigate.record.cleanup import RecordingCleanup
from frigate.record.maintainer import RecordingMaintainer
from frigate.types import RecordMetricsTypes
from frigate.types import FeatureMetricsTypes
from frigate.util import listen
logger = logging.getLogger(__name__)
@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)
def manage_recordings(
config: FrigateConfig,
recordings_info_queue: mp.Queue,
process_info: dict[str, RecordMetricsTypes],
process_info: dict[str, FeatureMetricsTypes],
) -> None:
stop_event = mp.Event()


@ -25,7 +25,8 @@ class CameraMetricsTypes(TypedDict):
skipped_fps: Synchronized
class RecordMetricsTypes(TypedDict):
class FeatureMetricsTypes(TypedDict):
audio_enabled: Synchronized
record_enabled: Synchronized


@ -16,6 +16,7 @@ export const handlers = [
front: {
name: 'front',
objects: { track: ['taco', 'cat', 'dog'] },
audio: { enabled: false, enabled_in_config: false },
record: { enabled: true, enabled_in_config: true },
detect: { width: 1280, height: 720 },
snapshots: {},
@ -25,6 +26,7 @@ export const handlers = [
side: {
name: 'side',
objects: { track: ['taco', 'cat', 'dog'] },
audio: { enabled: false, enabled_in_config: false },
record: { enabled: false, enabled_in_config: true },
detect: { width: 1280, height: 720 },
snapshots: {},


@ -113,8 +113,8 @@ describe('WsProvider', () => {
vi.spyOn(Date, 'now').mockReturnValue(123456);
const config = {
cameras: {
front: { name: 'front', detect: { enabled: true }, record: { enabled: false }, snapshots: { enabled: true } },
side: { name: 'side', detect: { enabled: false }, record: { enabled: false }, snapshots: { enabled: false } },
front: { name: 'front', detect: { enabled: true }, record: { enabled: false }, snapshots: { enabled: true }, audio: { enabled: false } },
side: { name: 'side', detect: { enabled: false }, record: { enabled: false }, snapshots: { enabled: false }, audio: { enabled: false } },
},
};
render(


@ -41,10 +41,11 @@ export function WsProvider({
useEffect(() => {
Object.keys(config.cameras).forEach((camera) => {
const { name, record, detect, snapshots } = config.cameras[camera];
const { name, record, detect, snapshots, audio } = config.cameras[camera];
dispatch({ topic: `${name}/recordings/state`, payload: record.enabled ? 'ON' : 'OFF', retain: false });
dispatch({ topic: `${name}/detect/state`, payload: detect.enabled ? 'ON' : 'OFF', retain: false });
dispatch({ topic: `${name}/snapshots/state`, payload: snapshots.enabled ? 'ON' : 'OFF', retain: false });
dispatch({ topic: `${name}/audio/state`, payload: audio.enabled ? 'ON' : 'OFF', retain: false });
});
}, [config]);
@ -120,6 +121,15 @@ export function useSnapshotsState(camera) {
return { payload, send, connected };
}
export function useAudioState(camera) {
const {
value: { payload },
send,
connected,
} = useWs(`${camera}/audio/state`, `${camera}/audio/set`);
return { payload, send, connected };
}
export function usePtzCommand(camera) {
const {
value: { payload },

web/src/icons/Audio.jsx (new file, 36 lines)

@ -0,0 +1,36 @@
import { h } from 'preact';
import { memo } from 'preact/compat';
export function Audio({ className = 'h-6 w-6', stroke = 'currentColor', onClick = () => {} }) {
return (
<svg
xmlns="http://www.w3.org/2000/svg"
className={className}
fill="none"
viewBox="0 0 32 32"
stroke={stroke}
onClick={onClick}
>
<path
stroke-linecap="round"
stroke-linejoin="round"
stroke-width="2"
d="M18 30v-2a10.011 10.011 0 0010-10h2a12.013 12.013 0 01-12 12z"
/>
<path
stroke-linecap="round"
stroke-linejoin="round"
stroke-width="2"
d="M18 22v-2a2.002 2.002 0 002-2h2a4.004 4.004 0 01-4 4zM10 2a9.01 9.01 0 00-9 9h2a7 7 0 0114 0 7.09 7.09 0 01-3.501 6.135l-.499.288v3.073a2.935 2.935 0 01-.9 2.151 4.182 4.182 0 01-4.633 1.03A4.092 4.092 0 015 20H3a6.116 6.116 0 003.67 5.512 5.782 5.782 0 002.314.486 6.585 6.585 0 004.478-1.888A4.94 4.94 0 0015 20.496v-1.942A9.108 9.108 0 0019 11a9.01 9.01 0 00-9-9z"
/>
<path
stroke-linecap="round"
stroke-linejoin="round"
stroke-width="2"
d="M9.28 8.082A3.006 3.006 0 0113 11h2a4.979 4.979 0 00-1.884-3.911 5.041 5.041 0 00-4.281-.957 4.95 4.95 0 00-3.703 3.703 5.032 5.032 0 002.304 5.458A3.078 3.078 0 019 17.924V20h2v-2.077a5.06 5.06 0 00-2.537-4.346 3.002 3.002 0 01.817-5.494z"
/>
</svg>
);
}
export default memo(Audio);


@ -2,10 +2,11 @@ import { h, Fragment } from 'preact';
import ActivityIndicator from '../components/ActivityIndicator';
import Card from '../components/Card';
import CameraImage from '../components/CameraImage';
import AudioIcon from '../icons/Audio';
import ClipIcon from '../icons/Clip';
import MotionIcon from '../icons/Motion';
import SnapshotIcon from '../icons/Snapshot';
import { useDetectState, useRecordingsState, useSnapshotsState } from '../api/ws';
import { useAudioState, useDetectState, useRecordingsState, useSnapshotsState } from '../api/ws';
import { useMemo } from 'preact/hooks';
import useSWR from 'swr';
@ -43,6 +44,7 @@ function Camera({ name, config }) {
const { payload: detectValue, send: sendDetect } = useDetectState(name);
const { payload: recordValue, send: sendRecordings } = useRecordingsState(name);
const { payload: snapshotValue, send: sendSnapshots } = useSnapshotsState(name);
const { payload: audioValue, send: sendAudio } = useAudioState(name);
const href = `/cameras/${name}`;
const buttons = useMemo(() => {
return [
@ -50,10 +52,9 @@ function Camera({ name, config }) {
{ name: 'Recordings', href: `/recording/${name}` },
];
}, [name]);
const cleanName = useMemo(
() => { return `${name.replaceAll('_', ' ')}` },
[name]
);
const cleanName = useMemo(() => {
return `${name.replaceAll('_', ' ')}`;
}, [name]);
const icons = useMemo(
() => [
{
@ -65,7 +66,9 @@ function Camera({ name, config }) {
},
},
{
name: config.record.enabled_in_config ? `Toggle recordings ${recordValue === 'ON' ? 'off' : 'on'}` : 'Recordings must be enabled in the config to be turned on in the UI.',
name: config.record.enabled_in_config
? `Toggle recordings ${recordValue === 'ON' ? 'off' : 'on'}`
: 'Recordings must be enabled in the config to be turned on in the UI.',
icon: ClipIcon,
color: config.record.enabled_in_config ? (recordValue === 'ON' ? 'blue' : 'gray') : 'red',
onClick: () => {
@ -82,11 +85,27 @@ function Camera({ name, config }) {
sendSnapshots(snapshotValue === 'ON' ? 'OFF' : 'ON', true);
},
},
],
[config, detectValue, sendDetect, recordValue, sendRecordings, snapshotValue, sendSnapshots]
config.audio.enabled_in_config
? {
name: `Toggle audio detection ${audioValue === 'ON' ? 'off' : 'on'}`,
icon: AudioIcon,
color: audioValue === 'ON' ? 'blue' : 'gray',
onClick: () => {
sendAudio(audioValue === 'ON' ? 'OFF' : 'ON', true);
},
}
: null,
].filter((button) => button != null),
[config, audioValue, sendAudio, detectValue, sendDetect, recordValue, sendRecordings, snapshotValue, sendSnapshots]
);
return (
<Card buttons={buttons} href={href} header={cleanName} icons={icons} media={<CameraImage camera={name} stretch />} />
<Card
buttons={buttons}
href={href}
header={cleanName}
icons={icons}
media={<CameraImage camera={name} stretch />}
/>
);
}