Audio events (#6848)

* Initial audio classification model implementation

* fix mypy

* Keep audio labelmap local

* Cleanup

* Start adding config for audio

* Add the detector

* Add audio detection process keypoints

* Build out base config

* Load labelmap correctly

* Fix config bugs

* Start audio process

* Fix startup issues

* Try to cleanup restarting

* Add ffmpeg input args

* Get audio detection working

* Save event to db

* End events if not heard for 30 seconds

* Use not heard config

* Stop ffmpeg when shutting down

* Fixes

* End events correctly

* Use api instead of event queue to save audio events

* Get events working

* Close threads when stop event is sent

* remove unused

* Only start audio process if at least one camera is enabled

* Add const for float

* Cleanup labelmap

* Add audio icon in frontend

* Add ability to toggle audio with mqtt

* Set initial audio value

* Fix audio enabling

* Close logpipe

* Isort

* Formatting

* Fix web tests

* Fix web tests

* Handle cases where args are a string

* Remove log

* Cleanup process close

* Use correct field

* Simplify if statement

* Use var for localhost

* Add audio detectors docs

* Add restream docs to mention audio detection

* Add full config docs

* Fix links to other docs

---------

Co-authored-by: Jason Hunter <hunterjm@gmail.com>
Nicolas Mowen, committed by GitHub
commit c3b313a70d (parent f1dc3a639c)
28 files changed:

Dockerfile | 4
audio-labelmap.txt | 521
docs/docs/configuration/audio_detectors.md | 63
docs/docs/configuration/index.md | 14
docs/docs/configuration/object_detectors.md | 6
docs/docs/configuration/restream.md | 4
docs/docs/frigate/hardware.md | 4
docs/docs/guides/getting_started.md | 2
docs/sidebars.js | 3
frigate/app.py | 33
frigate/comms/dispatcher.py | 36
frigate/comms/mqtt.py | 7
frigate/config.py | 34
frigate/const.py | 8
frigate/events/audio.py | 247
frigate/events/external.py | 5
frigate/events/maintainer.py | 41
frigate/ffmpeg_presets.py | 7
frigate/http.py | 5
frigate/output.py | 13
frigate/record/maintainer.py | 4
frigate/record/record.py | 4
frigate/types.py | 3
web/__test__/handlers.js | 2
web/src/api/__tests__/ws.test.jsx | 4
web/src/api/ws.jsx | 12
web/src/icons/Audio.jsx | 36
web/src/routes/Cameras.jsx | 37

@@ -98,7 +98,9 @@ COPY labelmap.txt .
COPY --from=ov-converter /models/public/ssdlite_mobilenet_v2/FP16 openvino-model
RUN wget -q https://github.com/openvinotoolkit/open_model_zoo/raw/master/data/dataset_classes/coco_91cl_bkgr.txt -O openvino-model/coco_91cl_bkgr.txt && \
sed -i 's/truck/car/g' openvino-model/coco_91cl_bkgr.txt
# Get Audio Model and labels
RUN wget -qO cpu_audio_model.tflite https://tfhub.dev/google/lite-model/yamnet/classification/tflite/1?lite-format=tflite
COPY audio-labelmap.txt .
FROM wget AS s6-overlay
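
The model fetched above is the stock YAMNet classifier from TF Hub. As a quick sanity check (a sketch, not part of this commit; it assumes tflite_runtime is installed and uses the paths from the Dockerfile above), you can confirm the tensor shapes the detector will work with:

```python
# Sketch: inspect the YAMNet tflite model downloaded in the Dockerfile above.
from tflite_runtime.interpreter import Interpreter

interpreter = Interpreter(model_path="/cpu_audio_model.tflite", num_threads=2)
interpreter.allocate_tensors()

# The detector feeds 0.975 s of 16 kHz mono audio, i.e. 15600 float32 samples,
# and reads back one score per class in audio-labelmap.txt.
print(interpreter.get_input_details()[0]["shape"])
print(interpreter.get_output_details()[0]["shape"])
```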

@@ -0,0 +1,521 @@
speech
speech
speech
speech
babbling
speech
yell
bellow
whoop
yell
yell
yell
whispering
laughter
laughter
laughter
snicker
laughter
laughter
crying
crying
crying
yell
sigh
singing
choir
sodeling
chant
mantra
child_singing
synthetic_singing
rapping
humming
groan
grunt
whistling
breathing
wheeze
snoring
gasp
pant
snort
cough
throat_clearing
sneeze
sniff
run
shuffle
footsteps
chewing
biting
gargling
stomach_rumble
burping
hiccup
fart
hands
finger_snapping
clapping
heartbeat
heart_murmur
cheering
applause
chatter
crowd
speech
children_playing
animal
pets
dog
bark
yip
howl
bow-wow
growling
whimper_dog
cat
purr
meow
hiss
caterwaul
livestock
horse
clip-clop
neigh
cattle
moo
cowbell
pig
oink
goat
bleat
sheep
fowl
chicken
cluck
cock-a-doodle-doo
turkey
gobble
duck
quack
goose
honk
wild_animals
roaring_cats
roar
bird
chird
chirp
squawk
pigeon
coo
crow
caw
owl
hoot
flapping_wings
dogs
rats
mouse
patter
insect
cricket
mosquito
fly
buzz
buzz
frog
croak
snake
rattle
whale_vocalization
music
musical_instrument
plucked_string_instrument
guitar
electric_guitar
bass_guitar
acoustic_guitar
steel_guitar
tapping
strum
banjo
sitar
mandolin
zither
ukulele
keyboard
piano
electric_piano
organ
electronic_organ
hammond_organ
synthesizer
sampler
harpsichord
percussion
drum_kit
drum_machine
drum
snare_drum
rimshot
drum_roll
bass_drum
timpani
tabla
cymbal
hi-hat
wood_block
tambourine
rattle
maraca
gong
tubular_bells
mallet_percussion
marimba
glockenspiel
vibraphone
steelpan
orchestra
brass_instrument
french_horn
trumpet
trombone
bowed_string_instrument
string_section
violin
pizzicato
cello
double_bass
wind_instrument
flute
saxophone
clarinet
harp
bell
church_bell
jingle_bell
bicycle_bell
tuning_fork
chime
wind_chime
change_ringing
harmonica
accordion
bagpipes
didgeridoo
shofar
theremin
singing_bowl
scratching
pop_music
hip_hop_music
beatboxing
rock_music
heavy_metal
punk_rock
grunge
progressive_rock
rock_and_roll
psychedelic_rock
rhythm_and_blues
soul_music
reggae
country
swing_music
bluegrass
funk
folk_music
middle_eastern_music
jazz
disco
classical_music
opera
electronic_music
house_music
techno
dubstep
drum_and_bass
electronica
electronic_dance_music
ambient_music
trance_music
music_of_latin_america
salsa_music
flamenco
blues
music_for_children
new-age_music
vocal_music
a_capella
music_of_africa
afrobeat
christian_music
gospel_music
music_of_asia
carnatic_music
music_of_bollywood
ska
traditional_music
independent_music
song
background_music
theme_music
jingle
soundtrack_music
lullaby
video_game_music
christmas_music
dance_music
wedding_music
happy_music
sad_music
tender_music
exciting_music
angry_music
scary_music
wind
rustling_leaves
wind_noise
thunderstorm
thunder
water
rain
raindrop
rain_on_surface
stream
waterfall
ocean
waves
steam
gurgling
fire
crackle
vehicle
boat
sailboat
rowboat
motorboat
ship
motor_vehicle
car
honk
toot
car_alarm
power_windows
skidding
tire_squeal
car_passing_by
race_car
truck
air_brake
air_horn
reversing_beeps
ice_cream_truck
bus
emergency_vehicle
police_car
ambulance
fire_engine
motorcycle
traffic_noise
rail_transport
train
train_whistle
train_horn
railroad_car
train_wheels_squealing
subway
aircraft
aircraft_engine
jet_engine
propeller
helicopter
fixed-wing_aircraft
bicycle
skateboard
engine
light_engine
dental_drill's_drill
lawn_mower
chainsaw
medium_engine
heavy_engine
engine_knocking
engine_starting
idling
accelerating
door
doorbell
ding-dong
sliding_door
slam
knock
tap
squeak
cupboard_open_or_close
drawer_open_or_close
dishes
cutlery
chopping
frying
microwave_oven
blender
water_tap
sink
bathtub
hair_dryer
toilet_flush
toothbrush
electric_toothbrush
vacuum_cleaner
zipper
keys_jangling
coin
scissors
electric_shaver
shuffling_cards
typing
typewriter
computer_keyboard
writing
alarm
telephone
telephone_bell_ringing
ringtone
telephone_dialing
dial_tone
busy_signal
alarm_clock
siren
civil_defense_siren
buzzer
smoke_detector
fire_alarm
foghorn
whistle
steam_whistle
mechanisms
ratchet
clock
tick
tick-tock
gears
pulleys
sewing_machine
mechanical_fan
air_conditioning
cash_register
printer
camera
single-lens_reflex_camera
tools
hammer
jackhammer
sawing
filing
sanding
power_tool
drill
explosion
gunshot
machine_gun
fusillade
artillery_fire
cap_gun
fireworks
firecracker
burst
eruption
boom
wood
chop
splinter
crack
glass
chink
shatter
liquid
splash
slosh
squish
drip
pour
trickle
gush
fill
spray
pump
stir
boiling
sonar
arrow
whoosh
thump
thunk
electronic_tuner
effects_unit
chorus_effect
basketball_bounce
bang
slap
whack
smash
breaking
bouncing
whip
flap
scratch
scrape
rub
roll
crushing
crumpling
tearing
beep
ping
ding
clang
squeal
creak
rustle
whir
clatter
sizzle
clicking
clickety-clack
rumble
plop
jingle
hum
zing
boing
crunch
silence
sine_wave
harmonic
chirp_tone
sound_effect
pulse
inside
inside
inside
outside
outside
reverberation
echo
noise
environmental_noise
static
mains_hum
distortion
sidetone
cacophony
white_noise
pink_noise
throbbing
vibration
television
radio
field_recording
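
The labelmap is positional: line N names the class with index N in the model's output scores, which is how `load_labels("/audio-labelmap.txt")` in `frigate/events/audio.py` (below) resolves a detection to a label. A minimal sketch of that lookup, with a hypothetical class id:

```python
# Sketch: resolve a YAMNet class index to the label Frigate uses for the event.
# The labelmap is one label per line; the line number is the model's class id.
with open("/audio-labelmap.txt") as f:
    labels = [line.strip() for line in f]

class_id = 0  # hypothetical class id returned by the model
print(labels[class_id])  # -> "speech"
```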

@@ -0,0 +1,63 @@
---
id: audio_detectors
title: Audio Detectors
---
Frigate provides a builtin audio detector which runs on the CPU. Compared to object detection in images, audio detection is a relatively lightweight operation, so running it on the CPU is the only option.
## Configuration
Audio events work by detecting a type of audio and creating an event; the event ends once that type of audio has not been heard for the configured amount of time. Audio events save a snapshot at the beginning of the event as well as recordings throughout the event. The recordings are retained according to the configured recording retention.
### Enabling Audio Events
Audio events can be enabled for all cameras or only for specific cameras.
```yaml
audio: # <- enable audio events for all cameras
  enabled: True

cameras:
  front_camera:
    ffmpeg:
      ...
    audio:
      enabled: True # <- enable audio events for the front_camera
```
If you are using multiple streams, you must set the `audio` role on the stream that is going to be used for audio detection. This can be any stream, but the stream must include audio.
:::note
The ffmpeg process for capturing audio will be a separate connection to the camera from the other roles assigned to the camera; for this reason it is recommended that the go2rtc restream is used for this purpose. See [the restream docs](/configuration/restream.md) for more information.
:::
```yaml
cameras:
  front_camera:
    ffmpeg:
      inputs:
        - path: rtsp://.../main_stream
          roles:
            - record
        - path: rtsp://.../sub_stream # <- this stream must have audio enabled
          roles:
            - audio
            - detect
```
### Configuring Audio Events
The included audio model can detect over 500 different types of audio, many of which are not practical. By default `bark`, `speech`, `yell`, and `scream` are enabled, but these can be customized.
```yaml
audio:
  enabled: True
  listen:
    - bark
    - scream
    - speech
    - yell
```
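
This PR also wires audio detection into MQTT (see the `frigate/comms/dispatcher.py` hunk below): publishing `ON`/`OFF` to `<camera>/audio/set` toggles detection at runtime, and the retained state is published on `<camera>/audio/state`. A minimal sketch using paho-mqtt, assuming the default `frigate` topic prefix, a camera named `front_camera`, and a reachable broker (all hypothetical values):

```python
# Sketch: toggle audio detection at runtime over MQTT.
import paho.mqtt.client as mqtt

client = mqtt.Client()
client.connect("mqtt-broker.local", 1883)  # hypothetical broker address

# Turning detection ON only works if audio is enabled in the config
# (enabled_in_config); otherwise the dispatcher logs an error and ignores it.
client.publish("frigate/front_camera/audio/set", "ON")

# The dispatcher publishes the resulting state, retained, on:
#   frigate/front_camera/audio/state
client.disconnect()
```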

@@ -138,6 +138,20 @@ model:
  labelmap:
    2: vehicle

# Optional: Audio Events Configuration
# NOTE: Can be overridden at the camera level
audio:
  # Optional: Enable audio events (default: shown below)
  enabled: False
  # Optional: Number of seconds without detected audio before ending the event (default: shown below)
  max_not_heard: 30
  # Optional: Types of audio to listen for (default: shown below)
  listen:
    - bark
    - scream
    - speech
    - yell

# Optional: logger verbosity settings
logger:
  # Optional: Default log verbosity (default: shown below)

@@ -1,6 +1,6 @@
---
id: detectors
title: Detectors
id: object_detectors
title: Object Detectors
---
Frigate provides the following builtin detector types: `cpu`, `edgetpu`, `openvino`, and `tensorrt`. By default, Frigate will use a single CPU detector. Other detectors may require additional configuration as described below. When using multiple detectors they will run in dedicated processes, but pull from a common queue of detection requests from across all cameras.
@@ -275,6 +275,6 @@ detectors:
      api_timeout: 0.1 # seconds
```
Replace `<your_codeproject_ai_server_ip>` and `<port>` with the IP address and port of your CodeProject.AI server.
To verify that the integration is working correctly, start Frigate and observe the logs for any error messages related to CodeProject.AI. Additionally, you can check the Frigate web interface to see if the objects detected by CodeProject.AI are being displayed and tracked properly.

@@ -67,6 +67,7 @@ cameras:
roles:
- record
- detect
- audio # <- only necessary if audio detection is enabled
http_cam:
ffmpeg:
output_args:
@@ -77,6 +78,7 @@ cameras:
roles:
- record
- detect
- audio # <- only necessary if audio detection is enabled
```
### With Sub Stream
@@ -112,6 +114,7 @@ cameras:
- path: rtsp://127.0.0.1:8554/rtsp_cam_sub # <--- the name here must match the name of the camera_sub in restream
input_args: preset-rtsp-restream
roles:
- audio # <- only necessary if audio detection is enabled
- detect
http_cam:
ffmpeg:
@@ -125,6 +128,7 @@ cameras:
- path: rtsp://127.0.0.1:8554/http_cam_sub # <--- the name here must match the name of the camera_sub in restream
input_args: preset-rtsp-restream
roles:
- audio # <- only necessary if audio detection is enabled
- detect
```

@@ -50,7 +50,7 @@ The OpenVINO detector type is able to run on:
- 6th Gen Intel Platforms and newer that have an iGPU
- x86 & Arm64 hosts with VPU Hardware (ex: Intel NCS2)
More information is available [in the detector docs](/configuration/detectors#openvino-detector)
More information is available [in the detector docs](/configuration/object_detectors#openvino-detector)
Inference speeds vary greatly depending on the CPU, GPU, or VPU used, some known examples are below:
@@ -72,7 +72,7 @@ Inference speeds vary greatly depending on the CPU, GPU, or VPU used, some known
### TensorRT
The TensorRT detector is able to run on x86 hosts that have an Nvidia GPU which supports the 11.x series of CUDA libraries. The minimum driver version on the host system must be `>=450.80.02`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer; check the [TensorRT docs for more info](/configuration/detectors#nvidia-tensorrt-detector).
The TensorRT detector is able to run on x86 hosts that have an Nvidia GPU which supports the 11.x series of CUDA libraries. The minimum driver version on the host system must be `>=450.80.02`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer; check the [TensorRT docs for more info](/configuration/object_detectors#nvidia-tensorrt-detector).
Inference speeds will vary greatly depending on the GPU and the model used.
`tiny` variants are faster than the equivalent non-tiny model, some known examples are below:

@@ -71,7 +71,7 @@ cameras:
...
```
More details on available detectors can be found [here](../configuration/detectors.md).
More details on available detectors can be found [here](../configuration/object_detectors.md).
Restart Frigate and you should start seeing detections for `person`. If you want to track other objects, they will need to be added according to the [configuration file reference](../configuration/index.md#full-configuration-reference).

@@ -16,7 +16,8 @@ module.exports = {
],
Configuration: [
"configuration/index",
"configuration/detectors",
"configuration/object_detectors",
"configuration/audio_detectors",
"configuration/cameras",
"configuration/masks",
"configuration/record",

@@ -29,6 +29,7 @@ from frigate.const import (
MODEL_CACHE_DIR,
RECORD_DIR,
)
from frigate.events.audio import listen_to_audio
from frigate.events.cleanup import EventCleanup
from frigate.events.external import ExternalEventProcessor
from frigate.events.maintainer import EventProcessor
@@ -44,7 +45,7 @@ from frigate.record.record import manage_recordings
from frigate.stats import StatsEmitter, stats_init
from frigate.storage import StorageMaintainer
from frigate.timeline import TimelineProcessor
from frigate.types import CameraMetricsTypes, RecordMetricsTypes
from frigate.types import CameraMetricsTypes, FeatureMetricsTypes
from frigate.version import VERSION
from frigate.video import capture_camera, track_camera
from frigate.watchdog import FrigateWatchdog
@@ -62,7 +63,7 @@ class FrigateApp:
self.log_queue: Queue = mp.Queue()
self.plus_api = PlusApi()
self.camera_metrics: dict[str, CameraMetricsTypes] = {}
self.record_metrics: dict[str, RecordMetricsTypes] = {}
self.feature_metrics: dict[str, FeatureMetricsTypes] = {}
self.processes: dict[str, int] = {}
def set_environment_vars(self) -> None:
@@ -104,7 +105,7 @@ class FrigateApp:
user_config = FrigateConfig.parse_file(config_file)
self.config = user_config.runtime_config(self.plus_api)
for camera_name in self.config.cameras.keys():
for camera_name, camera_config in self.config.cameras.items():
# create camera_metrics
self.camera_metrics[camera_name] = {
"camera_fps": mp.Value("d", 0.0), # type: ignore[typeddict-item]
@@ -159,13 +160,19 @@ class FrigateApp:
"capture_process": None,
"process": None,
}
self.record_metrics[camera_name] = {
self.feature_metrics[camera_name] = {
"audio_enabled": mp.Value( # type: ignore[typeddict-item]
# issue https://github.com/python/typeshed/issues/8799
# from mypy 0.981 onwards
"i",
self.config.cameras[camera_name].audio.enabled,
),
"record_enabled": mp.Value( # type: ignore[typeddict-item]
# issue https://github.com/python/typeshed/issues/8799
# from mypy 0.981 onwards
"i",
self.config.cameras[camera_name].record.enabled,
)
),
}
def set_log_levels(self) -> None:
@@ -253,7 +260,7 @@ class FrigateApp:
recording_process = mp.Process(
target=manage_recordings,
name="recording_manager",
args=(self.config, self.recordings_info_queue, self.record_metrics),
args=(self.config, self.recordings_info_queue, self.feature_metrics),
)
recording_process.daemon = True
self.recording_process = recording_process
@@ -312,7 +319,7 @@ class FrigateApp:
self.config,
self.onvif_controller,
self.camera_metrics,
self.record_metrics,
self.feature_metrics,
comms,
)
@@ -421,6 +428,17 @@ class FrigateApp:
capture_process.start()
logger.info(f"Capture process started for {name}: {capture_process.pid}")
def start_audio_processors(self) -> None:
if len([c for c in self.config.cameras.values() if c.audio.enabled]) > 0:
audio_process = mp.Process(
target=listen_to_audio,
name="audio_capture",
args=(self.config, self.feature_metrics),
)
audio_process.daemon = True
audio_process.start()
logger.info(f"Audio process started: {audio_process.pid}")
def start_timeline_processor(self) -> None:
self.timeline_processor = TimelineProcessor(
self.config, self.timeline_queue, self.stop_event
@@ -517,6 +535,7 @@ class FrigateApp:
self.start_detected_frames_processor()
self.start_camera_processors()
self.start_camera_capture_processes()
self.start_audio_processors()
self.start_storage_maintainer()
self.init_stats()
self.init_external_event_processor()

@@ -6,7 +6,7 @@ from typing import Any, Callable
from frigate.config import FrigateConfig
from frigate.ptz import OnvifCommandEnum, OnvifController
from frigate.types import CameraMetricsTypes, RecordMetricsTypes
from frigate.types import CameraMetricsTypes, FeatureMetricsTypes
from frigate.util import restart_frigate
logger = logging.getLogger(__name__)
@@ -39,19 +39,20 @@ class Dispatcher:
config: FrigateConfig,
onvif: OnvifController,
camera_metrics: dict[str, CameraMetricsTypes],
record_metrics: dict[str, RecordMetricsTypes],
feature_metrics: dict[str, FeatureMetricsTypes],
communicators: list[Communicator],
) -> None:
self.config = config
self.onvif = onvif
self.camera_metrics = camera_metrics
self.record_metrics = record_metrics
self.feature_metrics = feature_metrics
self.comms = communicators
for comm in self.comms:
comm.subscribe(self._receive)
self._camera_settings_handlers: dict[str, Callable] = {
"audio": self._on_audio_command,
"detect": self._on_detect_command,
"improve_contrast": self._on_motion_improve_contrast_command,
"motion": self._on_motion_command,
@@ -186,6 +187,29 @@ class Dispatcher:
motion_settings.threshold = payload # type: ignore[union-attr]
self.publish(f"{camera_name}/motion_threshold/state", payload, retain=True)
def _on_audio_command(self, camera_name: str, payload: str) -> None:
"""Callback for audio topic."""
audio_settings = self.config.cameras[camera_name].audio
if payload == "ON":
if not self.config.cameras[camera_name].audio.enabled_in_config:
logger.error(
"Audio detection must be enabled in the config to be turned on via MQTT."
)
return
if not audio_settings.enabled:
logger.info(f"Turning on audio detection for {camera_name}")
audio_settings.enabled = True
self.feature_metrics[camera_name]["audio_enabled"].value = True
elif payload == "OFF":
if self.feature_metrics[camera_name]["audio_enabled"].value:
logger.info(f"Turning off audio detection for {camera_name}")
audio_settings.enabled = False
self.feature_metrics[camera_name]["audio_enabled"].value = False
self.publish(f"{camera_name}/audio/state", payload, retain=True)
def _on_recordings_command(self, camera_name: str, payload: str) -> None:
"""Callback for recordings topic."""
record_settings = self.config.cameras[camera_name].record
@@ -200,12 +224,12 @@ class Dispatcher:
if not record_settings.enabled:
logger.info(f"Turning on recordings for {camera_name}")
record_settings.enabled = True
self.record_metrics[camera_name]["record_enabled"].value = True
self.feature_metrics[camera_name]["record_enabled"].value = True
elif payload == "OFF":
if self.record_metrics[camera_name]["record_enabled"].value:
if self.feature_metrics[camera_name]["record_enabled"].value:
logger.info(f"Turning off recordings for {camera_name}")
record_settings.enabled = False
self.record_metrics[camera_name]["record_enabled"].value = False
self.feature_metrics[camera_name]["record_enabled"].value = False
self.publish(f"{camera_name}/recordings/state", payload, retain=True)

@@ -41,7 +41,7 @@ class MqttClient(Communicator): # type: ignore[misc]
for camera_name, camera in self.config.cameras.items():
self.publish(
f"{camera_name}/recordings/state",
"ON" if camera.record.enabled else "OFF",
"ON" if camera.record.enabled_in_config else "OFF",
retain=True,
)
self.publish(
@@ -49,6 +49,11 @@ class MqttClient(Communicator): # type: ignore[misc]
"ON" if camera.snapshots.enabled else "OFF",
retain=True,
)
self.publish(
f"{camera_name}/audio/state",
"ON" if camera.audio.enabled_in_config else "OFF",
retain=True,
)
self.publish(
f"{camera_name}/detect/state",
"ON" if camera.detect.enabled else "OFF",

@@ -40,6 +40,7 @@ DEFAULT_TIME_FORMAT = "%m/%d/%Y %H:%M:%S"
FRIGATE_ENV_VARS = {k: v for k, v in os.environ.items() if k.startswith("FRIGATE_")}
DEFAULT_TRACKED_OBJECTS = ["person"]
DEFAULT_LISTEN_AUDIO = ["bark", "speech", "yell", "scream"]
DEFAULT_DETECTORS = {"cpu": {"type": "cpu"}}
@@ -387,6 +388,19 @@ class ObjectConfig(FrigateBaseModel):
mask: Union[str, List[str]] = Field(default="", title="Object mask.")
class AudioConfig(FrigateBaseModel):
enabled: bool = Field(default=False, title="Enable audio events.")
max_not_heard: int = Field(
default=30, title="Seconds of not hearing the type of audio to end the event."
)
listen: List[str] = Field(
default=DEFAULT_LISTEN_AUDIO, title="Audio to listen for."
)
enabled_in_config: Optional[bool] = Field(
title="Keep track of original state of audio detection."
)
class BirdseyeModeEnum(str, Enum):
objects = "objects"
motion = "motion"
@@ -470,6 +484,7 @@ class FfmpegConfig(FrigateBaseModel):
class CameraRoleEnum(str, Enum):
audio = "audio"
record = "record"
rtmp = "rtmp"
detect = "detect"
@@ -631,6 +646,9 @@ class CameraConfig(FrigateBaseModel):
objects: ObjectConfig = Field(
default_factory=ObjectConfig, title="Object configuration."
)
audio: AudioConfig = Field(
default_factory=AudioConfig, title="Audio events configuration."
)
motion: Optional[MotionConfig] = Field(title="Motion detection configuration.")
detect: DetectConfig = Field(
default_factory=DetectConfig, title="Object detection configuration."
@@ -661,12 +679,16 @@ class CameraConfig(FrigateBaseModel):
# add roles to the input if there is only one
if len(config["ffmpeg"]["inputs"]) == 1:
has_rtmp = "rtmp" in config["ffmpeg"]["inputs"][0].get("roles", [])
has_audio = "audio" in config["ffmpeg"]["inputs"][0].get("roles", [])
config["ffmpeg"]["inputs"][0]["roles"] = [
"record",
"detect",
]
if has_audio:
config["ffmpeg"]["inputs"][0]["roles"].append("audio")
if has_rtmp:
config["ffmpeg"]["inputs"][0]["roles"].append("rtmp")
@@ -799,6 +821,11 @@ def verify_config_roles(camera_config: CameraConfig) -> None:
f"Camera {camera_config.name} has rtmp enabled, but rtmp is not assigned to an input."
)
if camera_config.audio.enabled and "audio" not in assigned_roles:
raise ValueError(
f"Camera {camera_config.name} has audio events enabled, but audio is not assigned to an input."
)
def verify_valid_live_stream_name(
frigate_config: FrigateConfig, camera_config: CameraConfig
@@ -911,6 +938,9 @@ class FrigateConfig(FrigateBaseModel):
objects: ObjectConfig = Field(
default_factory=ObjectConfig, title="Global object configuration."
)
audio: AudioConfig = Field(
default_factory=AudioConfig, title="Global Audio events configuration."
)
motion: Optional[MotionConfig] = Field(
title="Global motion detection configuration."
)
@@ -935,6 +965,7 @@ class FrigateConfig(FrigateBaseModel):
# Global config to propagate down to camera level
global_config = config.dict(
include={
"audio": ...,
"birdseye": ...,
"record": ...,
"snapshots": ...,
@@ -980,8 +1011,9 @@ class FrigateConfig(FrigateBaseModel):
camera_config.onvif.password = camera_config.onvif.password.format(
**FRIGATE_ENV_VARS
)
# set config recording value
# set config pre-value
camera_config.record.enabled_in_config = camera_config.record.enabled
camera_config.audio.enabled_in_config = camera_config.audio.enabled
# Add default filters
object_keys = camera_config.objects.track

@@ -8,6 +8,7 @@ EXPORT_DIR = f"{BASE_DIR}/exports"
BIRDSEYE_PIPE = "/tmp/cache/birdseye"
CACHE_DIR = "/tmp/cache"
YAML_EXT = (".yaml", ".yml")
FRIGATE_LOCALHOST = "http://127.0.0.1:5000"
PLUS_ENV_VAR = "PLUS_API_KEY"
PLUS_API_HOST = "https://api.frigate.video"
BTBN_PATH = "/usr/lib/btbn-ffmpeg"
@@ -22,6 +23,13 @@ ALL_ATTRIBUTE_LABELS = [
item for sublist in ATTRIBUTE_LABEL_MAP.values() for item in sublist
]
# Audio Consts
AUDIO_DURATION = 0.975
AUDIO_FORMAT = "s16le"
AUDIO_MAX_BIT_RANGE = 32768.0
AUDIO_SAMPLE_RATE = 16000
# Regex Consts
REGEX_CAMERA_NAME = r"^[a-zA-Z0-9_-]+$"
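
These constants pin down the audio pipeline's framing: ffmpeg delivers signed 16-bit little-endian (`s16le`) PCM at 16 kHz mono, and YAMNet consumes 0.975 s per inference, so each read from the pipe is a fixed byte count. A quick check of the arithmetic used by `AudioEventMaintainer` in `frigate/events/audio.py` below:

```python
# Sketch: the framing math behind the audio constants.
AUDIO_DURATION = 0.975         # seconds per YAMNet inference window
AUDIO_SAMPLE_RATE = 16000      # Hz, mono
AUDIO_MAX_BIT_RANGE = 32768.0  # magnitude of the most negative int16 sample

samples = int(round(AUDIO_DURATION * AUDIO_SAMPLE_RATE))  # 15600 samples
chunk_size = samples * 2                                  # 31200 bytes of s16le
print(samples, chunk_size)

# detect_audio() then normalizes int16 -> float32 in [-1.0, 1.0):
#   waveform = (audio / AUDIO_MAX_BIT_RANGE).astype(np.float32)
```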

@@ -0,0 +1,247 @@
"""Handle creating audio events."""
import datetime
import logging
import multiprocessing as mp
import os
import signal
import threading
from types import FrameType
from typing import Optional

import numpy as np
import requests
from setproctitle import setproctitle

from frigate.config import CameraConfig, FrigateConfig
from frigate.const import (
    AUDIO_DURATION,
    AUDIO_FORMAT,
    AUDIO_MAX_BIT_RANGE,
    AUDIO_SAMPLE_RATE,
    CACHE_DIR,
    FRIGATE_LOCALHOST,
)
from frigate.ffmpeg_presets import parse_preset_input
from frigate.log import LogPipe
from frigate.object_detection import load_labels
from frigate.types import FeatureMetricsTypes
from frigate.util import get_ffmpeg_arg_list, listen
from frigate.video import start_or_restart_ffmpeg, stop_ffmpeg

try:
    from tflite_runtime.interpreter import Interpreter
except ModuleNotFoundError:
    from tensorflow.lite.python.interpreter import Interpreter

logger = logging.getLogger(__name__)


def get_ffmpeg_command(input_args: list[str], input_path: str, pipe: str) -> list[str]:
    return get_ffmpeg_arg_list(
        f"ffmpeg {{}} -i {{}} -f {AUDIO_FORMAT} -ar {AUDIO_SAMPLE_RATE} -ac 1 -y {{}}".format(
            " ".join(input_args),
            input_path,
            pipe,
        )
    )


def listen_to_audio(
    config: FrigateConfig,
    process_info: dict[str, FeatureMetricsTypes],
) -> None:
    stop_event = mp.Event()
    audio_threads: list[threading.Thread] = []

    def exit_process() -> None:
        for thread in audio_threads:
            thread.join()

        logger.info("Exiting audio detector...")

    def receiveSignal(signalNumber: int, frame: Optional[FrameType]) -> None:
        stop_event.set()
        exit_process()

    signal.signal(signal.SIGTERM, receiveSignal)
    signal.signal(signal.SIGINT, receiveSignal)

    threading.current_thread().name = "process:audio_manager"
    setproctitle("frigate.audio_manager")
    listen()

    for camera in config.cameras.values():
        if camera.enabled and camera.audio.enabled_in_config:
            audio = AudioEventMaintainer(camera, process_info, stop_event)
            audio_threads.append(audio)
            audio.start()


class AudioTfl:
    def __init__(self, stop_event: mp.Event):
        self.stop_event = stop_event
        self.labels = load_labels("/audio-labelmap.txt")
        self.interpreter = Interpreter(
            model_path="/cpu_audio_model.tflite",
            num_threads=2,
        )

        self.interpreter.allocate_tensors()

        self.tensor_input_details = self.interpreter.get_input_details()
        self.tensor_output_details = self.interpreter.get_output_details()

    def _detect_raw(self, tensor_input):
        self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input)
        self.interpreter.invoke()

        detections = np.zeros((20, 6), np.float32)

        res = self.interpreter.get_tensor(self.tensor_output_details[0]["index"])[0]
        non_zero_indices = res > 0
        class_ids = np.argpartition(-res, 20)[:20]
        class_ids = class_ids[np.argsort(-res[class_ids])]
        class_ids = class_ids[non_zero_indices[class_ids]]
        scores = res[class_ids]
        boxes = np.full((scores.shape[0], 4), -1, np.float32)
        count = len(scores)

        for i in range(count):
            if scores[i] < 0.4 or i == 20:
                break
            detections[i] = [
                class_ids[i],
                float(scores[i]),
                boxes[i][0],
                boxes[i][1],
                boxes[i][2],
                boxes[i][3],
            ]

        return detections

    def detect(self, tensor_input, threshold=0.8):
        detections = []

        if self.stop_event.is_set():
            return detections

        raw_detections = self._detect_raw(tensor_input)

        for d in raw_detections:
            if d[1] < threshold:
                break
            detections.append(
                (self.labels[int(d[0])], float(d[1]), (d[2], d[3], d[4], d[5]))
            )
        return detections


class AudioEventMaintainer(threading.Thread):
    def __init__(
        self,
        camera: CameraConfig,
        feature_metrics: dict[str, FeatureMetricsTypes],
        stop_event: mp.Event,
    ) -> None:
        threading.Thread.__init__(self)
        self.name = f"{camera.name}_audio_event_processor"
        self.config = camera
        self.feature_metrics = feature_metrics
        self.detections: dict[dict[str, any]] = feature_metrics
        self.stop_event = stop_event
        self.detector = AudioTfl(stop_event)
        self.shape = (int(round(AUDIO_DURATION * AUDIO_SAMPLE_RATE)),)
        self.chunk_size = int(round(AUDIO_DURATION * AUDIO_SAMPLE_RATE * 2))
        self.pipe = f"{CACHE_DIR}/{self.config.name}-audio"
        self.ffmpeg_cmd = get_ffmpeg_command(
            get_ffmpeg_arg_list(self.config.ffmpeg.global_args)
            + parse_preset_input("preset-rtsp-audio-only", 1),
            [i.path for i in self.config.ffmpeg.inputs if "audio" in i.roles][0],
            self.pipe,
        )
        self.pipe_file = None
        self.logpipe = LogPipe(f"ffmpeg.{self.config.name}.audio")
        self.audio_listener = None

    def detect_audio(self, audio) -> None:
        if not self.feature_metrics[self.config.name]["audio_enabled"].value:
            return

        waveform = (audio / AUDIO_MAX_BIT_RANGE).astype(np.float32)
        model_detections = self.detector.detect(waveform)

        for label, score, _ in model_detections:
            if label not in self.config.audio.listen:
                continue

            self.handle_detection(label, score)

        self.expire_detections()

    def handle_detection(self, label: str, score: float) -> None:
        if self.detections.get(label):
            self.detections[label][
                "last_detection"
            ] = datetime.datetime.now().timestamp()
        else:
            resp = requests.post(
                f"{FRIGATE_LOCALHOST}/api/events/{self.config.name}/{label}/create",
                json={"duration": None},
            )

            if resp.status_code == 200:
                event_id = resp.json()[0]["event_id"]
                self.detections[label] = {
                    "id": event_id,
                    "label": label,
                    "last_detection": datetime.datetime.now().timestamp(),
                }

    def expire_detections(self) -> None:
        now = datetime.datetime.now().timestamp()

        for detection in self.detections.values():
            if (
                now - detection.get("last_detection", now)
                > self.config.audio.max_not_heard
            ):
                self.detections[detection["label"]] = None
                requests.put(
                    f"{FRIGATE_LOCALHOST}/api/events/{detection['id']}/end",
                    json={
                        "end_time": detection["last_detection"]
                        + self.config.record.events.post_capture
                    },
                )

    def restart_audio_pipe(self) -> None:
        try:
            os.mkfifo(self.pipe)
        except FileExistsError:
            pass

        self.audio_listener = start_or_restart_ffmpeg(
            self.ffmpeg_cmd, logger, self.logpipe, None, self.audio_listener
        )

    def read_audio(self) -> None:
        if self.pipe_file is None:
            self.pipe_file = open(self.pipe, "rb")

        try:
            audio = np.frombuffer(self.pipe_file.read(self.chunk_size), dtype=np.int16)
            self.detect_audio(audio)
        except BrokenPipeError:
            self.logpipe.dump()
            self.restart_audio_pipe()

    def run(self) -> None:
        self.restart_audio_pipe()

        while not self.stop_event.is_set():
            self.read_audio()

        self.pipe_file.close()
        stop_ffmpeg(self.audio_listener, logger)
        self.logpipe.close()
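
One of the commit bullets above notes that audio events go through the HTTP API rather than the internal event queue. A sketch of the two calls `AudioEventMaintainer` makes against the local API (camera name and timing are hypothetical):

```python
# Sketch: the event lifecycle AudioEventMaintainer drives over HTTP.
import datetime

import requests

FRIGATE_LOCALHOST = "http://127.0.0.1:5000"

# Start: POST creates an event with indeterminate duration and returns its id.
resp = requests.post(
    f"{FRIGATE_LOCALHOST}/api/events/front_camera/speech/create",
    json={"duration": None},
)
event_id = resp.json()[0]["event_id"]

# End: PUT with an explicit end_time once the label has gone unheard
# for audio.max_not_heard seconds.
requests.put(
    f"{FRIGATE_LOCALHOST}/api/events/{event_id}/end",
    json={"end_time": datetime.datetime.now().timestamp()},
)
```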

@@ -67,11 +67,10 @@ class ExternalEventProcessor:
return event_id
def finish_manual_event(self, event_id: str) -> None:
def finish_manual_event(self, event_id: str, end_time: float) -> None:
"""Finish external event with indeterminate duration."""
now = datetime.datetime.now().timestamp()
self.queue.put(
(EventTypeEnum.api, "end", None, {"id": event_id, "end_time": now})
(EventTypeEnum.api, "end", None, {"id": event_id, "end_time": end_time})
)
def _write_images(

@@ -18,7 +18,6 @@ logger = logging.getLogger(__name__)
class EventTypeEnum(str, Enum):
api = "api"
# audio = "audio"
tracked_object = "tracked_object"
@@ -73,19 +72,21 @@ class EventProcessor(threading.Thread):
except queue.Empty:
continue
logger.debug(f"Event received: {event_type} {camera} {event_data['id']}")
self.timeline_queue.put(
(
camera,
source_type,
event_type,
self.events_in_process.get(event_data["id"]),
event_data,
)
logger.debug(
f"Event received: {source_type} {event_type} {camera} {event_data['id']}"
)
if source_type == EventTypeEnum.tracked_object:
self.timeline_queue.put(
(
camera,
source_type,
event_type,
self.events_in_process.get(event_data["id"]),
event_data,
)
)
if event_type == "start":
self.events_in_process[event_data["id"]] = event_data
continue
@@ -215,7 +216,7 @@ class EventProcessor(threading.Thread):
del self.events_in_process[event_data["id"]]
self.event_processed_queue.put((event_data["id"], camera))
def handle_external_detection(self, type: str, event_data: Event):
def handle_external_detection(self, type: str, event_data: Event) -> None:
if type == "new":
event = {
Event.id: event_data["id"],
@@ -230,20 +231,14 @@ class EventProcessor(threading.Thread):
Event.zones: [],
Event.data: {},
}
Event.insert(event).execute()
elif type == "end":
event = {
Event.id: event_data["id"],
Event.end_time: event_data["end_time"],
}
try:
(
Event.insert(event)
.on_conflict(
conflict_target=[Event.id],
update=event,
)
.execute()
)
except Exception:
logger.warning(f"Failed to update manual event: {event_data['id']}")
try:
Event.update(event).execute()
except Exception:
logger.warning(f"Failed to update manual event: {event_data['id']}")

@@ -282,6 +282,13 @@ PRESETS_INPUT = {
"-use_wallclock_as_timestamps",
"1",
],
"preset-rtsp-audio-only": [
"-rtsp_transport",
"tcp",
TIMEOUT_PARAM,
"5000000",
"-vn",
],
"preset-rtsp-restream": _user_agent_args
+ [
"-rtsp_transport",

@@ -908,8 +908,11 @@ def create_event(camera_name, label):
@bp.route("/events/<event_id>/end", methods=["PUT"])
def end_event(event_id):
json: dict[str, any] = request.get_json(silent=True) or {}
try:
current_app.external_processor.finish_manual_event(event_id)
end_time = json.get("end_time", datetime.now().timestamp())
current_app.external_processor.finish_manual_event(event_id, end_time)
except Exception:
return jsonify(
{"success": False, "message": f"{event_id} must be set and valid."}, 404

@@ -156,7 +156,12 @@ class BroadcastThread(threading.Thread):
class BirdsEyeFrameManager:
def __init__(self, config: FrigateConfig, frame_manager: SharedMemoryFrameManager):
def __init__(
self,
config: FrigateConfig,
frame_manager: SharedMemoryFrameManager,
stop_event: mp.Event,
):
self.config = config
self.mode = config.birdseye.mode
self.frame_manager = frame_manager
@@ -165,6 +170,7 @@ class BirdsEyeFrameManager:
self.frame_shape = (height, width)
self.yuv_shape = (height * 3 // 2, width)
self.frame = np.ndarray(self.yuv_shape, dtype=np.uint8)
self.stop_event = stop_event
# initialize the frame as black and with the Frigate logo
self.blank_frame = np.zeros(self.yuv_shape, np.uint8)
@@ -458,6 +464,9 @@ class BirdsEyeFrameManager:
# decrease scaling coefficient until height of all cameras can fit into the birdseye canvas
while calculating:
if self.stop_event.is_set():
return
layout_candidate = calculate_layout(
(canvas_width, canvas_height),
active_cameras_to_add,
@@ -580,7 +589,7 @@ def output_frames(config: FrigateConfig, video_output_queue):
for t in broadcasters.values():
t.start()
birdseye_manager = BirdsEyeFrameManager(config, frame_manager)
birdseye_manager = BirdsEyeFrameManager(config, frame_manager, stop_event)
if config.birdseye.restream:
birdseye_buffer = frame_manager.create(

@@ -20,7 +20,7 @@ import psutil
from frigate.config import FrigateConfig, RetainModeEnum
from frigate.const import CACHE_DIR, MAX_SEGMENT_DURATION, RECORD_DIR
from frigate.models import Event, Recordings
from frigate.types import RecordMetricsTypes
from frigate.types import FeatureMetricsTypes
from frigate.util import area, get_video_properties
logger = logging.getLogger(__name__)
@@ -31,7 +31,7 @@ class RecordingMaintainer(threading.Thread):
self,
config: FrigateConfig,
recordings_info_queue: mp.Queue,
process_info: dict[str, RecordMetricsTypes],
process_info: dict[str, FeatureMetricsTypes],
stop_event: MpEvent,
):
threading.Thread.__init__(self)

@@ -14,7 +14,7 @@ from frigate.config import FrigateConfig
from frigate.models import Event, Recordings, RecordingsToDelete, Timeline
from frigate.record.cleanup import RecordingCleanup
from frigate.record.maintainer import RecordingMaintainer
from frigate.types import RecordMetricsTypes
from frigate.types import FeatureMetricsTypes
from frigate.util import listen
logger = logging.getLogger(__name__)
@@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)
def manage_recordings(
config: FrigateConfig,
recordings_info_queue: mp.Queue,
process_info: dict[str, RecordMetricsTypes],
process_info: dict[str, FeatureMetricsTypes],
) -> None:
stop_event = mp.Event()

@@ -25,7 +25,8 @@ class CameraMetricsTypes(TypedDict):
skipped_fps: Synchronized
class RecordMetricsTypes(TypedDict):
class FeatureMetricsTypes(TypedDict):
audio_enabled: Synchronized
record_enabled: Synchronized

@@ -16,6 +16,7 @@ export const handlers = [
front: {
name: 'front',
objects: { track: ['taco', 'cat', 'dog'] },
audio: { enabled: false, enabled_in_config: false },
record: { enabled: true, enabled_in_config: true },
detect: { width: 1280, height: 720 },
snapshots: {},
@@ -25,6 +26,7 @@ export const handlers = [
side: {
name: 'side',
objects: { track: ['taco', 'cat', 'dog'] },
audio: { enabled: false, enabled_in_config: false },
record: { enabled: false, enabled_in_config: true },
detect: { width: 1280, height: 720 },
snapshots: {},

@@ -113,8 +113,8 @@ describe('WsProvider', () => {
vi.spyOn(Date, 'now').mockReturnValue(123456);
const config = {
cameras: {
front: { name: 'front', detect: { enabled: true }, record: { enabled: false }, snapshots: { enabled: true } },
side: { name: 'side', detect: { enabled: false }, record: { enabled: false }, snapshots: { enabled: false } },
front: { name: 'front', detect: { enabled: true }, record: { enabled: false }, snapshots: { enabled: true }, audio: { enabled: false } },
side: { name: 'side', detect: { enabled: false }, record: { enabled: false }, snapshots: { enabled: false }, audio: { enabled: false } },
},
};
render(

@@ -41,10 +41,11 @@ export function WsProvider({
useEffect(() => {
Object.keys(config.cameras).forEach((camera) => {
const { name, record, detect, snapshots } = config.cameras[camera];
const { name, record, detect, snapshots, audio } = config.cameras[camera];
dispatch({ topic: `${name}/recordings/state`, payload: record.enabled ? 'ON' : 'OFF', retain: false });
dispatch({ topic: `${name}/detect/state`, payload: detect.enabled ? 'ON' : 'OFF', retain: false });
dispatch({ topic: `${name}/snapshots/state`, payload: snapshots.enabled ? 'ON' : 'OFF', retain: false });
dispatch({ topic: `${name}/audio/state`, payload: audio.enabled ? 'ON' : 'OFF', retain: false });
});
}, [config]);
@@ -120,6 +121,15 @@ export function useSnapshotsState(camera) {
return { payload, send, connected };
}
export function useAudioState(camera) {
const {
value: { payload },
send,
connected,
} = useWs(`${camera}/audio/state`, `${camera}/audio/set`);
return { payload, send, connected };
}
export function usePtzCommand(camera) {
const {
value: { payload },

@@ -0,0 +1,36 @@
import { h } from 'preact';
import { memo } from 'preact/compat';

export function Snapshot({ className = 'h-6 w-6', stroke = 'currentColor', onClick = () => {} }) {
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      className={className}
      fill="none"
      viewBox="0 0 32 32"
      stroke={stroke}
      onClick={onClick}
    >
      <path
        stroke-linecap="round"
        stroke-linejoin="round"
        stroke-width="2"
        d="M18 30v-2a10.011 10.011 0 0010-10h2a12.013 12.013 0 01-12 12z"
      />
      <path
        stroke-linecap="round"
        stroke-linejoin="round"
        stroke-width="2"
        d="M18 22v-2a2.002 2.002 0 002-2h2a4.004 4.004 0 01-4 4zM10 2a9.01 9.01 0 00-9 9h2a7 7 0 0114 0 7.09 7.09 0 01-3.501 6.135l-.499.288v3.073a2.935 2.935 0 01-.9 2.151 4.182 4.182 0 01-4.633 1.03A4.092 4.092 0 015 20H3a6.116 6.116 0 003.67 5.512 5.782 5.782 0 002.314.486 6.585 6.585 0 004.478-1.888A4.94 4.94 0 0015 20.496v-1.942A9.108 9.108 0 0019 11a9.01 9.01 0 00-9-9z"
      />
      <path
        stroke-linecap="round"
        stroke-linejoin="round"
        stroke-width="2"
        d="M9.28 8.082A3.006 3.006 0 0113 11h2a4.979 4.979 0 00-1.884-3.911 5.041 5.041 0 00-4.281-.957 4.95 4.95 0 00-3.703 3.703 5.032 5.032 0 002.304 5.458A3.078 3.078 0 019 17.924V20h2v-2.077a5.06 5.06 0 00-2.537-4.346 3.002 3.002 0 01.817-5.494z"
      />
    </svg>
  );
}

export default memo(Snapshot);

@@ -2,10 +2,11 @@ import { h, Fragment } from 'preact';
import ActivityIndicator from '../components/ActivityIndicator';
import Card from '../components/Card';
import CameraImage from '../components/CameraImage';
import AudioIcon from '../icons/Audio';
import ClipIcon from '../icons/Clip';
import MotionIcon from '../icons/Motion';
import SnapshotIcon from '../icons/Snapshot';
import { useDetectState, useRecordingsState, useSnapshotsState } from '../api/ws';
import { useAudioState, useDetectState, useRecordingsState, useSnapshotsState } from '../api/ws';
import { useMemo } from 'preact/hooks';
import useSWR from 'swr';
@@ -43,6 +44,7 @@ function Camera({ name, config }) {
const { payload: detectValue, send: sendDetect } = useDetectState(name);
const { payload: recordValue, send: sendRecordings } = useRecordingsState(name);
const { payload: snapshotValue, send: sendSnapshots } = useSnapshotsState(name);
const { payload: audioValue, send: sendAudio } = useAudioState(name);
const href = `/cameras/${name}`;
const buttons = useMemo(() => {
return [
@@ -50,10 +52,9 @@ function Camera({ name, config }) {
{ name: 'Recordings', href: `/recording/${name}` },
];
}, [name]);
const cleanName = useMemo(
() => { return `${name.replaceAll('_', ' ')}` },
[name]
);
const cleanName = useMemo(() => {
return `${name.replaceAll('_', ' ')}`;
}, [name]);
const icons = useMemo(
() => [
{
@@ -65,7 +66,9 @@ function Camera({ name, config }) {
},
},
{
name: config.record.enabled_in_config ? `Toggle recordings ${recordValue === 'ON' ? 'off' : 'on'}` : 'Recordings must be enabled in the config to be turned on in the UI.',
name: config.record.enabled_in_config
? `Toggle recordings ${recordValue === 'ON' ? 'off' : 'on'}`
: 'Recordings must be enabled in the config to be turned on in the UI.',
icon: ClipIcon,
color: config.record.enabled_in_config ? (recordValue === 'ON' ? 'blue' : 'gray') : 'red',
onClick: () => {
@@ -82,11 +85,27 @@ function Camera({ name, config }) {
sendSnapshots(snapshotValue === 'ON' ? 'OFF' : 'ON', true);
},
},
],
[config, detectValue, sendDetect, recordValue, sendRecordings, snapshotValue, sendSnapshots]
config.audio.enabled_in_config
? {
name: `Toggle audio detection ${audioValue === 'ON' ? 'off' : 'on'}`,
icon: AudioIcon,
color: audioValue === 'ON' ? 'blue' : 'gray',
onClick: () => {
sendAudio(audioValue === 'ON' ? 'OFF' : 'ON', true);
},
}
: null,
].filter((button) => button != null),
[config, audioValue, sendAudio, detectValue, sendDetect, recordValue, sendRecordings, snapshotValue, sendSnapshots]
);
return (
<Card buttons={buttons} href={href} header={cleanName} icons={icons} media={<CameraImage camera={name} stretch />} />
<Card
buttons={buttons}
href={href}
header={cleanName}
icons={icons}
media={<CameraImage camera={name} stretch />}
/>
);
}
