Improve audio detection debugging (#19753)

* create audio activity manager

move publishing logic out of audio detector

* dispatcher changes

* correctly publish full array of audio detections in onConnect

* frontend websocket hooks

* line graph

* debug tab and i18n

* docs

* clean up

* fix i18n key
This commit is contained in:
Josh Hawkins 2025-08-25 13:40:21 -05:00 committed by GitHub
parent 1636fee36a
commit c260642604
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 437 additions and 92 deletions

View File

@ -50,7 +50,7 @@ cameras:
### Configuring Minimum Volume
The audio detector uses volume levels in the same way that motion in a camera feed is used for object detection. This means that frigate will not run audio detection unless the audio volume is above the configured level in order to reduce resource usage. Audio levels can vary widely between camera models so it is important to run tests to see what volume levels are. MQTT explorer can be used on the audio topic to see what volume level is being detected.
The audio detector uses volume levels in the same way that motion in a camera feed is used for object detection. This means that frigate will not run audio detection unless the audio volume is above the configured level in order to reduce resource usage. Audio levels can vary widely between camera models so it is important to run tests to see what volume levels are. The Debug view in the Frigate UI has an Audio tab for cameras that have the `audio` role assigned, where a graph and the current levels are displayed. The `min_volume` parameter should be set to the minimum `RMS` level required to run audio detection.
:::tip

View File

@ -1,10 +1,21 @@
"""Manage camera activity and updating listeners."""
import datetime
import json
import logging
import random
import string
from collections import Counter
from typing import Any, Callable
from frigate.comms.event_metadata_updater import (
EventMetadataPublisher,
EventMetadataTypeEnum,
)
from frigate.config import CameraConfig, FrigateConfig
logger = logging.getLogger(__name__)
class CameraActivityManager:
def __init__(
@ -139,3 +150,106 @@ class CameraActivityManager:
if any_changed:
self.publish(f"{camera}/all", sum(list(all_objects.values())))
self.publish(f"{camera}/all/active", sum(list(active_objects.values())))
class AudioActivityManager:
    """Track active audio detections per camera and publish changes to listeners.

    Owns the lifecycle of manual audio events: creates an event the first time
    a label is heard, refreshes it while it continues to be heard, and ends it
    once it has not been heard for longer than the camera's `max_not_heard`.
    """

    def __init__(
        self, config: FrigateConfig, publish: Callable[[str, Any], None]
    ) -> None:
        self.config = config
        self.publish = publish
        # camera -> label -> {"id": str, "score": float, "last_detection": float}
        self.current_audio_detections: dict[str, dict[str, dict[str, Any]]] = {}
        self.event_metadata_publisher = EventMetadataPublisher()

        for camera_config in config.cameras.values():
            if not camera_config.audio.enabled_in_config:
                continue

            self.__init_camera(camera_config)

    def __init_camera(self, camera_config: CameraConfig) -> None:
        """Create an empty detection map for a camera."""
        self.current_audio_detections[camera_config.name] = {}

    def update_activity(self, new_activity: dict[str, dict[str, Any]]) -> None:
        """Merge incoming audio activity and publish the full state if anything changed."""
        now = datetime.datetime.now().timestamp()

        for camera in new_activity:
            # handle cameras that were added dynamically
            if camera not in self.current_audio_detections:
                self.__init_camera(self.config.cameras[camera])

            new_detections = new_activity[camera].get("detections", [])

            if self.compare_audio_activity(camera, new_detections, now):
                logger.debug(f"Audio detections for {camera}: {new_activity}")
                self.publish(
                    "audio_detections",
                    json.dumps(self.current_audio_detections),
                )

    def compare_audio_activity(
        self, camera: str, new_detections: list[tuple[str, float]], now: float
    ) -> bool:
        # NOTE: was annotated `-> None` but a bool has always been returned
        # and update_activity branches on it.
        """Apply new detections for one camera and expire stale ones.

        Returns True when any detection was added, updated, or expired.
        """
        max_not_heard = self.config.cameras[camera].audio.max_not_heard
        current = self.current_audio_detections[camera]
        any_changed = False

        for label, score in new_detections:
            any_changed = True

            if label in current:
                # label still being heard: refresh timestamp and score
                current[label]["last_detection"] = now
                current[label]["score"] = score
            else:
                # new label: mint a unique event id and create a manual event
                rand_id = "".join(
                    random.choices(string.ascii_lowercase + string.digits, k=6)
                )
                event_id = f"{now}-{rand_id}"

                self.publish(f"{camera}/audio/{label}", "ON")
                self.event_metadata_publisher.publish(
                    (
                        now,
                        camera,
                        label,
                        event_id,
                        True,
                        score,
                        None,
                        None,
                        "audio",
                        {},
                    ),
                    EventMetadataTypeEnum.manual_event_create.value,
                )
                current[label] = {
                    "id": event_id,
                    "score": score,
                    "last_detection": now,
                }

        # expire detections that have not been heard within max_not_heard
        for label in list(current.keys()):
            if now - current[label]["last_detection"] > max_not_heard:
                any_changed = True
                self.publish(f"{camera}/audio/{label}", "OFF")
                self.event_metadata_publisher.publish(
                    (current[label]["id"], now),
                    EventMetadataTypeEnum.manual_event_end.value,
                )
                del current[label]

        return any_changed

    def expire_all(self, camera: str) -> None:
        """Immediately end and clear every active detection for a camera."""
        now = datetime.datetime.now().timestamp()
        current = self.current_audio_detections.get(camera, {})

        for label in list(current.keys()):
            self.publish(f"{camera}/audio/{label}", "OFF")
            self.event_metadata_publisher.publish(
                (current[label]["id"], now),
                EventMetadataTypeEnum.manual_event_end.value,
            )
            del current[label]

View File

@ -6,7 +6,7 @@ import logging
from typing import Any, Callable, Optional, cast
from frigate.camera import PTZMetrics
from frigate.camera.activity_manager import CameraActivityManager
from frigate.camera.activity_manager import AudioActivityManager, CameraActivityManager
from frigate.comms.base_communicator import Communicator
from frigate.comms.webpush import WebPushClient
from frigate.config import BirdseyeModeEnum, FrigateConfig
@ -17,10 +17,12 @@ from frigate.config.camera.updater import (
)
from frigate.const import (
CLEAR_ONGOING_REVIEW_SEGMENTS,
EXPIRE_AUDIO_ACTIVITY,
INSERT_MANY_RECORDINGS,
INSERT_PREVIEW,
NOTIFICATION_TEST,
REQUEST_REGION_GRID,
UPDATE_AUDIO_ACTIVITY,
UPDATE_BIRDSEYE_LAYOUT,
UPDATE_CAMERA_ACTIVITY,
UPDATE_EMBEDDINGS_REINDEX_PROGRESS,
@ -55,6 +57,7 @@ class Dispatcher:
self.ptz_metrics = ptz_metrics
self.comms = communicators
self.camera_activity = CameraActivityManager(config, self.publish)
self.audio_activity = AudioActivityManager(config, self.publish)
self.model_state: dict[str, ModelStatusTypesEnum] = {}
self.embeddings_reindex: dict[str, Any] = {}
self.birdseye_layout: dict[str, Any] = {}
@ -135,6 +138,12 @@ class Dispatcher:
def handle_update_camera_activity() -> None:
    # Forward the camera activity payload to the CameraActivityManager.
    self.camera_activity.update_activity(payload)
def handle_update_audio_activity() -> None:
    # Forward per-camera audio detection payloads (sent via UPDATE_AUDIO_ACTIVITY)
    # to the AudioActivityManager.
    self.audio_activity.update_activity(payload)
def handle_expire_audio_activity() -> None:
    # payload is a camera name; end all of its active audio detections at once
    # (sent when audio detection is disabled for the camera).
    self.audio_activity.expire_all(payload)
def handle_update_event_description() -> None:
event: Event = Event.get(Event.id == payload["id"])
cast(dict, event.data)["description"] = payload["description"]
@ -192,6 +201,7 @@ class Dispatcher:
def handle_on_connect() -> None:
camera_status = self.camera_activity.last_camera_activity.copy()
audio_detections = self.audio_activity.current_audio_detections.copy()
cameras_with_status = camera_status.keys()
for camera in self.config.cameras.keys():
@ -234,6 +244,7 @@ class Dispatcher:
json.dumps(self.embeddings_reindex.copy()),
)
self.publish("birdseye_layout", json.dumps(self.birdseye_layout.copy()))
self.publish("audio_detections", json.dumps(audio_detections))
def handle_notification_test() -> None:
    # Publish a fixed test message so notification integrations can verify delivery.
    self.publish("notification_test", "Test notification")
@ -246,6 +257,8 @@ class Dispatcher:
UPSERT_REVIEW_SEGMENT: handle_upsert_review_segment,
CLEAR_ONGOING_REVIEW_SEGMENTS: handle_clear_ongoing_review_segments,
UPDATE_CAMERA_ACTIVITY: handle_update_camera_activity,
UPDATE_AUDIO_ACTIVITY: handle_update_audio_activity,
EXPIRE_AUDIO_ACTIVITY: handle_expire_audio_activity,
UPDATE_EVENT_DESCRIPTION: handle_update_event_description,
UPDATE_REVIEW_DESCRIPTION: handle_update_review_description,
UPDATE_MODEL_STATE: handle_update_model_state,

View File

@ -110,6 +110,8 @@ REQUEST_REGION_GRID = "request_region_grid"
UPSERT_REVIEW_SEGMENT = "upsert_review_segment"
CLEAR_ONGOING_REVIEW_SEGMENTS = "clear_ongoing_review_segments"
UPDATE_CAMERA_ACTIVITY = "update_camera_activity"
UPDATE_AUDIO_ACTIVITY = "update_audio_activity"
EXPIRE_AUDIO_ACTIVITY = "expire_audio_activity"
UPDATE_EVENT_DESCRIPTION = "update_event_description"
UPDATE_REVIEW_DESCRIPTION = "update_review_description"
UPDATE_MODEL_STATE = "update_model_state"

View File

@ -2,21 +2,15 @@
import datetime
import logging
import random
import string
import threading
import time
from multiprocessing.managers import DictProxy
from multiprocessing.synchronize import Event as MpEvent
from typing import Any, Tuple
from typing import Tuple
import numpy as np
from frigate.comms.detections_updater import DetectionPublisher, DetectionTypeEnum
from frigate.comms.event_metadata_updater import (
EventMetadataPublisher,
EventMetadataTypeEnum,
)
from frigate.comms.inter_process import InterProcessRequestor
from frigate.config import CameraConfig, CameraInput, FfmpegConfig, FrigateConfig
from frigate.config.camera.updater import (
@ -29,7 +23,9 @@ from frigate.const import (
AUDIO_MAX_BIT_RANGE,
AUDIO_MIN_CONFIDENCE,
AUDIO_SAMPLE_RATE,
EXPIRE_AUDIO_ACTIVITY,
PROCESS_PRIORITY_HIGH,
UPDATE_AUDIO_ACTIVITY,
)
from frigate.data_processing.common.audio_transcription.model import (
AudioTranscriptionModelRunner,
@ -159,7 +155,6 @@ class AudioEventMaintainer(threading.Thread):
self.config = config
self.camera_config = camera
self.camera_metrics = camera_metrics
self.detections: dict[dict[str, Any]] = {}
self.stop_event = stop_event
self.detector = AudioTfl(stop_event, self.camera_config.audio.num_threads)
self.shape = (int(round(AUDIO_DURATION * AUDIO_SAMPLE_RATE)),)
@ -184,7 +179,6 @@ class AudioEventMaintainer(threading.Thread):
],
)
self.detection_publisher = DetectionPublisher(DetectionTypeEnum.audio.value)
self.event_metadata_publisher = EventMetadataPublisher()
if self.camera_config.audio_transcription.enabled_in_config:
# init the transcription processor for this camera
@ -216,12 +210,13 @@ class AudioEventMaintainer(threading.Thread):
self.camera_metrics[self.camera_config.name].audio_rms.value = rms
self.camera_metrics[self.camera_config.name].audio_dBFS.value = dBFS
audio_detections: list[Tuple[str, float]] = []
# only run audio detection when volume is above min_volume
if rms >= self.camera_config.audio.min_volume:
# create waveform relative to max range and look for detections
waveform = (audio / AUDIO_MAX_BIT_RANGE).astype(np.float32)
model_detections = self.detector.detect(waveform)
audio_detections = []
for label, score, _ in model_detections:
self.logger.debug(
@ -234,8 +229,7 @@ class AudioEventMaintainer(threading.Thread):
if score > dict(
(self.camera_config.audio.filters or {}).get(label, {})
).get("threshold", 0.8):
self.handle_detection(label, score)
audio_detections.append(label)
audio_detections.append((label, score))
# send audio detection data
self.detection_publisher.publish(
@ -243,10 +237,16 @@ class AudioEventMaintainer(threading.Thread):
self.camera_config.name,
datetime.datetime.now().timestamp(),
dBFS,
audio_detections,
[label for label, _ in audio_detections],
)
)
# send audio activity update
self.requestor.send_data(
UPDATE_AUDIO_ACTIVITY,
{self.camera_config.name: {"detections": audio_detections}},
)
# run audio transcription
if self.transcription_processor is not None:
if self.camera_config.audio_transcription.live_enabled:
@ -261,8 +261,6 @@ class AudioEventMaintainer(threading.Thread):
else:
self.transcription_processor.check_unload_model()
self.expire_detections()
def calculate_audio_levels(self, audio_as_float: np.float32) -> Tuple[float, float]:
# Calculate RMS (Root-Mean-Square) which represents the average signal amplitude
# Note: np.float32 isn't serializable, we must use np.float64 to publish the message
@ -279,75 +277,6 @@ class AudioEventMaintainer(threading.Thread):
return float(rms), float(dBFS)
def handle_detection(self, label: str, score: float) -> None:
    """Start or refresh a manual audio event for the given label.

    If the label is already active, only its last-heard timestamp is
    refreshed. Otherwise a new event id is generated, the label's ON
    topic is published, and a manual event is created via the event
    metadata publisher.
    """
    if self.detections.get(label):
        # already active: bump the last-heard timestamp
        self.detections[label]["last_detection"] = (
            datetime.datetime.now().timestamp()
        )
    else:
        now = datetime.datetime.now().timestamp()
        # random 6-char suffix keeps ids unique for events created at the same timestamp
        rand_id = "".join(
            random.choices(string.ascii_lowercase + string.digits, k=6)
        )
        event_id = f"{now}-{rand_id}"
        self.requestor.send_data(f"{self.camera_config.name}/audio/{label}", "ON")
        self.event_metadata_publisher.publish(
            (
                now,
                self.camera_config.name,
                label,
                event_id,
                True,
                score,
                None,
                None,
                "audio",
                {},
            ),
            EventMetadataTypeEnum.manual_event_create.value,
        )
        self.detections[label] = {
            "id": event_id,
            "label": label,
            "last_detection": now,
        }
def expire_detections(self) -> None:
    """End active audio events that have not been heard within max_not_heard."""
    now = datetime.datetime.now().timestamp()
    for detection in self.detections.values():
        if not detection:
            # slot was already expired (value set to None below)
            continue

        if (
            now - detection.get("last_detection", now)
            > self.camera_config.audio.max_not_heard
        ):
            self.requestor.send_data(
                f"{self.camera_config.name}/audio/{detection['label']}", "OFF"
            )
            self.event_metadata_publisher.publish(
                (detection["id"], detection["last_detection"]),
                EventMetadataTypeEnum.manual_event_end.value,
            )
            # mark expired in place; keys are not removed while iterating values()
            self.detections[detection["label"]] = None
def expire_all_detections(self) -> None:
    """Immediately end all current detections"""
    now = datetime.datetime.now().timestamp()
    # iterate a snapshot of items so the dict can be safely updated
    for label, detection in list(self.detections.items()):
        if detection:
            # publish OFF and end the manual event for each active label
            self.requestor.send_data(
                f"{self.camera_config.name}/audio/{label}", "OFF"
            )
            self.event_metadata_publisher.publish(
                (detection["id"], now),
                EventMetadataTypeEnum.manual_event_end.value,
            )
            self.detections[label] = None
def start_or_restart_ffmpeg(self) -> None:
self.audio_listener = start_or_restart_ffmpeg(
self.ffmpeg_cmd,
@ -406,7 +335,9 @@ class AudioEventMaintainer(threading.Thread):
self.logger.debug(
f"Disabling audio detections for {self.camera_config.name}, ending events"
)
self.expire_all_detections()
self.requestor.send_data(
EXPIRE_AUDIO_ACTIVITY, self.camera_config.name
)
stop_ffmpeg(self.audio_listener, self.logger)
self.audio_listener = None
self.was_enabled = enabled

View File

@ -411,6 +411,13 @@
"debugging": "Debugging",
"objectList": "Object List",
"noObjects": "No objects",
"audio": {
"title": "Audio",
"noAudioDetections": "No audio detections",
"score": "score",
"currentRMS": "Current RMS",
"currentdbFS": "Current dBFS"
},
"boundingBoxes": {
"title": "Bounding boxes",
"desc": "Show bounding boxes around tracked objects",

View File

@ -10,6 +10,7 @@ import {
ToggleableSetting,
TrackedObjectUpdateReturnType,
TriggerStatus,
FrigateAudioDetections,
} from "@/types/ws";
import { FrigateStats } from "@/types/stats";
import { createContainer } from "react-tracked";
@ -341,6 +342,13 @@ export function useFrigateEvents(): { payload: FrigateEvent } {
return { payload: JSON.parse(payload as string) };
}
/**
 * Subscribe to the `audio_detections` websocket topic and return the parsed
 * per-camera detection state.
 */
export function useAudioDetections(): { payload: FrigateAudioDetections } {
  const {
    value: { payload },
  } = useWs("audio_detections", "");
  // The initial/default payload is an empty string, which JSON.parse rejects;
  // return an empty detection map until the first real message arrives.
  return { payload: payload ? JSON.parse(payload as string) : {} };
}
export function useFrigateReviews(): FrigateReview {
const {
value: { payload },

View File

@ -0,0 +1,165 @@
import { useEffect, useMemo, useState, useCallback } from "react";
import { MdCircle } from "react-icons/md";
import Chart from "react-apexcharts";
import { useTheme } from "@/context/theme-provider";
import { useWs } from "@/api/ws";
import { useDateLocale } from "@/hooks/use-date-locale";
import { formatUnixTimestampToDateTime } from "@/utils/dateUtil";
import useSWR from "swr";
import { FrigateConfig } from "@/types/frigateConfig";
import { useTranslation } from "react-i18next";
const GRAPH_COLORS = ["#3b82f6", "#ef4444"]; // RMS, dBFS

// Number of samples retained in the rolling window shown on the graph.
// This never changes at runtime, so it is a module constant rather than
// React state (the original `useState(50)` never called its setter).
const MAX_DATA_POINTS = 50;

interface AudioLevelGraphProps {
  cameraName: string;
}

/**
 * Live line graph of a camera's RMS and dBFS audio levels, fed from the
 * `<camera>/audio/rms` and `<camera>/audio/dBFS` websocket topics.
 */
export function AudioLevelGraph({ cameraName }: AudioLevelGraphProps) {
  const [audioData, setAudioData] = useState<
    { timestamp: number; rms: number; dBFS: number }[]
  >([]);

  // config for time formatting
  const { data: config } = useSWR<FrigateConfig>("config", {
    revalidateOnFocus: false,
  });
  const locale = useDateLocale();
  const { t } = useTranslation(["common"]);

  const {
    value: { payload: audioRms },
  } = useWs(`${cameraName}/audio/rms`, "");
  const {
    value: { payload: audioDBFS },
  } = useWs(`${cameraName}/audio/dBFS`, "");

  useEffect(() => {
    // RMS updates drive new samples; dBFS is read alongside when numeric.
    if (typeof audioRms === "number") {
      const now = Date.now();
      setAudioData((prev) => {
        const next = [
          ...prev,
          {
            timestamp: now,
            rms: audioRms,
            dBFS: typeof audioDBFS === "number" ? audioDBFS : 0,
          },
        ];
        // keep only the most recent samples
        return next.slice(-MAX_DATA_POINTS);
      });
    }
  }, [audioRms, audioDBFS]);

  const series = useMemo(
    () => [
      {
        name: "RMS",
        data: audioData.map((p) => ({ x: p.timestamp, y: p.rms })),
      },
      {
        name: "dBFS",
        data: audioData.map((p) => ({ x: p.timestamp, y: p.dBFS })),
      },
    ],
    [audioData],
  );

  // Latest [rms, dBFS] pair for the legend readout.
  const lastValues = useMemo(() => {
    if (!audioData.length) return undefined;
    const last = audioData[audioData.length - 1];
    return [last.rms, last.dBFS];
  }, [audioData]);

  const timeFormat = config?.ui.time_format === "24hour" ? "24hour" : "12hour";

  const formatString = useMemo(
    () =>
      t(`time.formattedTimestampHourMinuteSecond.${timeFormat}`, {
        ns: "common",
      }),
    [t, timeFormat],
  );

  // Chart x values are epoch millis; the date formatter expects seconds.
  const formatTime = useCallback(
    (val: unknown) => {
      const seconds = Math.round(Number(val) / 1000);
      return formatUnixTimestampToDateTime(seconds, {
        timezone: config?.ui.timezone,
        date_format: formatString,
        locale,
      });
    },
    [config?.ui.timezone, formatString, locale],
  );

  const { theme, systemTheme } = useTheme();

  const options = useMemo(() => {
    return {
      chart: {
        id: `${cameraName}-audio`,
        selection: { enabled: false },
        toolbar: { show: false },
        zoom: { enabled: false },
        animations: { enabled: false },
      },
      colors: GRAPH_COLORS,
      grid: {
        show: true,
        borderColor: "#374151",
        strokeDashArray: 3,
        xaxis: { lines: { show: true } },
        yaxis: { lines: { show: true } },
      },
      legend: { show: false },
      dataLabels: { enabled: false },
      stroke: { width: 1 },
      markers: { size: 0 },
      tooltip: {
        theme: systemTheme || theme,
        x: { formatter: (val: number) => formatTime(val) },
        y: { formatter: (v: number) => v.toFixed(1) },
      },
      xaxis: {
        type: "datetime",
        labels: {
          rotate: 0,
          formatter: formatTime,
          style: { colors: "#6B6B6B", fontSize: "10px" },
        },
        axisBorder: { show: false },
        axisTicks: { show: false },
      },
      yaxis: {
        show: true,
        labels: {
          formatter: (val: number) => Math.round(val).toString(),
          style: { colors: "#6B6B6B", fontSize: "10px" },
        },
      },
    } as ApexCharts.ApexOptions;
  }, [cameraName, theme, systemTheme, formatTime]);

  return (
    <div className="my-4 flex flex-col">
      {lastValues && (
        <div className="mb-2 flex flex-wrap items-center gap-2.5">
          {["RMS", "dBFS"].map((label, idx) => (
            <div key={label} className="flex items-center gap-1">
              <MdCircle
                className="size-2"
                style={{ color: GRAPH_COLORS[idx] }}
              />
              <div className="text-xs text-secondary-foreground">{label}</div>
              <div className="text-xs text-primary">
                {lastValues[idx].toFixed(1)}
              </div>
            </div>
          ))}
        </div>
      )}
      <Chart type="line" options={options} series={series} />
    </div>
  );
}

View File

@ -1,4 +1,5 @@
import {
useAudioDetections,
useEnabledState,
useFrigateEvents,
useInitialCameraState,
@ -8,7 +9,7 @@ import { CameraConfig, FrigateConfig } from "@/types/frigateConfig";
import { MotionData, ReviewSegment } from "@/types/review";
import { useCallback, useEffect, useMemo, useState } from "react";
import { useTimelineUtils } from "./use-timeline-utils";
import { ObjectType } from "@/types/ws";
import { AudioDetection, ObjectType } from "@/types/ws";
import useDeepMemo from "./use-deep-memo";
import { isEqual } from "lodash";
import { useAutoFrigateStats } from "./use-stats";
@ -20,6 +21,7 @@ type useCameraActivityReturn = {
activeTracking: boolean;
activeMotion: boolean;
objects: ObjectType[];
audio_detections: AudioDetection[];
offline: boolean;
};
@ -38,6 +40,9 @@ export function useCameraActivity(
return getAttributeLabels(config);
}, [config]);
const [objects, setObjects] = useState<ObjectType[] | undefined>([]);
const [audioDetections, setAudioDetections] = useState<
AudioDetection[] | undefined
>([]);
// init camera activity
@ -51,6 +56,15 @@ export function useCameraActivity(
}
}, [updatedCameraState, camera]);
const { payload: updatedAudioState } = useAudioDetections();
const memoizedAudioState = useDeepMemo(updatedAudioState);
useEffect(() => {
if (memoizedAudioState) {
setAudioDetections(memoizedAudioState[camera.name]);
}
}, [memoizedAudioState, camera]);
// handle camera activity
const hasActiveObjects = useMemo(
@ -160,6 +174,7 @@ export function useCameraActivity(
: updatedCameraState?.motion === true
: false,
objects: isCameraEnabled ? (objects ?? []) : [],
audio_detections: isCameraEnabled ? (audioDetections ?? []) : [],
offline,
};
}

View File

@ -51,6 +51,12 @@ export type ObjectType = {
sub_label: string;
};
// A single active audio detection for a camera (id of the manual event,
// detected label, and its confidence score).
export type AudioDetection = {
  id: string;
  label: string;
  score: number;
};
export interface FrigateCameraState {
config: {
enabled: boolean;
@ -69,6 +75,10 @@ export interface FrigateCameraState {
};
motion: boolean;
objects: ObjectType[];
audio_detections: AudioDetection[];
}
// Payload of the `audio_detections` websocket topic, keyed by camera name.
// NOTE(review): the backend publishes a label-keyed object per camera
// (label -> {id, score, last_detection}), not an array — confirm this
// declared shape matches the runtime payload.
export interface FrigateAudioDetections {
  [camera: string]: AudioDetection[];
}
export type ModelState =

View File

@ -16,7 +16,7 @@ import {
PopoverContent,
PopoverTrigger,
} from "@/components/ui/popover";
import { ObjectType } from "@/types/ws";
import { AudioDetection, ObjectType } from "@/types/ws";
import useDeepMemo from "@/hooks/use-deep-memo";
import { Card } from "@/components/ui/card";
import { getIconForLabel } from "@/utils/iconUtil";
@ -30,6 +30,8 @@ import { isDesktop } from "react-device-detect";
import { Trans, useTranslation } from "react-i18next";
import { useDocDomain } from "@/hooks/use-doc-domain";
import { getTranslatedLabel } from "@/utils/i18n";
import { AudioLevelGraph } from "@/components/audio/AudioLevelGraph";
import { useWs } from "@/api/ws";
type ObjectSettingsViewProps = {
selectedCamera?: string;
@ -126,9 +128,12 @@ export default function ObjectSettingsView({
}
}, [config, selectedCamera]);
const { objects } = useCameraActivity(cameraConfig ?? ({} as CameraConfig));
const { objects, audio_detections } = useCameraActivity(
cameraConfig ?? ({} as CameraConfig),
);
const memoizedObjects = useDeepMemo(objects);
const memoizedAudio = useDeepMemo(audio_detections);
const searchParams = useMemo(() => {
if (!optionsLoaded) {
@ -189,11 +194,18 @@ export default function ObjectSettingsView({
)}
<Tabs defaultValue="debug" className="w-full">
<TabsList className="grid w-full grid-cols-2">
<TabsList
className={`grid w-full ${cameraConfig.ffmpeg.inputs.some((input) => input.roles.includes("audio")) ? "grid-cols-3" : "grid-cols-2"}`}
>
<TabsTrigger value="debug">{t("debug.debugging")}</TabsTrigger>
<TabsTrigger value="objectlist">
{t("debug.objectList")}
</TabsTrigger>
{cameraConfig.ffmpeg.inputs.some((input) =>
input.roles.includes("audio"),
) && (
<TabsTrigger value="audio">{t("debug.audio.title")}</TabsTrigger>
)}
</TabsList>
<TabsContent value="debug">
<div className="flex w-full flex-col space-y-6">
@ -304,6 +316,16 @@ export default function ObjectSettingsView({
<TabsContent value="objectlist">
<ObjectList cameraConfig={cameraConfig} objects={memoizedObjects} />
</TabsContent>
{cameraConfig.ffmpeg.inputs.some((input) =>
input.roles.includes("audio"),
) && (
<TabsContent value="audio">
<AudioList
cameraConfig={cameraConfig}
audioDetections={memoizedAudio}
/>
</TabsContent>
)}
</Tabs>
</div>
@ -362,7 +384,7 @@ function ObjectList({ cameraConfig, objects }: ObjectListProps) {
return (
<div className="scrollbar-container flex w-full flex-col overflow-y-auto">
{objects && objects.length > 0 ? (
objects.map((obj) => {
objects.map((obj: ObjectType) => {
return (
<Card className="mb-1 p-2 text-sm" key={obj.id}>
<div className="flex flex-row items-center gap-3 pb-1">
@ -438,3 +460,61 @@ function ObjectList({ cameraConfig, objects }: ObjectListProps) {
</div>
);
}
type AudioListProps = {
  cameraConfig: CameraConfig;
  audioDetections?: AudioDetection[];
};

// Lists the currently-active audio detections for a camera, falling back to
// the live RMS/dBFS readout when nothing is detected, with the level graph
// always shown below.
function AudioList({ cameraConfig, audioDetections }: AudioListProps) {
  const { t } = useTranslation(["views/settings"]);

  // Get audio levels directly from ws hooks
  const {
    value: { payload: audioRms },
  } = useWs(`${cameraConfig.name}/audio/rms`, "");
  const {
    value: { payload: audioDBFS },
  } = useWs(`${cameraConfig.name}/audio/dBFS`, "");

  // NOTE(review): audioDetections is typed AudioDetection[], but it is
  // iterated with Object.entries and `key` is passed to getIconForLabel /
  // getTranslatedLabel. For an array, `key` would be the numeric index
  // ("0", "1", ...), not a label; this only works if the runtime payload is
  // actually a label-keyed object — confirm the type against the backend.
  return (
    <div className="scrollbar-container flex w-full flex-col overflow-y-auto">
      {audioDetections && Object.keys(audioDetections).length > 0 ? (
        Object.entries(audioDetections).map(([key, obj]) => (
          <Card className="mb-1 p-2 text-sm" key={obj.id ?? key}>
            <div className="flex flex-row items-center gap-3 pb-1">
              <div className="flex flex-1 flex-row items-center justify-start p-3 pl-1">
                <div className="rounded-lg bg-selected p-2">
                  {getIconForLabel(key, "size-5 text-white")}
                </div>
                <div className="ml-3 text-lg">{getTranslatedLabel(key)}</div>
              </div>
              <div className="flex w-8/12 flex-row items-center justify-end">
                <div className="text-md mr-2 w-1/3">
                  <div className="flex flex-col items-end justify-end">
                    <p className="mb-1.5 text-sm text-primary-variant">
                      {t("debug.audio.score")}
                    </p>
                    {obj.score ? (obj.score * 100).toFixed(1).toString() : "-"}%
                  </div>
                </div>
              </div>
            </div>
          </Card>
        ))
      ) : (
        <div className="p-3 text-center">
          <p className="mb-2">{t("debug.audio.noAudioDetections")}</p>
          <p className="text-xs text-muted-foreground">
            {t("debug.audio.currentRMS")}{" "}
            {(typeof audioRms === "number" ? audioRms : 0).toFixed(1)} |{" "}
            {t("debug.audio.currentdbFS")}{" "}
            {(typeof audioDBFS === "number" ? audioDBFS : 0).toFixed(1)}
          </p>
        </div>
      )}
      <AudioLevelGraph cameraName={cameraConfig.name} />
    </div>
  );
}