Audio transcription support (#18398)

* install new packages for transcription support

* add config options

* audio maintainer modifications to support transcription

* pass main config to audio process

* embeddings support

* api and transcription post processor

* embeddings maintainer support for post processor

* live audio transcription with sherpa and faster-whisper

* update dispatcher with live transcription topic

* frontend websocket

* frontend live transcription

* frontend changes for speech events

* i18n changes

* docs

* mqtt docs

* fix linter

* use float16 and small model on gpu for real-time

* fix return value and use requestor to embed description instead of passing embeddings

* run real-time transcription in its own thread

* tweaks

* publish live transcriptions on their own topic instead of tracked_object_update

* config validator and docs

* clarify docs
This commit is contained in:
Josh Hawkins
2025-05-27 10:26:00 -05:00
committed by Blake Blackshear
parent 2385c403ee
commit 6dc36fcbb4
29 changed files with 2322 additions and 51 deletions

View File

@@ -8,6 +8,7 @@ import {
FrigateReview,
ModelState,
ToggleableSetting,
TrackedObjectUpdateReturnType,
} from "@/types/ws";
import { FrigateStats } from "@/types/stats";
import { createContainer } from "react-tracked";
@@ -60,6 +61,7 @@ function useValue(): useValueReturn {
enabled,
snapshots,
audio,
audio_transcription,
notifications,
notifications_suspended,
autotracking,
@@ -71,6 +73,9 @@ function useValue(): useValueReturn {
cameraStates[`${name}/detect/state`] = detect ? "ON" : "OFF";
cameraStates[`${name}/snapshots/state`] = snapshots ? "ON" : "OFF";
cameraStates[`${name}/audio/state`] = audio ? "ON" : "OFF";
cameraStates[`${name}/audio_transcription/state`] = audio_transcription
? "ON"
: "OFF";
cameraStates[`${name}/notifications/state`] = notifications
? "ON"
: "OFF";
@@ -220,6 +225,20 @@ export function useAudioState(camera: string): {
return { payload: payload as ToggleableSetting, send };
}
export function useAudioTranscriptionState(camera: string): {
  payload: ToggleableSetting;
  send: (payload: ToggleableSetting, retain?: boolean) => void;
} {
  // Subscribe to the camera's transcription state topic; publishing on the
  // matching /set topic toggles audio transcription for this camera.
  const ws = useWs(
    `${camera}/audio_transcription/state`,
    `${camera}/audio_transcription/set`,
  );
  return {
    payload: ws.value.payload as ToggleableSetting,
    send: ws.send,
  };
}
export function useAutotrackingState(camera: string): {
payload: ToggleableSetting;
send: (payload: ToggleableSetting, retain?: boolean) => void;
@@ -421,6 +440,15 @@ export function useAudioActivity(camera: string): { payload: number } {
return { payload: payload as number };
}
export function useAudioLiveTranscription(camera: string): {
  payload: string;
} {
  // Read-only subscription: live transcription text arrives on
  // `<camera>/audio/transcription`; there is no publish topic.
  const { value } = useWs(`${camera}/audio/transcription`, "");
  return { payload: value.payload as string };
}
export function useMotionThreshold(camera: string): {
payload: string;
send: (payload: number, retain?: boolean) => void;
@@ -463,11 +491,16 @@ export function useImproveContrast(camera: string): {
return { payload: payload as ToggleableSetting, send };
}
export function useTrackedObjectUpdate(): { payload: string } {
export function useTrackedObjectUpdate(): {
  payload: TrackedObjectUpdateReturnType;
} {
  const {
    value: { payload },
  } = useWs("tracked_object_update", "");
  // Default until the first message arrives. Guard JSON.parse as well:
  // a malformed websocket payload would otherwise throw during render
  // and take down the component tree.
  let parsed: TrackedObjectUpdateReturnType = { type: "", id: "", camera: "" };
  if (payload) {
    try {
      parsed = JSON.parse(payload as string);
    } catch {
      // keep the empty default on malformed JSON
    }
  }
  // useDeepMemo keeps the returned reference stable across renders when
  // the parsed content is deep-equal, avoiding spurious downstream effects.
  return { payload: useDeepMemo(parsed) };
}
export function useNotifications(camera: string): {