mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-05-04 23:14:12 +02:00
Audio transcription support (#18398)
* install new packages for transcription support
* add config options
* audio maintainer modifications to support transcription
* pass main config to audio process
* embeddings support
* api and transcription post processor
* embeddings maintainer support for post processor
* live audio transcription with sherpa and faster-whisper
* update dispatcher with live transcription topic
* frontend websocket
* frontend live transcription
* frontend changes for speech events
* i18n changes
* docs
* mqtt docs
* fix linter
* use float16 and small model on gpu for real-time
* fix return value and use requestor to embed description instead of passing embeddings
* run real-time transcription in its own thread
* tweaks
* publish live transcriptions on their own topic instead of tracked_object_update
* config validator and docs
* clarify docs
This commit is contained in:
committed by
Blake Blackshear
parent
2385c403ee
commit
6dc36fcbb4
@@ -75,13 +75,13 @@ export default function ExploreView({
|
||||
}, {});
|
||||
}, [events]);
|
||||
|
||||
const trackedObjectUpdate = useTrackedObjectUpdate();
|
||||
const { payload: wsUpdate } = useTrackedObjectUpdate();
|
||||
|
||||
useEffect(() => {
|
||||
mutate();
|
||||
// mutate / revalidate when event description updates come in
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [trackedObjectUpdate]);
|
||||
if (wsUpdate && wsUpdate.type == "description") {
|
||||
mutate();
|
||||
}
|
||||
}, [wsUpdate, mutate]);
|
||||
|
||||
// update search detail when results change
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import {
|
||||
useAudioLiveTranscription,
|
||||
useAudioState,
|
||||
useAudioTranscriptionState,
|
||||
useAutotrackingState,
|
||||
useDetectState,
|
||||
useEnabledState,
|
||||
@@ -90,6 +92,8 @@ import {
|
||||
LuX,
|
||||
} from "react-icons/lu";
|
||||
import {
|
||||
MdClosedCaption,
|
||||
MdClosedCaptionDisabled,
|
||||
MdNoPhotography,
|
||||
MdOutlineRestartAlt,
|
||||
MdPersonOff,
|
||||
@@ -197,6 +201,29 @@ export default function LiveCameraView({
|
||||
const { payload: enabledState } = useEnabledState(camera.name);
|
||||
const cameraEnabled = enabledState === "ON";
|
||||
|
||||
// for audio transcriptions
|
||||
|
||||
const { payload: audioTranscriptionState, send: sendTranscription } =
|
||||
useAudioTranscriptionState(camera.name);
|
||||
const { payload: transcription } = useAudioLiveTranscription(camera.name);
|
||||
const transcriptionRef = useRef<HTMLDivElement>(null);
|
||||
|
||||
useEffect(() => {
|
||||
if (transcription) {
|
||||
if (transcriptionRef.current) {
|
||||
transcriptionRef.current.scrollTop =
|
||||
transcriptionRef.current.scrollHeight;
|
||||
}
|
||||
}
|
||||
}, [transcription]);
|
||||
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
// disable transcriptions when unmounting
|
||||
if (audioTranscriptionState == "ON") sendTranscription("OFF");
|
||||
};
|
||||
}, [audioTranscriptionState, sendTranscription]);
|
||||
|
||||
// click overlay for ptzs
|
||||
|
||||
const [clickOverlay, setClickOverlay] = useState(false);
|
||||
@@ -567,6 +594,9 @@ export default function LiveCameraView({
|
||||
autotrackingEnabled={
|
||||
camera.onvif.autotracking.enabled_in_config
|
||||
}
|
||||
transcriptionEnabled={
|
||||
camera.audio_transcription.enabled_in_config
|
||||
}
|
||||
fullscreen={fullscreen}
|
||||
streamName={streamName ?? ""}
|
||||
setStreamName={setStreamName}
|
||||
@@ -626,6 +656,16 @@ export default function LiveCameraView({
|
||||
/>
|
||||
</div>
|
||||
</TransformComponent>
|
||||
{camera?.audio?.enabled_in_config &&
|
||||
audioTranscriptionState == "ON" &&
|
||||
transcription != null && (
|
||||
<div
|
||||
ref={transcriptionRef}
|
||||
className="text-md scrollbar-container absolute bottom-4 left-1/2 max-h-[15vh] w-[75%] -translate-x-1/2 overflow-y-auto rounded-lg bg-black/70 p-2 text-white md:w-[50%]"
|
||||
>
|
||||
{transcription}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
{camera.onvif.host != "" && (
|
||||
@@ -984,6 +1024,7 @@ type FrigateCameraFeaturesProps = {
|
||||
recordingEnabled: boolean;
|
||||
audioDetectEnabled: boolean;
|
||||
autotrackingEnabled: boolean;
|
||||
transcriptionEnabled: boolean;
|
||||
fullscreen: boolean;
|
||||
streamName: string;
|
||||
setStreamName?: (value: string | undefined) => void;
|
||||
@@ -1003,6 +1044,7 @@ function FrigateCameraFeatures({
|
||||
recordingEnabled,
|
||||
audioDetectEnabled,
|
||||
autotrackingEnabled,
|
||||
transcriptionEnabled,
|
||||
fullscreen,
|
||||
streamName,
|
||||
setStreamName,
|
||||
@@ -1035,6 +1077,8 @@ function FrigateCameraFeatures({
|
||||
const { payload: audioState, send: sendAudio } = useAudioState(camera.name);
|
||||
const { payload: autotrackingState, send: sendAutotracking } =
|
||||
useAutotrackingState(camera.name);
|
||||
const { payload: transcriptionState, send: sendTranscription } =
|
||||
useAudioTranscriptionState(camera.name);
|
||||
|
||||
// roles
|
||||
|
||||
@@ -1198,6 +1242,27 @@ function FrigateCameraFeatures({
|
||||
disabled={!cameraEnabled}
|
||||
/>
|
||||
)}
|
||||
{audioDetectEnabled && transcriptionEnabled && (
|
||||
<CameraFeatureToggle
|
||||
className="p-2 md:p-0"
|
||||
variant={fullscreen ? "overlay" : "primary"}
|
||||
Icon={
|
||||
transcriptionState == "ON"
|
||||
? MdClosedCaption
|
||||
: MdClosedCaptionDisabled
|
||||
}
|
||||
isActive={transcriptionState == "ON"}
|
||||
title={
|
||||
transcriptionState == "ON"
|
||||
? t("transcription.disable")
|
||||
: t("transcription.enable")
|
||||
}
|
||||
onClick={() =>
|
||||
sendTranscription(transcriptionState == "ON" ? "OFF" : "ON")
|
||||
}
|
||||
disabled={!cameraEnabled || audioState == "OFF"}
|
||||
/>
|
||||
)}
|
||||
{autotrackingEnabled && (
|
||||
<CameraFeatureToggle
|
||||
className="p-2 md:p-0"
|
||||
@@ -1562,6 +1627,16 @@ function FrigateCameraFeatures({
|
||||
}
|
||||
/>
|
||||
)}
|
||||
{audioDetectEnabled && transcriptionEnabled && (
|
||||
<FilterSwitch
|
||||
label={t("cameraSettings.transcription")}
|
||||
disabled={audioState == "OFF"}
|
||||
isChecked={transcriptionState == "ON"}
|
||||
onCheckedChange={() =>
|
||||
sendTranscription(transcriptionState == "ON" ? "OFF" : "ON")
|
||||
}
|
||||
/>
|
||||
)}
|
||||
{autotrackingEnabled && (
|
||||
<FilterSwitch
|
||||
label={t("cameraSettings.autotracking")}
|
||||
|
||||
Reference in New Issue
Block a user