mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-04-28 23:06:13 +02:00
Audio transcription support (#18398)
* install new packages for transcription support * add config options * audio maintainer modifications to support transcription * pass main config to audio process * embeddings support * api and transcription post processor * embeddings maintainer support for post processor * live audio transcription with sherpa and faster-whisper * update dispatcher with live transcription topic * frontend websocket * frontend live transcription * frontend changes for speech events * i18n changes * docs * mqtt docs * fix linter * use float16 and small model on gpu for real-time * fix return value and use requestor to embed description instead of passing embeddings * run real-time transcription in its own thread * tweaks * publish live transcriptions on their own topic instead of tracked_object_update * config validator and docs * clarify docs
This commit is contained in:
committed by
Blake Blackshear
parent
2385c403ee
commit
6dc36fcbb4
@@ -14,7 +14,10 @@ from peewee import DoesNotExist
|
||||
from playhouse.shortcuts import model_to_dict
|
||||
|
||||
from frigate.api.auth import require_role
|
||||
from frigate.api.defs.request.classification_body import RenameFaceBody
|
||||
from frigate.api.defs.request.classification_body import (
|
||||
AudioTranscriptionBody,
|
||||
RenameFaceBody,
|
||||
)
|
||||
from frigate.api.defs.tags import Tags
|
||||
from frigate.config.camera import DetectConfig
|
||||
from frigate.const import FACE_DIR
|
||||
@@ -384,3 +387,58 @@ def reindex_embeddings(request: Request):
|
||||
},
|
||||
status_code=500,
|
||||
)
|
||||
|
||||
|
||||
@router.put("/audio/transcribe")
|
||||
def transcribe_audio(request: Request, body: AudioTranscriptionBody):
|
||||
event_id = body.event_id
|
||||
|
||||
try:
|
||||
event = Event.get(Event.id == event_id)
|
||||
except DoesNotExist:
|
||||
message = f"Event {event_id} not found"
|
||||
logger.error(message)
|
||||
return JSONResponse(
|
||||
content=({"success": False, "message": message}), status_code=404
|
||||
)
|
||||
|
||||
if not request.app.frigate_config.cameras[event.camera].audio_transcription.enabled:
|
||||
message = f"Audio transcription is not enabled for {event.camera}."
|
||||
logger.error(message)
|
||||
return JSONResponse(
|
||||
content=(
|
||||
{
|
||||
"success": False,
|
||||
"message": message,
|
||||
}
|
||||
),
|
||||
status_code=400,
|
||||
)
|
||||
|
||||
context: EmbeddingsContext = request.app.embeddings
|
||||
response = context.transcribe_audio(model_to_dict(event))
|
||||
|
||||
if response == "started":
|
||||
return JSONResponse(
|
||||
content={
|
||||
"success": True,
|
||||
"message": "Audio transcription has started.",
|
||||
},
|
||||
status_code=202, # 202 Accepted
|
||||
)
|
||||
elif response == "in_progress":
|
||||
return JSONResponse(
|
||||
content={
|
||||
"success": False,
|
||||
"message": "Audio transcription for a speech event is currently in progress. Try again later.",
|
||||
},
|
||||
status_code=409, # 409 Conflict
|
||||
)
|
||||
else:
|
||||
return JSONResponse(
|
||||
content={
|
||||
"success": False,
|
||||
"message": "Failed to transcribe audio.",
|
||||
},
|
||||
status_code=500,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user