Save audio scores and add audio filter config (#7185)

* Send and save score for external events

* Add audio filters config

* Fix access

* Add docs

* Cleanup
This commit is contained in:
Nicolas Mowen 2023-07-17 05:07:15 -06:00 committed by GitHub
parent dacf45cd88
commit c5b8d13beb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 39 additions and 5 deletions

View File

@ -154,6 +154,12 @@ audio:
- scream - scream
- speech - speech
- yell - yell
# Optional: Per-label filters to configure audio detection.
filters:
# Label to filter; it should match one of the labels in the listen config above.
speech:
# Minimum score that triggers an audio event (default: shown below)
threshold: 0.8
# Optional: logger verbosity settings # Optional: logger verbosity settings
logger: logger:

View File

@ -11,7 +11,13 @@ import numpy as np
from pydantic import BaseModel, Extra, Field, parse_obj_as, validator from pydantic import BaseModel, Extra, Field, parse_obj_as, validator
from pydantic.fields import PrivateAttr from pydantic.fields import PrivateAttr
from frigate.const import CACHE_DIR, DEFAULT_DB_PATH, REGEX_CAMERA_NAME, YAML_EXT from frigate.const import (
AUDIO_MIN_CONFIDENCE,
CACHE_DIR,
DEFAULT_DB_PATH,
REGEX_CAMERA_NAME,
YAML_EXT,
)
from frigate.detectors import DetectorConfig, ModelConfig from frigate.detectors import DetectorConfig, ModelConfig
from frigate.detectors.detector_config import BaseDetectorConfig from frigate.detectors.detector_config import BaseDetectorConfig
from frigate.ffmpeg_presets import ( from frigate.ffmpeg_presets import (
@ -334,6 +340,15 @@ class FilterConfig(FrigateBaseModel):
) )
# Per-label audio filter settings. Instances are the values of the
# `filters` mapping declared on AudioConfig (keyed by label name).
class AudioFilterConfig(FrigateBaseModel):
    # Score threshold for the label this filter is attached to.
    # Validation bounds: AUDIO_MIN_CONFIDENCE <= threshold < 1.0.
    # When omitted from the config the threshold defaults to 0.8.
    threshold: float = Field(
        title="Minimum detection confidence threshold for audio to be counted.",
        default=0.8,
        lt=1.0,
        ge=AUDIO_MIN_CONFIDENCE,
    )
class RuntimeFilterConfig(FilterConfig): class RuntimeFilterConfig(FilterConfig):
mask: Optional[np.ndarray] mask: Optional[np.ndarray]
raw_mask: Optional[Union[str, List[str]]] raw_mask: Optional[Union[str, List[str]]]
@ -424,6 +439,7 @@ class AudioConfig(FrigateBaseModel):
listen: List[str] = Field( listen: List[str] = Field(
default=DEFAULT_LISTEN_AUDIO, title="Audio to listen for." default=DEFAULT_LISTEN_AUDIO, title="Audio to listen for."
) )
filters: Optional[Dict[str, AudioFilterConfig]] = Field(title="Audio filters.")
enabled_in_config: Optional[bool] = Field( enabled_in_config: Optional[bool] = Field(
title="Keep track of original state of audio detection." title="Keep track of original state of audio detection."
) )

View File

@ -28,6 +28,7 @@ AUDIO_DURATION = 0.975
AUDIO_FORMAT = "s16le" AUDIO_FORMAT = "s16le"
AUDIO_MAX_BIT_RANGE = 32768.0 AUDIO_MAX_BIT_RANGE = 32768.0
AUDIO_SAMPLE_RATE = 16000 AUDIO_SAMPLE_RATE = 16000
AUDIO_MIN_CONFIDENCE = 0.5
# Regex Consts # Regex Consts

View File

@ -19,6 +19,7 @@ from frigate.const import (
AUDIO_DURATION, AUDIO_DURATION,
AUDIO_FORMAT, AUDIO_FORMAT,
AUDIO_MAX_BIT_RANGE, AUDIO_MAX_BIT_RANGE,
AUDIO_MIN_CONFIDENCE,
AUDIO_SAMPLE_RATE, AUDIO_SAMPLE_RATE,
CACHE_DIR, CACHE_DIR,
FRIGATE_LOCALHOST, FRIGATE_LOCALHOST,
@ -130,7 +131,7 @@ class AudioTfl:
return detections return detections
def detect(self, tensor_input, threshold=0.8): def detect(self, tensor_input, threshold=AUDIO_MIN_CONFIDENCE):
detections = [] detections = []
if self.stop_event.is_set(): if self.stop_event.is_set():
@ -200,6 +201,9 @@ class AudioEventMaintainer(threading.Thread):
if label not in self.config.audio.listen: if label not in self.config.audio.listen:
continue continue
if score > (self.config.audio.filters or {}).get(label, {}).get(
"threshold", 0.8
):
self.handle_detection(label, score) self.handle_detection(label, score)
self.expire_detections() self.expire_detections()
@ -233,7 +237,7 @@ class AudioEventMaintainer(threading.Thread):
resp = requests.post( resp = requests.post(
f"{FRIGATE_LOCALHOST}/api/events/{self.config.name}/{label}/create", f"{FRIGATE_LOCALHOST}/api/events/{self.config.name}/{label}/create",
json={"duration": None, "source_type": "audio"}, json={"duration": None, "score": score, "source_type": "audio"},
) )
if resp.status_code == 200: if resp.status_code == 200:

View File

@ -31,6 +31,7 @@ class ExternalEventProcessor:
label: str, label: str,
source_type: str, source_type: str,
sub_label: Optional[str], sub_label: Optional[str],
score: int,
duration: Optional[int], duration: Optional[int],
include_recording: bool, include_recording: bool,
draw: dict[str, any], draw: dict[str, any],
@ -56,6 +57,7 @@ class ExternalEventProcessor:
"id": event_id, "id": event_id,
"label": label, "label": label,
"sub_label": sub_label, "sub_label": sub_label,
"score": score,
"camera": camera, "camera": camera,
"start_time": now - camera_config.record.events.pre_capture, "start_time": now - camera_config.record.events.pre_capture,
"end_time": now "end_time": now

View File

@ -230,7 +230,11 @@ class EventProcessor(threading.Thread):
Event.has_clip: event_data["has_clip"], Event.has_clip: event_data["has_clip"],
Event.has_snapshot: event_data["has_snapshot"], Event.has_snapshot: event_data["has_snapshot"],
Event.zones: [], Event.zones: [],
Event.data: {"type": event_data["type"]}, Event.data: {
"type": event_data["type"],
"score": event_data["score"],
"top_score": event_data["score"],
},
} }
Event.insert(event).execute() Event.insert(event).execute()
elif event_type == "end": elif event_type == "end":

View File

@ -899,6 +899,7 @@ def create_event(camera_name, label):
label, label,
json.get("source_type", "api"), json.get("source_type", "api"),
json.get("sub_label", None), json.get("sub_label", None),
json.get("score", 0),
json.get("duration", 30), json.get("duration", 30),
json.get("include_recording", True), json.get("include_recording", True),
json.get("draw", {}), json.get("draw", {}),