mirror of
https://github.com/blakeblackshear/frigate.git
synced 2024-11-21 19:07:46 +01:00
Save audio scores and add audio filter config (#7185)
* Send and save score for external events
* Add audio filters config
* Fix access
* Add docs
* Cleanup
This commit is contained in:
parent
dacf45cd88
commit
c5b8d13beb
@ -154,6 +154,12 @@ audio:
|
|||||||
- scream
|
- scream
|
||||||
- speech
|
- speech
|
||||||
- yell
|
- yell
|
||||||
|
# Optional: Filters to configure detection.
|
||||||
|
filters:
|
||||||
|
# Label that matches a label in the listen config.
|
||||||
|
speech:
|
||||||
|
# Minimum score that triggers an audio event (default: shown below)
|
||||||
|
threshold: 0.8
|
||||||
|
|
||||||
# Optional: logger verbosity settings
|
# Optional: logger verbosity settings
|
||||||
logger:
|
logger:
|
||||||
|
@ -11,7 +11,13 @@ import numpy as np
|
|||||||
from pydantic import BaseModel, Extra, Field, parse_obj_as, validator
|
from pydantic import BaseModel, Extra, Field, parse_obj_as, validator
|
||||||
from pydantic.fields import PrivateAttr
|
from pydantic.fields import PrivateAttr
|
||||||
|
|
||||||
from frigate.const import CACHE_DIR, DEFAULT_DB_PATH, REGEX_CAMERA_NAME, YAML_EXT
|
from frigate.const import (
|
||||||
|
AUDIO_MIN_CONFIDENCE,
|
||||||
|
CACHE_DIR,
|
||||||
|
DEFAULT_DB_PATH,
|
||||||
|
REGEX_CAMERA_NAME,
|
||||||
|
YAML_EXT,
|
||||||
|
)
|
||||||
from frigate.detectors import DetectorConfig, ModelConfig
|
from frigate.detectors import DetectorConfig, ModelConfig
|
||||||
from frigate.detectors.detector_config import BaseDetectorConfig
|
from frigate.detectors.detector_config import BaseDetectorConfig
|
||||||
from frigate.ffmpeg_presets import (
|
from frigate.ffmpeg_presets import (
|
||||||
@ -334,6 +340,15 @@ class FilterConfig(FrigateBaseModel):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AudioFilterConfig(FrigateBaseModel):
|
||||||
|
threshold: float = Field(
|
||||||
|
default=0.8,
|
||||||
|
ge=AUDIO_MIN_CONFIDENCE,
|
||||||
|
lt=1.0,
|
||||||
|
title="Minimum detection confidence threshold for audio to be counted.",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class RuntimeFilterConfig(FilterConfig):
|
class RuntimeFilterConfig(FilterConfig):
|
||||||
mask: Optional[np.ndarray]
|
mask: Optional[np.ndarray]
|
||||||
raw_mask: Optional[Union[str, List[str]]]
|
raw_mask: Optional[Union[str, List[str]]]
|
||||||
@ -424,6 +439,7 @@ class AudioConfig(FrigateBaseModel):
|
|||||||
listen: List[str] = Field(
|
listen: List[str] = Field(
|
||||||
default=DEFAULT_LISTEN_AUDIO, title="Audio to listen for."
|
default=DEFAULT_LISTEN_AUDIO, title="Audio to listen for."
|
||||||
)
|
)
|
||||||
|
filters: Optional[Dict[str, AudioFilterConfig]] = Field(title="Audio filters.")
|
||||||
enabled_in_config: Optional[bool] = Field(
|
enabled_in_config: Optional[bool] = Field(
|
||||||
title="Keep track of original state of audio detection."
|
title="Keep track of original state of audio detection."
|
||||||
)
|
)
|
||||||
|
@ -28,6 +28,7 @@ AUDIO_DURATION = 0.975
|
|||||||
AUDIO_FORMAT = "s16le"
|
AUDIO_FORMAT = "s16le"
|
||||||
AUDIO_MAX_BIT_RANGE = 32768.0
|
AUDIO_MAX_BIT_RANGE = 32768.0
|
||||||
AUDIO_SAMPLE_RATE = 16000
|
AUDIO_SAMPLE_RATE = 16000
|
||||||
|
AUDIO_MIN_CONFIDENCE = 0.5
|
||||||
|
|
||||||
# Regex Consts
|
# Regex Consts
|
||||||
|
|
||||||
|
@ -19,6 +19,7 @@ from frigate.const import (
|
|||||||
AUDIO_DURATION,
|
AUDIO_DURATION,
|
||||||
AUDIO_FORMAT,
|
AUDIO_FORMAT,
|
||||||
AUDIO_MAX_BIT_RANGE,
|
AUDIO_MAX_BIT_RANGE,
|
||||||
|
AUDIO_MIN_CONFIDENCE,
|
||||||
AUDIO_SAMPLE_RATE,
|
AUDIO_SAMPLE_RATE,
|
||||||
CACHE_DIR,
|
CACHE_DIR,
|
||||||
FRIGATE_LOCALHOST,
|
FRIGATE_LOCALHOST,
|
||||||
@ -130,7 +131,7 @@ class AudioTfl:
|
|||||||
|
|
||||||
return detections
|
return detections
|
||||||
|
|
||||||
def detect(self, tensor_input, threshold=0.8):
|
def detect(self, tensor_input, threshold=AUDIO_MIN_CONFIDENCE):
|
||||||
detections = []
|
detections = []
|
||||||
|
|
||||||
if self.stop_event.is_set():
|
if self.stop_event.is_set():
|
||||||
@ -200,6 +201,9 @@ class AudioEventMaintainer(threading.Thread):
|
|||||||
if label not in self.config.audio.listen:
|
if label not in self.config.audio.listen:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if score > (self.config.audio.filters or {}).get(label, {}).get(
|
||||||
|
"threshold", 0.8
|
||||||
|
):
|
||||||
self.handle_detection(label, score)
|
self.handle_detection(label, score)
|
||||||
|
|
||||||
self.expire_detections()
|
self.expire_detections()
|
||||||
@ -233,7 +237,7 @@ class AudioEventMaintainer(threading.Thread):
|
|||||||
|
|
||||||
resp = requests.post(
|
resp = requests.post(
|
||||||
f"{FRIGATE_LOCALHOST}/api/events/{self.config.name}/{label}/create",
|
f"{FRIGATE_LOCALHOST}/api/events/{self.config.name}/{label}/create",
|
||||||
json={"duration": None, "source_type": "audio"},
|
json={"duration": None, "score": score, "source_type": "audio"},
|
||||||
)
|
)
|
||||||
|
|
||||||
if resp.status_code == 200:
|
if resp.status_code == 200:
|
||||||
|
@ -31,6 +31,7 @@ class ExternalEventProcessor:
|
|||||||
label: str,
|
label: str,
|
||||||
source_type: str,
|
source_type: str,
|
||||||
sub_label: Optional[str],
|
sub_label: Optional[str],
|
||||||
|
score: int,
|
||||||
duration: Optional[int],
|
duration: Optional[int],
|
||||||
include_recording: bool,
|
include_recording: bool,
|
||||||
draw: dict[str, any],
|
draw: dict[str, any],
|
||||||
@ -56,6 +57,7 @@ class ExternalEventProcessor:
|
|||||||
"id": event_id,
|
"id": event_id,
|
||||||
"label": label,
|
"label": label,
|
||||||
"sub_label": sub_label,
|
"sub_label": sub_label,
|
||||||
|
"score": score,
|
||||||
"camera": camera,
|
"camera": camera,
|
||||||
"start_time": now - camera_config.record.events.pre_capture,
|
"start_time": now - camera_config.record.events.pre_capture,
|
||||||
"end_time": now
|
"end_time": now
|
||||||
|
@ -230,7 +230,11 @@ class EventProcessor(threading.Thread):
|
|||||||
Event.has_clip: event_data["has_clip"],
|
Event.has_clip: event_data["has_clip"],
|
||||||
Event.has_snapshot: event_data["has_snapshot"],
|
Event.has_snapshot: event_data["has_snapshot"],
|
||||||
Event.zones: [],
|
Event.zones: [],
|
||||||
Event.data: {"type": event_data["type"]},
|
Event.data: {
|
||||||
|
"type": event_data["type"],
|
||||||
|
"score": event_data["score"],
|
||||||
|
"top_score": event_data["score"],
|
||||||
|
},
|
||||||
}
|
}
|
||||||
Event.insert(event).execute()
|
Event.insert(event).execute()
|
||||||
elif event_type == "end":
|
elif event_type == "end":
|
||||||
|
@ -899,6 +899,7 @@ def create_event(camera_name, label):
|
|||||||
label,
|
label,
|
||||||
json.get("source_type", "api"),
|
json.get("source_type", "api"),
|
||||||
json.get("sub_label", None),
|
json.get("sub_label", None),
|
||||||
|
json.get("score", 0),
|
||||||
json.get("duration", 30),
|
json.get("duration", 30),
|
||||||
json.get("include_recording", True),
|
json.get("include_recording", True),
|
||||||
json.get("draw", {}),
|
json.get("draw", {}),
|
||||||
|
Loading…
Reference in New Issue
Block a user