Improve face recognition (#14537)

* Increase requirements for face to be set

* Manage faces properly

* Add basic docs

* Simplify

* Separate out face recognition from semantic search

* Update docs

* Formatting
Nicolas Mowen 2024-10-23 09:03:18 -06:00
parent 13e90fc6e0
commit 5be98a2457
7 changed files with 96 additions and 34 deletions

View File

@ -0,0 +1,21 @@
---
id: face_recognition
title: Face Recognition
---
Face recognition allows people to be assigned names. When a known face is recognized, Frigate will assign the person's name as a sub label. This information is included in the UI, in filters, and in notifications.
Frigate supports FaceNet, which runs locally, to create face embeddings. Embeddings are then saved to Frigate's database.
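Under the hood, recognition is a nearest-neighbor lookup: a detected face is embedded, compared against the saved embeddings, and the closest match is scored as `1.0 - distance` against a configurable threshold. A simplified sketch of that scoring idea (illustrative only; the distance metric is an assumption here, not necessarily what Frigate configures):

```python
import numpy as np

def best_match(query: np.ndarray, known: dict[str, np.ndarray]) -> tuple[str, float]:
    """Return the closest known name and its similarity score (1.0 - distance)."""
    def dist(a: np.ndarray, b: np.ndarray) -> float:
        # cosine distance, assumed for illustration
        return 1.0 - float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

    name = min(known, key=lambda n: dist(query, known[n]))
    return name, 1.0 - dist(query, known[name])
```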
## Minimum System Requirements
Face recognition works by running a large AI model locally on your system. Systems without a GPU will not run face recognition reliably, or at all.
## Configuration
Face recognition is disabled by default and requires Semantic Search to be enabled. Face recognition must be turned on in your config file before it can be used. Semantic Search and face recognition are global configuration settings.
```yaml
face_recognition:
enabled: true
```
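Beyond `enabled`, the `FaceRecognitionConfig` model added in this commit also exposes `threshold` (the minimum score required to assign a name) and `min_area` (faces with a smaller bounding-box area are skipped; the code defaults this to 500). A sketch with illustrative values, since the `threshold` default is not shown in this view:

```yaml
face_recognition:
  enabled: true
  # illustrative value; the shipped default lives in FaceRecognitionConfig
  threshold: 0.9
  # skip face boxes smaller than this area in pixels (code default: 500)
  min_area: 750
```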

View File

@ -522,6 +522,14 @@ semantic_search:
# NOTE: small model runs on CPU and large model runs on GPU
model_size: "small"
# Optional: Configuration for face recognition capability
face_recognition:
# Optional: Enable face recognition (default: shown below)
enabled: False
# Optional: Set the model size used for embeddings. (default: shown below)
# NOTE: small model runs on CPU and large model runs on GPU
model_size: "small"
# Optional: Configuration for AI generated tracked object descriptions
# NOTE: Semantic Search must be enabled for this to do anything.
# WARNING: Depending on the provider, this will send thumbnails over the internet

View File

@ -36,6 +36,7 @@ const sidebars: SidebarsConfig = {
'Semantic Search': [
'configuration/semantic_search',
'configuration/genai',
'configuration/face_recognition',
],
Cameras: [
'configuration/cameras',

View File

@ -56,7 +56,7 @@ from .logger import LoggerConfig
from .mqtt import MqttConfig
from .notification import NotificationConfig
from .proxy import ProxyConfig
from .semantic_search import SemanticSearchConfig
from .semantic_search import FaceRecognitionConfig, SemanticSearchConfig
from .telemetry import TelemetryConfig
from .tls import TlsConfig
from .ui import UIConfig
@ -159,6 +159,16 @@ class RestreamConfig(BaseModel):
model_config = ConfigDict(extra="allow")
def verify_semantic_search_dependent_configs(config: FrigateConfig) -> None:
"""Verify that semantic search is enabled if required features are enabled."""
if not config.semantic_search.enabled:
if config.genai.enabled:
raise ValueError("Genai requires semantic search to be enabled.")
if config.face_recognition.enabled:
raise ValueError("Face recognition requires semantic search to be enabled.")
def verify_config_roles(camera_config: CameraConfig) -> None:
"""Verify that roles are setup in the config correctly."""
assigned_roles = list(
@ -316,6 +326,9 @@ class FrigateConfig(FrigateBaseModel):
semantic_search: SemanticSearchConfig = Field(
default_factory=SemanticSearchConfig, title="Semantic search configuration."
)
face_recognition: FaceRecognitionConfig = Field(
default_factory=FaceRecognitionConfig, title="Face recognition config."
)
ui: UIConfig = Field(default_factory=UIConfig, title="UI configuration.")
# Detector config
@ -621,6 +634,7 @@ class FrigateConfig(FrigateBaseModel):
detector_config.model.compute_model_hash()
self.detectors[key] = detector_config
verify_semantic_search_dependent_configs(self)
return self
@field_validator("cameras")
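The check above is a post-parse consistency validation across sibling config sections, invoked from the model validator that finalizes `FrigateConfig`. A self-contained sketch of the same pattern in pydantic (toy models, not Frigate's actual classes):

```python
from pydantic import BaseModel, model_validator

class SemanticSearch(BaseModel):
    enabled: bool = False

class FaceRecognition(BaseModel):
    enabled: bool = False

class AppConfig(BaseModel):
    semantic_search: SemanticSearch = SemanticSearch()
    face_recognition: FaceRecognition = FaceRecognition()

    @model_validator(mode="after")
    def verify_dependencies(self) -> "AppConfig":
        # mirrors verify_semantic_search_dependent_configs: fail fast at load time
        if self.face_recognition.enabled and not self.semantic_search.enabled:
            raise ValueError("Face recognition requires semantic search to be enabled.")
        return self

# raises ValueError at construction time
AppConfig(face_recognition=FaceRecognition(enabled=True))
```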

View File

@ -7,6 +7,16 @@ from .base import FrigateBaseModel
__all__ = ["FaceRecognitionConfig", "SemanticSearchConfig"]
class SemanticSearchConfig(FrigateBaseModel):
enabled: bool = Field(default=False, title="Enable semantic search.")
reindex: Optional[bool] = Field(
default=False, title="Reindex all detections on startup."
)
model_size: str = Field(
default="small", title="The size of the embeddings model used."
)
class FaceRecognitionConfig(FrigateBaseModel):
enabled: bool = Field(default=False, title="Enable face recognition.")
threshold: float = Field(
@ -15,16 +25,3 @@ class FaceRecognitionConfig(FrigateBaseModel):
min_area: int = Field(
default=500, title="Min area of face box to consider running face recognition."
)
class SemanticSearchConfig(FrigateBaseModel):
enabled: bool = Field(default=False, title="Enable semantic search.")
reindex: Optional[bool] = Field(
default=False, title="Reindex all detections on startup."
)
face_recognition: FaceRecognitionConfig = Field(
default_factory=FaceRecognitionConfig, title="Face recognition config."
)
model_size: str = Field(
default="small", title="The size of the embeddings model used."
)
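Because the rendered diff drops its added/removed markers, the two hunks above are easier to read as their net effect: the nested `face_recognition` field is removed from `SemanticSearchConfig`, and `FaceRecognitionConfig` becomes a standalone sibling model. Condensed (the `threshold` arguments are truncated by the hunk boundary above, so they are elided here as well):

```python
from typing import Optional

from pydantic import Field

from .base import FrigateBaseModel

__all__ = ["FaceRecognitionConfig", "SemanticSearchConfig"]

class SemanticSearchConfig(FrigateBaseModel):
    enabled: bool = Field(default=False, title="Enable semantic search.")
    reindex: Optional[bool] = Field(
        default=False, title="Reindex all detections on startup."
    )
    model_size: str = Field(
        default="small", title="The size of the embeddings model used."
    )

class FaceRecognitionConfig(FrigateBaseModel):
    enabled: bool = Field(default=False, title="Enable face recognition.")
    threshold: float = Field(...)  # arguments cut off in the view above
    min_area: int = Field(
        default=500, title="Min area of face box to consider running face recognition."
    )
```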

View File

@ -11,7 +11,7 @@ from numpy import ndarray
from playhouse.shortcuts import model_to_dict
from frigate.comms.inter_process import InterProcessRequestor
from frigate.config.semantic_search import SemanticSearchConfig
from frigate.config import FrigateConfig
from frigate.const import (
CONFIG_DIR,
FACE_DIR,
@ -62,9 +62,7 @@ def get_metadata(event: Event) -> dict:
class Embeddings:
"""SQLite-vec embeddings database."""
def __init__(
self, config: SemanticSearchConfig, db: SqliteVecQueueDatabase
) -> None:
def __init__(self, config: FrigateConfig, db: SqliteVecQueueDatabase) -> None:
self.config = config
self.db = db
self.requestor = InterProcessRequestor()
@ -76,7 +74,7 @@ class Embeddings:
"jinaai/jina-clip-v1-text_model_fp16.onnx",
"jinaai/jina-clip-v1-tokenizer",
"jinaai/jina-clip-v1-vision_model_fp16.onnx"
if config.model_size == "large"
if config.semantic_search.model_size == "large"
else "jinaai/jina-clip-v1-vision_model_quantized.onnx",
"jinaai/jina-clip-v1-preprocessor_config.json",
]
@ -97,7 +95,7 @@ class Embeddings:
download_urls={
"text_model_fp16.onnx": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/text_model_fp16.onnx",
},
model_size=config.model_size,
model_size=config.semantic_search.model_size,
model_type=ModelTypeEnum.text,
requestor=self.requestor,
device="CPU",
@ -105,7 +103,7 @@ class Embeddings:
model_file = (
"vision_model_fp16.onnx"
if self.config.model_size == "large"
if self.config.semantic_search.model_size == "large"
else "vision_model_quantized.onnx"
)

View File

@ -33,6 +33,7 @@ from .embeddings import Embeddings
logger = logging.getLogger(__name__)
REQUIRED_FACES = 2
MAX_THUMBNAILS = 10
@ -47,7 +48,7 @@ class EmbeddingMaintainer(threading.Thread):
) -> None:
super().__init__(name="embeddings_maintainer")
self.config = config
self.embeddings = Embeddings(config.semantic_search, db)
self.embeddings = Embeddings(config, db)
# Check if we need to re-index events
if config.semantic_search.reindex:
@ -62,10 +63,9 @@ class EmbeddingMaintainer(threading.Thread):
self.frame_manager = SharedMemoryFrameManager()
# set face recognition conditions
self.face_recognition_enabled = (
self.config.semantic_search.face_recognition.enabled
)
self.face_recognition_enabled = self.config.face_recognition.enabled
self.requires_face_detection = "face" not in self.config.model.all_attributes
self.detected_faces: dict[str, float] = {}
# create communication for updating event descriptions
self.requestor = InterProcessRequestor()
@ -183,6 +183,9 @@ class EmbeddingMaintainer(threading.Thread):
event_id, camera, updated_db = ended
camera_config = self.config.cameras[camera]
if event_id in self.detected_faces:
self.detected_faces.pop(event_id)
if updated_db:
try:
event: Event = Event.get(Event.id == event_id)
@ -276,25 +279,28 @@ class EmbeddingMaintainer(threading.Thread):
def _search_face(self, query_embedding: bytes) -> list:
"""Search for the face most closely matching the embedding."""
sql_query = """
sql_query = f"""
SELECT
id,
distance
FROM vec_faces
WHERE face_embedding MATCH ?
AND k = 10 ORDER BY distance
AND k = {REQUIRED_FACES} ORDER BY distance
"""
return self.embeddings.db.execute_sql(sql_query, [query_embedding]).fetchall()
def _process_face(self, obj_data: dict[str, any], frame: np.ndarray) -> None:
"""Look for faces in image."""
id = obj_data["id"]
# don't run for non person objects
if obj_data.get("label") != "person":
logger.debug("Not processing face for non-person object.")
return
# don't overwrite sub label for objects that have one
if obj_data.get("sub_label"):
# don't overwrite sub label for objects that have a sub label
# that is not a face
if obj_data.get("sub_label") and id not in self.detected_faces:
logger.debug(
f"Not processing face due to existing sub label: {obj_data.get('sub_label')}."
)
@ -348,18 +354,35 @@ class EmbeddingMaintainer(threading.Thread):
best_faces = self._search_face(query_embedding)
logger.debug(f"Detected best faces for person as: {best_faces}")
if not best_faces:
if not best_faces or len(best_faces) < REQUIRED_FACES:
return
sub_label = str(best_faces[0][0]).split("-")[0]
score = 1.0 - best_faces[0][1]
avg_score = 0
if score < self.config.semantic_search.face_recognition.threshold:
for face in best_faces:
score = 1.0 - face[1]
if face[0] != sub_label:
logger.debug("Detected multiple faces, result is not valid.")
return None
avg_score += score
avg_score = avg_score / REQUIRED_FACES
if avg_score < self.config.face_recognition.threshold or (
id in self.detected_faces and avg_score <= self.detected_faces[id]
):
logger.debug(
"Detected face does not score higher than threshold / previous face."
)
return None
self.detected_faces[id] = avg_score
requests.post(
f"{FRIGATE_LOCALHOST}/api/events/{obj_data['id']}/sub_label",
json={"subLabel": sub_label, "subLabelScore": score},
f"{FRIGATE_LOCALHOST}/api/events/{id}/sub_label",
json={"subLabel": sub_label, "subLabelScore": avg_score},
)
def _create_thumbnail(self, yuv_frame, box, height=500) -> Optional[bytes]:
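For reference, `_search_face` uses sqlite-vec's KNN syntax: `MATCH` on the vector column plus a `k =` constraint, ordered by `distance`. A minimal standalone sketch of that query pattern, assuming the `sqlite-vec` Python bindings and a version that supports text primary keys in `vec0` tables (toy 4-dimensional embeddings; Frigate's real schema lives elsewhere):

```python
import sqlite3
import struct

import sqlite_vec  # assumption: installed via `pip install sqlite-vec`

def to_blob(vec: list[float]) -> bytes:
    """Pack floats into the float32 blob sqlite-vec expects."""
    return struct.pack(f"{len(vec)}f", *vec)

db = sqlite3.connect(":memory:")
db.enable_load_extension(True)
sqlite_vec.load(db)
db.enable_load_extension(False)

# name-prefixed ids, matching the sub_label split("-") in _process_face
db.execute(
    "CREATE VIRTUAL TABLE vec_faces USING vec0(id TEXT PRIMARY KEY, face_embedding float[4])"
)
faces = {
    "alice-1": [0.1, 0.2, 0.3, 0.4],
    "alice-2": [0.1, 0.2, 0.3, 0.5],
    "bob-1": [0.9, 0.8, 0.7, 0.6],
}
for face_id, emb in faces.items():
    db.execute(
        "INSERT INTO vec_faces(id, face_embedding) VALUES (?, ?)",
        [face_id, to_blob(emb)],
    )

# the k nearest stored faces to the query embedding, closest first
rows = db.execute(
    """
    SELECT id, distance FROM vec_faces
    WHERE face_embedding MATCH ? AND k = 2
    ORDER BY distance
    """,
    [to_blob([0.1, 0.2, 0.3, 0.4])],
).fetchall()
for face_id, distance in rows:
    print(face_id, 1.0 - distance)  # score = 1.0 - distance, as in _process_face
```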