From e3d4b84803fce7e9f345592c797a8cdc62022dbb Mon Sep 17 00:00:00 2001
From: Nicolas Mowen
Date: Wed, 26 Mar 2025 07:23:01 -0600
Subject: [PATCH] Face recognition improvements (#17387)

* Increase frequency of updates when internal face detection is used
* Adjust number of required faces based on detection type
* Rename min_score config to unknown_score
* Only increase update frequency for person objects
* Improve typing
* Update face rec docs
* Cleanup ui colors
* Cleanup
---
 docs/docs/configuration/face_recognition.md   | 19 +++---
 docs/docs/configuration/reference.md          |  4 +-
 frigate/camera/state.py                       | 20 ++++--
 frigate/config/classification.py              |  4 +-
 frigate/data_processing/common/face/model.py  |  9 +--
 frigate/data_processing/real_time/face.py     | 66 +++++++++++---------
 web/src/pages/FaceLibrary.tsx                 | 25 ++++++--
 web/src/types/frigateConfig.ts                | 15 +++--
 8 files changed, 97 insertions(+), 65 deletions(-)

diff --git a/docs/docs/configuration/face_recognition.md b/docs/docs/configuration/face_recognition.md
index a3cb4e308..af6fd1eff 100644
--- a/docs/docs/configuration/face_recognition.md
+++ b/docs/docs/configuration/face_recognition.md
@@ -22,16 +22,16 @@ Frigate needs to first detect a `face` before it can recognize a face.
 ### Face Recognition
 
 Frigate has support for two face recognition model types:
 
-- **small**: Frigate will use CV2 Local Binary Pattern Face Recognizer to recognize faces, which runs locally on the CPU.
-- **large**: Frigate will run a face embedding model, this is only recommended to be run when an integrated or dedicated GPU is available.
-In both cases a lightweight face landmark detection model is also used to align faces before running them through the face recognizer.
+- **small**: Frigate will use the CV2 Local Binary Pattern Face Recognizer to recognize faces, which runs locally on the CPU. This model is optimized for efficiency but is less accurate.
+- **large**: Frigate will run a face embedding model that is optimized for accuracy. It is only recommended when an integrated or dedicated GPU is available.
+
+In both cases a lightweight face landmark detection model is also used to align faces before running the recognition model.
 
 ## Minimum System Requirements
 
-Face recognition is lightweight and runs on the CPU, there are no significantly different system requirements than running Frigate itself when using the `small` model.
-
-When using the `large` model an integrated or discrete GPU is recommended.
+The `small` model is optimized for efficiency and runs on the CPU; it does not require significantly more resources than running Frigate itself.
+The `large` model is optimized for accuracy, and an integrated or discrete GPU is highly recommended.
 
 ## Configuration
 
@@ -58,6 +58,8 @@ Fine-tune face recognition with these optional parameters:
 ### Recognition
 
 - `model_size`: Which model size to use, options are `small` or `large`
+- `unknown_score`: Minimum score required to mark a person as a potential match; matches below this are marked as unknown.
+  - Default: `0.8`.
 - `recognition_threshold`: Recognition confidence score required to add the face to the object as a sub label.
   - Default: `0.9`.
 - `blur_confidence_filter`: Enables a filter that calculates how blurry the face is and adjusts the confidence based on this.
@@ -108,13 +110,14 @@ Once straight-on images are performing well, start choosing slightly off-angle images
 ## FAQ
 
-### Why is every face tagged as a known face and not unknown?
+### Why can't I bulk upload photos?
 
-Any recognized face with a score >= `min_score` will show in the `Train` tab along with the recognition score. A low scoring face is effectively the same as `unknown`, but includes more information. This does not mean the recognition is not working well, and is part of the importance of choosing the correct `recognition_threshold`.
+It is important to add photos to the library methodically; bulk importing photos (especially from a general photo library) will lead to overfitting on those particular scenarios and hurt recognition performance.
 
 ### Why do unknown people score similarly to known people?
 
 This can happen for a few different reasons, but this is usually an indicator that the training set needs to be improved. This is often related to overfitting:
+
 - If you train with only a few images per person, especially if those images are very similar, the recognition model becomes overly specialized to those specific images.
 - When you provide images with different poses, lighting, and expressions, the algorithm extracts features that are consistent across those variations.
 - By training on a diverse set of images, the algorithm becomes less sensitive to minor variations and noise in the input image.
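Taken together, the two thresholds documented above partition every recognition attempt into three buckets. A minimal sketch of that decision using the documented defaults (the function and constant names are illustrative, not Frigate's API):

```python
# Illustrative only: mirrors the documented semantics of unknown_score (0.8)
# and recognition_threshold (0.9); not Frigate's internal code.
UNKNOWN_SCORE = 0.8
RECOGNITION_THRESHOLD = 0.9

def classify_attempt(score: float) -> str:
    if score >= RECOGNITION_THRESHOLD:
        return "match"      # applied to the object as a sub label
    if score >= UNKNOWN_SCORE:
        return "potential"  # kept for review in the Train tab
    return "unknown"

assert classify_attempt(0.93) == "match"
assert classify_attempt(0.84) == "potential"
assert classify_attempt(0.41) == "unknown"
```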
diff --git a/docs/docs/configuration/reference.md b/docs/docs/configuration/reference.md
index 8e24db0b7..e30ee1619 100644
--- a/docs/docs/configuration/reference.md
+++ b/docs/docs/configuration/reference.md
@@ -547,8 +547,8 @@ semantic_search:
 face_recognition:
   # Optional: Enable face recognition (default: shown below)
   enabled: False
-  # Optional: Minimum face distance score required to save the attempt (default: shown below)
-  min_score: 0.8
+  # Optional: Minimum face distance score required to mark as a potential match (default: shown below)
+  unknown_score: 0.8
   # Optional: Minimum face detection score required to detect a face (default: shown below)
   # NOTE: This only applies when not running a Frigate+ model
   detection_threshold: 0.7
diff --git a/frigate/camera/state.py b/frigate/camera/state.py
index 98f808bf6..65a3dcf5d 100644
--- a/frigate/camera/state.py
+++ b/frigate/camera/state.py
@@ -5,7 +5,7 @@ import logging
 import os
 import threading
 from collections import defaultdict
-from typing import Callable
+from typing import Any, Callable
 
 import cv2
 import numpy as np
@@ -53,8 +53,19 @@ class CameraState:
         self.callbacks = defaultdict(list)
         self.ptz_autotracker_thread = ptz_autotracker_thread
         self.prev_enabled = self.camera_config.enabled
+        self.requires_face_detection = (
+            self.config.face_recognition.enabled
+            and "face" not in self.config.objects.all_objects
+        )
 
-    def get_current_frame(self, draw_options={}):
+    def get_max_update_frequency(self, obj: TrackedObject) -> int:
+        return (
+            1
+            if self.requires_face_detection and obj.obj_data["label"] == "person"
+            else 5
+        )
+
+    def get_current_frame(self, draw_options: dict[str, Any] = {}):
         with self.current_frame_lock:
             frame_copy = np.copy(self._current_frame)
             frame_time = self.current_frame_time
@@ -283,11 +294,12 @@ class CameraState:
 
             updated_obj.last_updated = frame_time
 
-            # if it has been more than 5 seconds since the last thumb update
+            # if it has been more than max_update_frequency seconds since the last thumb update
             # and the last update is greater than the last publish or
             # the object has changed significantly
            if (
-                frame_time - updated_obj.last_published > 5
+                frame_time - updated_obj.last_published
+                > self.get_max_update_frequency(updated_obj)
                 and updated_obj.last_updated > updated_obj.last_published
             ) or significant_update:
                # call event handlers
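This camera/state.py change is the heart of the first commit bullet: when Frigate has to find faces inside `person` crops itself (no dedicated `face` object from the detector), snapshots must be published more often so the face processor sees fresh crops. A standalone sketch of that gate, with hypothetical names and timestamps:

```python
# Standalone sketch of the publish gate above; names and timestamps are
# illustrative, not Frigate's actual classes.
def max_update_interval(requires_face_detection: bool, label: str) -> int:
    # persons get 1s updates when Frigate must detect faces itself,
    # everything else keeps the original 5s cadence
    return 1 if requires_face_detection and label == "person" else 5

def should_publish(frame_time: float, last_published: float,
                   last_updated: float, interval: int) -> bool:
    return (frame_time - last_published > interval
            and last_updated > last_published)

# a person updated 2s after the last publish is re-published when internal
# face detection is in use, but would wait out the 5s window otherwise
assert should_publish(12.0, 10.0, 11.5, max_update_interval(True, "person"))
assert not should_publish(12.0, 10.0, 11.5, max_update_interval(False, "person"))
```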
diff --git a/frigate/config/classification.py b/frigate/config/classification.py
index 25c379546..aecbf6537 100644
--- a/frigate/config/classification.py
+++ b/frigate/config/classification.py
@@ -54,8 +54,8 @@ class FaceRecognitionConfig(FrigateBaseModel):
     model_size: str = Field(
         default="small", title="The size of the embeddings model used."
     )
-    min_score: float = Field(
-        title="Minimum face distance score required to save the attempt.",
+    unknown_score: float = Field(
+        title="Minimum face distance score required to be marked as a potential match.",
         default=0.8,
         gt=0.0,
         le=1.0,
diff --git a/frigate/data_processing/common/face/model.py b/frigate/data_processing/common/face/model.py
index a006734ce..5e15a2441 100644
--- a/frigate/data_processing/common/face/model.py
+++ b/frigate/data_processing/common/face/model.py
@@ -164,9 +164,7 @@ class LBPHRecognizer(FaceRecognizer):
             return
 
         self.recognizer: cv2.face.LBPHFaceRecognizer = (
-            cv2.face.LBPHFaceRecognizer_create(
-                radius=2, threshold=(1 - self.config.face_recognition.min_score) * 1000
-            )
+            cv2.face.LBPHFaceRecognizer_create(radius=2, threshold=400)
         )
         self.recognizer.train(faces, np.array(labels))
 
@@ -243,6 +241,8 @@ class ArcFaceRecognizer(FaceRecognizer):
         for name, embs in face_embeddings_map.items():
             self.mean_embs[name] = stats.trim_mean(embs, 0.15)
 
+        logger.debug("Finished building ArcFace model")
+
     def similarity_to_confidence(
         self, cosine_similarity: float, median=0.3, range_width=0.6, slope_factor=12
     ):
@@ -302,7 +302,4 @@ class ArcFaceRecognizer(FaceRecognizer):
             score = confidence
             label = name
 
-        if score < self.config.face_recognition.min_score:
-            return None
-
         return label, round(score * blur_factor, 2)
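With `min_score` gone, the LBPH recognizer above switches from a config-derived cutoff to a fixed `threshold=400`. Reading the removed expression `(1 - min_score) * 1000` as the inverse of a `score ≈ 1 - distance / 1000` mapping (my inference from that expression, not a documented contract), the effect is that lower-scoring matches now leave the recognizer and are classified downstream instead of being silently dropped:

```python
# Assumes score ~= 1 - distance / 1000, inferred from the removed expression
# (1 - min_score) * 1000; illustrative only.
def lbph_distance_to_score(distance: float) -> float:
    return max(0.0, 1.0 - distance / 1000.0)

# before: threshold = (1 - 0.8) * 1000 = 200, so matches scoring under 0.8
# never left the recognizer
# after: the fixed threshold of 400 admits matches down to a 0.6 score,
# which the processor can then mark as "unknown" or a potential match
assert round(lbph_distance_to_score(200), 2) == 0.8
assert round(lbph_distance_to_score(400), 2) == 0.6
```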
diff --git a/frigate/data_processing/real_time/face.py b/frigate/data_processing/real_time/face.py
index e20dad633..9b479a527 100644
--- a/frigate/data_processing/real_time/face.py
+++ b/frigate/data_processing/real_time/face.py
@@ -36,36 +36,6 @@ MAX_DETECTION_HEIGHT = 1080
 MIN_MATCHING_FACES = 2
 
 
-def weighted_average_by_area(results_list: list[tuple[str, float, int]]):
-    if len(results_list) < 3:
-        return "unknown", 0.0
-
-    score_count = {}
-    weighted_scores = {}
-    total_face_areas = {}
-
-    for name, score, face_area in results_list:
-        if name not in weighted_scores:
-            score_count[name] = 1
-            weighted_scores[name] = 0.0
-            total_face_areas[name] = 0.0
-        else:
-            score_count[name] += 1
-
-        weighted_scores[name] += score * face_area
-        total_face_areas[name] += face_area
-
-    prominent_name = max(score_count)
-
-    # if a single name is not prominent in the history then we are not confident
-    if score_count[prominent_name] / len(results_list) < 0.65:
-        return "unknown", 0.0
-
-    return prominent_name, weighted_scores[prominent_name] / total_face_areas[
-        prominent_name
-    ]
-
-
 class FaceRealTimeProcessor(RealTimeProcessorApi):
     def __init__(
         self,
@@ -271,6 +241,9 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
 
         sub_label, score = res
 
+        if score < self.face_config.unknown_score:
+            sub_label = "unknown"
+
         logger.debug(
             f"Detected best face for person as: {sub_label} with probability {score}"
         )
@@ -288,7 +261,7 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
         self.person_face_history[id].append(
             (sub_label, score, face_frame.shape[0] * face_frame.shape[1])
         )
-        (weighted_sub_label, weighted_score) = weighted_average_by_area(
+        (weighted_sub_label, weighted_score) = self.weighted_average_by_area(
             self.person_face_history[id]
        )
@@ -415,3 +388,34 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
     def expire_object(self, object_id: str):
         if object_id in self.person_face_history:
             self.person_face_history.pop(object_id)
+
+    def weighted_average_by_area(
+        self, results_list: list[tuple[str, float, int]]
+    ) -> tuple[str, float]:
+        min_faces = 1 if self.requires_face_detection else 3
+
+        if len(results_list) < min_faces:
+            return "unknown", 0.0
+
+        score_count = {}
+        weighted_scores = {}
+        total_face_areas = {}
+
+        for name, score, face_area in results_list:
+            if name not in weighted_scores:
+                score_count[name] = 1
+                weighted_scores[name] = 0.0
+                total_face_areas[name] = 0.0
+            else:
+                score_count[name] += 1
+
+            weighted_scores[name] += score * face_area
+            total_face_areas[name] += face_area
+
+        # pick the most frequently seen name in the history
+        prominent_name = max(score_count, key=score_count.get)
+
+        # if a single name is not prominent in the history then we are not confident
+        if score_count[prominent_name] / len(results_list) < 0.65:
+            return "unknown", 0.0
+
+        return prominent_name, weighted_scores[prominent_name] / total_face_areas[
+            prominent_name
+        ]
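The relocated method now scales its minimum history length with the detection type (one result when Frigate runs its own face detection, three when the object detector provides `face`), and it still weights scores by face area so large close-up faces count more than small distant ones. A worked example of that math, with invented numbers:

```python
# Worked example of weighted_average_by_area's core math; values invented.
history = [
    ("jane", 0.90, 10_000),  # large close-up face
    ("jane", 0.70, 2_500),   # small distant face
    ("john", 0.95, 4_000),
]

# "jane" holds 2 of 3 entries (0.67 >= 0.65), so she is the prominent name.
weighted = (0.90 * 10_000 + 0.70 * 2_500) / (10_000 + 2_500)
print(round(weighted, 3))  # 0.86, vs 0.80 for a plain unweighted average
```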
diff --git a/web/src/pages/FaceLibrary.tsx b/web/src/pages/FaceLibrary.tsx
index 0ac937283..696691997 100644
--- a/web/src/pages/FaceLibrary.tsx
+++ b/web/src/pages/FaceLibrary.tsx
@@ -33,7 +33,7 @@ import useKeyboardListener from "@/hooks/use-keyboard-listener";
 import useOptimisticState from "@/hooks/use-optimistic-state";
 import { cn } from "@/lib/utils";
 import { FaceLibraryData, RecognizedFaceData } from "@/types/face";
-import { FrigateConfig } from "@/types/frigateConfig";
+import { FaceRecognitionConfig, FrigateConfig } from "@/types/frigateConfig";
 import axios from "axios";
 import { useCallback, useEffect, useMemo, useRef, useState } from "react";
 import { isDesktop, isMobile } from "react-device-detect";
@@ -451,7 +451,7 @@ function TrainingGrid({
               key={image}
               image={image}
               faceNames={faceNames}
-              threshold={config.face_recognition.recognition_threshold}
+              recognitionConfig={config.face_recognition}
               selected={selectedFaces.includes(image)}
               onClick={(data, meta) => {
                 if (meta) {
@@ -471,7 +471,7 @@ function TrainingGrid({
 type FaceAttemptProps = {
   image: string;
   faceNames: string[];
-  threshold: number;
+  recognitionConfig: FaceRecognitionConfig;
   selected: boolean;
   onClick: (data: RecognizedFaceData, meta: boolean) => void;
   onRefresh: () => void;
 };
@@ -479,7 +479,7 @@ function FaceAttempt({
   image,
   faceNames,
-  threshold,
+  recognitionConfig,
   selected,
   onClick,
   onRefresh,
@@ -496,6 +496,16 @@ function FaceAttempt({
     };
   }, [image]);
 
+  const scoreStatus = useMemo(() => {
+    if (data.score >= recognitionConfig.recognition_threshold) {
+      return "match";
+    } else if (data.score >= recognitionConfig.unknown_score) {
+      return "potential";
+    } else {
+      return "unknown";
+    }
+  }, [data, recognitionConfig]);
+
   // interaction
 
   const imgRef = useRef(null);
@@ -579,10 +589,13 @@ function FaceAttempt({
           {data.name}
           <div
             className={cn(
-              data.score >= threshold ? "text-success" : "text-danger",
+              "",
+              scoreStatus == "match" && "text-success",
+              scoreStatus == "potential" && "text-orange-400",
+              scoreStatus == "unknown" && "text-danger",
             )}
           >
-            {data.score * 100}%
+            {Math.round(data.score * 100)}%
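In the `className` above, each `scoreStatus == ... && "class"` expression evaluates to either the class string or `false`, and `cn` (conventionally `clsx` plus `tailwind-merge` in this kind of setup) drops the falsy entries. A Python analogue of that resolution, purely illustrative:

```python
# Python analogue of the cn()/clsx conditional-class idiom; illustrative only.
def cn(*parts) -> str:
    # keep only truthy strings, mirroring how clsx skips false/empty entries
    return " ".join(p for p in parts if isinstance(p, str) and p)

score_status = "potential"
assert cn(
    "",
    score_status == "match" and "text-success",
    score_status == "potential" and "text-orange-400",
    score_status == "unknown" and "text-danger",
) == "text-orange-400"
```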
diff --git a/web/src/types/frigateConfig.ts b/web/src/types/frigateConfig.ts
index 5312bed8c..d66d5edcb 100644
--- a/web/src/types/frigateConfig.ts
+++ b/web/src/types/frigateConfig.ts
@@ -20,6 +20,14 @@ export interface BirdseyeConfig {
   width: number;
 }
 
+export interface FaceRecognitionConfig {
+  enabled: boolean;
+  model_size: SearchModelSize;
+  unknown_score: number;
+  detection_threshold: number;
+  recognition_threshold: number;
+}
+
 export type SearchModel = "jinav1" | "jinav2";
 export type SearchModelSize = "small" | "large";
 
@@ -331,12 +339,7 @@ export interface FrigateConfig {
 
   environment_vars: Record<string, string>;
 
-  face_recognition: {
-    enabled: boolean;
-    model_size: SearchModelSize;
-    detection_threshold: number;
-    recognition_threshold: number;
-  };
+  face_recognition: FaceRecognitionConfig;
 
   ffmpeg: {
     global_args: string[];
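One upgrade note implied by the rename: configs that still set `face_recognition.min_score` will no longer match the pydantic field, so the key needs to be renamed to `unknown_score`. A hypothetical helper illustrating the change (this patch does not ship any such migration; the real parsing happens in `FaceRecognitionConfig` in frigate/config/classification.py):

```python
# Hypothetical upgrade helper -- not part of this patch; illustrative only.
def migrate_face_config(cfg: dict) -> dict:
    """Rename the pre-#17387 min_score key to unknown_score."""
    if "min_score" in cfg and "unknown_score" not in cfg:
        cfg["unknown_score"] = cfg.pop("min_score")
    return cfg

old = {"enabled": True, "model_size": "small", "min_score": 0.85}
assert migrate_face_config(old)["unknown_score"] == 0.85
```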