Face recognition improvements (#17387)
* Increase frequency of updates when internal face detection is used
* Adjust number of required faces based on detection type
* Adjust min_score config to unknown_score
* Only for person
* Improve typing
* Update face rec docs
* Cleanup ui colors
* Cleanup
parent 395fc33ccc
commit e3d4b84803
@@ -22,16 +22,16 @@ Frigate needs to first detect a `face` before it can recognize a face.
 
 ### Face Recognition
 
 Frigate has support for two face recognition model types:
 
-- **small**: Frigate will use CV2 Local Binary Pattern Face Recognizer to recognize faces, which runs locally on the CPU.
-- **large**: Frigate will run a face embedding model, this is only recommended to be run when an integrated or dedicated GPU is available.
-
-In both cases a lightweight face landmark detection model is also used to align faces before running them through the face recognizer.
+- **small**: Frigate will use CV2 Local Binary Pattern Face Recognizer to recognize faces, which runs locally on the CPU. This model is optimized for efficiency and is not as accurate.
+- **large**: Frigate will run a face embedding model; this model is optimized for accuracy. It is only recommended when an integrated or dedicated GPU is available.
+
+In both cases a lightweight face landmark detection model is also used to align faces before running the recognition model.
 
 ## Minimum System Requirements
 
-Face recognition is lightweight and runs on the CPU, there are no significantly different system requirements than running Frigate itself when using the `small` model.
-
-When using the `large` model an integrated or discrete GPU is recommended.
+The `small` model is optimized for efficiency and runs on the CPU; there are no significantly different system requirements.
+
+The `large` model is optimized for accuracy, and an integrated or discrete GPU is highly recommended.
 
 ## Configuration
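To make the `small` model type concrete, here is a minimal, self-contained sketch of training and querying an LBPH recognizer with OpenCV. This is illustrative only, not Frigate's code; it assumes the `opencv-contrib-python` package, which provides the `cv2.face` module:

```python
import cv2
import numpy as np

# Toy grayscale "face" crops; in practice these would be aligned face images.
rng = np.random.default_rng(0)
faces = [rng.integers(0, 255, (100, 100), dtype=np.uint8) for _ in range(4)]
labels = np.array([0, 0, 1, 1], dtype=np.int32)  # two identities, two samples each

recognizer = cv2.face.LBPHFaceRecognizer_create(radius=2)
recognizer.train(faces, labels)

# predict() returns the best label and a distance (lower = closer match).
label, distance = recognizer.predict(faces[0])
print(label, distance)
```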
@@ -58,6 +58,8 @@ Fine-tune face recognition with these optional parameters:
 
 ### Recognition
 
 - `model_size`: Which model size to use, options are `small` or `large`
+- `unknown_score`: Minimum score required to mark a person as a potential match; matches below this will be marked as unknown.
+  - Default: `0.8`.
 - `recognition_threshold`: Recognition confidence score required to add the face to the object as a sub label.
   - Default: `0.9`.
 - `blur_confidence_filter`: Enables a filter that calculates how blurry the face is and adjusts the confidence based on this.
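Together, the two thresholds partition every recognition score into three buckets, which is also how the updated UI colors training attempts (see the `scoreStatus` change further down). A minimal sketch of that logic, assuming the defaults above (the function name is illustrative, not Frigate's API):

```python
def classify_face_score(
    score: float,
    unknown_score: float = 0.8,
    recognition_threshold: float = 0.9,
) -> str:
    """Bucket a face recognition score the way the two config options describe."""
    if score >= recognition_threshold:
        return "match"      # confident: face is added to the object as a sub label
    if score >= unknown_score:
        return "potential"  # shown as a potential match in the Train tab
    return "unknown"        # below unknown_score: treated as an unknown face

assert classify_face_score(0.95) == "match"
assert classify_face_score(0.85) == "potential"
assert classify_face_score(0.50) == "unknown"
```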
@@ -108,13 +110,14 @@ Once straight-on images are performing well, start choosing slightly off-angle images
 
 ## FAQ
 
-### Why is every face tagged as a known face and not unknown?
+### Why can't I bulk upload photos?
 
-Any recognized face with a score >= `min_score` will show in the `Train` tab along with the recognition score. A low scoring face is effectively the same as `unknown`, but includes more information. This does not mean the recognition is not working well, and is part of the importance of choosing the correct `recognition_threshold`.
+It is important to methodically add photos to the library; bulk importing photos (especially from a general photo library) will lead to overfitting in that particular scenario and hurt recognition performance.
 
 ### Why do unknown people score similarly to known people?
 
 This can happen for a few different reasons, but it is usually an indicator that the training set needs to be improved. It is often related to overfitting:
 
 - If you train with only a few images per person, especially if those images are very similar, the recognition model becomes overly specialized to those specific images.
 - When you provide images with different poses, lighting, and expressions, the algorithm extracts features that are consistent across those variations.
 - By training on a diverse set of images, the algorithm becomes less sensitive to minor variations and noise in the input image.
@@ -547,8 +547,8 @@ semantic_search:
 face_recognition:
   # Optional: Enable face recognition (default: shown below)
   enabled: False
-  # Optional: Minimum face distance score required to save the attempt (default: shown below)
-  min_score: 0.8
+  # Optional: Minimum face distance score required to mark as a potential match (default: shown below)
+  unknown_score: 0.8
   # Optional: Minimum face detection score required to detect a face (default: shown below)
   # NOTE: This only applies when not running a Frigate+ model
   detection_threshold: 0.7
@@ -5,7 +5,7 @@ import logging
 import os
 import threading
 from collections import defaultdict
-from typing import Callable
+from typing import Any, Callable
 
 import cv2
 import numpy as np
@@ -53,8 +53,19 @@ class CameraState:
         self.callbacks = defaultdict(list)
         self.ptz_autotracker_thread = ptz_autotracker_thread
         self.prev_enabled = self.camera_config.enabled
+        self.requires_face_detection = (
+            self.config.face_recognition.enabled
+            and "face" not in self.config.objects.all_objects
+        )
+
+    def get_max_update_frequency(self, obj: TrackedObject) -> int:
+        return (
+            1
+            if self.requires_face_detection and obj.obj_data["label"] == "person"
+            else 5
+        )
 
-    def get_current_frame(self, draw_options={}):
+    def get_current_frame(self, draw_options: dict[str, Any] = {}):
         with self.current_frame_lock:
             frame_copy = np.copy(self._current_frame)
             frame_time = self.current_frame_time
@@ -283,11 +294,12 @@ class CameraState:
 
                 updated_obj.last_updated = frame_time
 
-            # if it has been more than 5 seconds since the last thumb update
+            # if it has been more than max_update_frequency seconds since the last thumb update
             # and the last update is greater than the last publish or
             # the object has changed significantly
            if (
-                frame_time - updated_obj.last_published > 5
+                frame_time - updated_obj.last_published
+                > self.get_max_update_frequency(updated_obj)
                 and updated_obj.last_updated > updated_obj.last_published
             ) or significant_update:
                 # call event handlers
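Taken together, the two hunks above mean a tracked person's updates can be re-published every second instead of every five when Frigate has to run its own face detection (face recognition is enabled but the object model has no native `face` label), giving the recognizer more face crops to work with. A condensed, standalone restatement of that gating logic (plain functions in place of the class methods, names illustrative):

```python
def max_update_frequency(requires_face_detection: bool, label: str) -> int:
    # Persons update every 1s when Frigate runs internal face detection;
    # otherwise the previous 5s interval is kept.
    return 1 if requires_face_detection and label == "person" else 5

def should_publish(
    frame_time: float,
    last_published: float,
    last_updated: float,
    significant_update: bool,
    frequency: int,
) -> bool:
    # Publish when enough time has passed and there is something newer than
    # the last publish, or when the object changed significantly.
    return (
        frame_time - last_published > frequency and last_updated > last_published
    ) or significant_update
```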
@@ -54,8 +54,8 @@ class FaceRecognitionConfig(FrigateBaseModel):
     model_size: str = Field(
         default="small", title="The size of the embeddings model used."
     )
-    min_score: float = Field(
-        title="Minimum face distance score required to save the attempt.",
+    unknown_score: float = Field(
+        title="Minimum face distance score required to be marked as a potential match.",
         default=0.8,
         gt=0.0,
         le=1.0,
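The `gt=0.0, le=1.0` bounds mean Pydantic rejects any `unknown_score` outside `(0, 1]` at config load time. A self-contained sketch of the same validation, using a stand-in model rather than the Frigate class:

```python
from pydantic import BaseModel, Field, ValidationError

class FaceRecognitionSketch(BaseModel):
    unknown_score: float = Field(default=0.8, gt=0.0, le=1.0)

print(FaceRecognitionSketch().unknown_score)         # 0.8 (default)
print(FaceRecognitionSketch(unknown_score=0.95))     # accepted

try:
    FaceRecognitionSketch(unknown_score=1.2)         # rejected: must be <= 1.0
except ValidationError as err:
    print(err)
```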
@@ -164,9 +164,7 @@ class LBPHRecognizer(FaceRecognizer):
             return
 
         self.recognizer: cv2.face.LBPHFaceRecognizer = (
-            cv2.face.LBPHFaceRecognizer_create(
-                radius=2, threshold=(1 - self.config.face_recognition.min_score) * 1000
-            )
+            cv2.face.LBPHFaceRecognizer_create(radius=2, threshold=400)
         )
         self.recognizer.train(faces, np.array(labels))
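The deleted expression shows the implied relationship between LBPH distance and score: a threshold of `(1 - min_score) * 1000` means a distance `d` corresponds to a score of roughly `1 - d / 1000`. Under that inferred mapping (not copied from Frigate's source), the new fixed threshold of `400` is equivalent to a minimum score of `0.6`:

```python
def lbph_distance_to_score(distance: float) -> float:
    # Mapping inferred from the removed threshold arithmetic above;
    # an assumption for illustration, not Frigate's actual conversion.
    return max(0.0, 1.0 - distance / 1000.0)

assert lbph_distance_to_score(400) == 0.6   # the new fixed threshold
assert lbph_distance_to_score(200) == 0.8   # the old default (min_score=0.8)
```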
@@ -243,6 +241,8 @@ class ArcFaceRecognizer(FaceRecognizer):
         for name, embs in face_embeddings_map.items():
             self.mean_embs[name] = stats.trim_mean(embs, 0.15)
 
+        logger.debug("Finished building ArcFace model")
+
     def similarity_to_confidence(
         self, cosine_similarity: float, median=0.3, range_width=0.6, slope_factor=12
     ):
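`scipy.stats.trim_mean(embs, 0.15)` averages each embedding dimension after discarding the most extreme 15% of values from each end, so a handful of bad training crops cannot drag a person's mean embedding far off. A quick, standalone illustration:

```python
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
embs = rng.normal(0.2, 0.01, size=(10, 3))  # ten toy 3-d embeddings
embs[0] = 0.9                               # one outlier (e.g. a bad crop)

# With 10 samples, proportiontocut=0.15 trims one value from each end of
# every dimension before averaging, which drops the outlier entirely.
print(stats.trim_mean(embs, 0.15))  # ~[0.2, 0.2, 0.2]
print(embs.mean(axis=0))            # pulled toward 0.9 by the outlier
```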
@@ -302,7 +302,4 @@ class ArcFaceRecognizer(FaceRecognizer):
                 score = confidence
                 label = name
 
-        if score < self.config.face_recognition.min_score:
-            return None
-
         return label, round(score * blur_factor, 2)
@@ -36,36 +36,6 @@ MAX_DETECTION_HEIGHT = 1080
 MIN_MATCHING_FACES = 2
 
 
-def weighted_average_by_area(results_list: list[tuple[str, float, int]]):
-    if len(results_list) < 3:
-        return "unknown", 0.0
-
-    score_count = {}
-    weighted_scores = {}
-    total_face_areas = {}
-
-    for name, score, face_area in results_list:
-        if name not in weighted_scores:
-            score_count[name] = 1
-            weighted_scores[name] = 0.0
-            total_face_areas[name] = 0.0
-        else:
-            score_count[name] += 1
-
-        weighted_scores[name] += score * face_area
-        total_face_areas[name] += face_area
-
-    prominent_name = max(score_count)
-
-    # if a single name is not prominent in the history then we are not confident
-    if score_count[prominent_name] / len(results_list) < 0.65:
-        return "unknown", 0.0
-
-    return prominent_name, weighted_scores[prominent_name] / total_face_areas[
-        prominent_name
-    ]
-
-
 class FaceRealTimeProcessor(RealTimeProcessorApi):
     def __init__(
         self,
@@ -271,6 +241,9 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
 
         sub_label, score = res
 
+        if score < self.face_config.unknown_score:
+            sub_label = "unknown"
+
         logger.debug(
             f"Detected best face for person as: {sub_label} with probability {score}"
         )
@@ -288,7 +261,7 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
         self.person_face_history[id].append(
             (sub_label, score, face_frame.shape[0] * face_frame.shape[1])
         )
-        (weighted_sub_label, weighted_score) = weighted_average_by_area(
+        (weighted_sub_label, weighted_score) = self.weighted_average_by_area(
             self.person_face_history[id]
         )
@@ -415,3 +388,34 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
     def expire_object(self, object_id: str):
         if object_id in self.person_face_history:
             self.person_face_history.pop(object_id)
+
+    def weighted_average_by_area(self, results_list: list[tuple[str, float, int]]):
+        min_faces = 1 if self.requires_face_detection else 3
+
+        if len(results_list) < min_faces:
+            return "unknown", 0.0
+
+        score_count = {}
+        weighted_scores = {}
+        total_face_areas = {}
+
+        for name, score, face_area in results_list:
+            if name not in weighted_scores:
+                score_count[name] = 1
+                weighted_scores[name] = 0.0
+                total_face_areas[name] = 0.0
+            else:
+                score_count[name] += 1
+
+            weighted_scores[name] += score * face_area
+            total_face_areas[name] += face_area
+
+        prominent_name = max(score_count)
+
+        # if a single name is not prominent in the history then we are not confident
+        if score_count[prominent_name] / len(results_list) < 0.65:
+            return "unknown", 0.0
+
+        return prominent_name, weighted_scores[prominent_name] / total_face_areas[
+            prominent_name
+        ]
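The relocated method now requires only one result in the history when Frigate runs its own internal face detection, versus three when the object detection model natively detects faces. For readers who want to experiment with the voting behavior, here is a standalone re-implementation; note it picks the most frequent name explicitly via `Counter.most_common`, whereas the diff's `max(score_count)` compares the name strings themselves:

```python
from collections import Counter

def area_weighted_vote(history: list[tuple[str, float, int]], min_faces: int = 3):
    """history holds (name, score, face_area) tuples for one tracked person."""
    if len(history) < min_faces:
        return "unknown", 0.0

    counts = Counter(name for name, _, _ in history)
    prominent, count = counts.most_common(1)[0]

    # if a single name does not dominate the history, we are not confident
    if count / len(history) < 0.65:
        return "unknown", 0.0

    weighted = sum(score * area for name, score, area in history if name == prominent)
    total_area = sum(area for name, _, area in history if name == prominent)
    return prominent, weighted / total_area

# Larger faces pull the combined score toward their individual scores:
history = [("alice", 0.95, 40_000), ("alice", 0.80, 10_000), ("alice", 0.90, 30_000)]
print(area_weighted_vote(history))  # ('alice', 0.9125)
```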
@@ -33,7 +33,7 @@ import useKeyboardListener from "@/hooks/use-keyboard-listener";
 import useOptimisticState from "@/hooks/use-optimistic-state";
 import { cn } from "@/lib/utils";
 import { FaceLibraryData, RecognizedFaceData } from "@/types/face";
-import { FrigateConfig } from "@/types/frigateConfig";
+import { FaceRecognitionConfig, FrigateConfig } from "@/types/frigateConfig";
 import axios from "axios";
 import { useCallback, useEffect, useMemo, useRef, useState } from "react";
 import { isDesktop, isMobile } from "react-device-detect";
@@ -451,7 +451,7 @@ function TrainingGrid({
               key={image}
               image={image}
               faceNames={faceNames}
-              threshold={config.face_recognition.recognition_threshold}
+              recognitionConfig={config.face_recognition}
               selected={selectedFaces.includes(image)}
               onClick={(data, meta) => {
                 if (meta) {
@@ -471,7 +471,7 @@ function TrainingGrid({
 type FaceAttemptProps = {
   image: string;
   faceNames: string[];
-  threshold: number;
+  recognitionConfig: FaceRecognitionConfig;
   selected: boolean;
   onClick: (data: RecognizedFaceData, meta: boolean) => void;
   onRefresh: () => void;
@@ -479,7 +479,7 @@ type FaceAttemptProps = {
 function FaceAttempt({
   image,
   faceNames,
-  threshold,
+  recognitionConfig,
   selected,
   onClick,
   onRefresh,
@@ -496,6 +496,16 @@ function FaceAttempt({
     };
   }, [image]);
 
+  const scoreStatus = useMemo(() => {
+    if (data.score >= recognitionConfig.recognition_threshold) {
+      return "match";
+    } else if (data.score >= recognitionConfig.unknown_score) {
+      return "potential";
+    } else {
+      return "unknown";
+    }
+  }, [data, recognitionConfig]);
+
   // interaction
 
   const imgRef = useRef<HTMLImageElement | null>(null);
@@ -579,10 +589,13 @@ function FaceAttempt({
             <div className="capitalize">{data.name}</div>
             <div
               className={cn(
-                data.score >= threshold ? "text-success" : "text-danger",
-                "",
+                scoreStatus == "match" && "text-success",
+                scoreStatus == "potential" && "text-orange-400",
+                scoreStatus == "unknown" && "text-danger",
               )}
             >
-              {data.score * 100}%
+              {Math.round(data.score * 100)}%
             </div>
           </div>
           <div className="flex flex-row items-start justify-end gap-5 md:gap-4">
@@ -20,6 +20,14 @@ export interface BirdseyeConfig {
   width: number;
 }
 
+export interface FaceRecognitionConfig {
+  enabled: boolean;
+  model_size: SearchModelSize;
+  unknown_score: number;
+  detection_threshold: number;
+  recognition_threshold: number;
+}
+
 export type SearchModel = "jinav1" | "jinav2";
 export type SearchModelSize = "small" | "large";
@@ -331,12 +339,7 @@ export interface FrigateConfig {
 
   environment_vars: Record<string, unknown>;
 
-  face_recognition: {
-    enabled: boolean;
-    model_size: SearchModelSize;
-    detection_threshold: number;
-    recognition_threshold: number;
-  };
+  face_recognition: FaceRecognitionConfig;
 
   ffmpeg: {
     global_args: string[];