From e3d4b84803fce7e9f345592c797a8cdc62022dbb Mon Sep 17 00:00:00 2001
From: Nicolas Mowen
Date: Wed, 26 Mar 2025 07:23:01 -0600
Subject: [PATCH] Face recognition improvements (#17387)

* Increase frequency of updates when internal face detection is used
* Adjust number of required faces based on detection type
* Rename min_score config to unknown_score
* Only increase update frequency for person objects
* Improve typing
* Update face rec docs
* Cleanup ui colors
* Cleanup
---
 docs/docs/configuration/face_recognition.md   | 19 +++---
 docs/docs/configuration/reference.md          |  4 +-
 frigate/camera/state.py                       | 20 ++++--
 frigate/config/classification.py              |  4 +-
 frigate/data_processing/common/face/model.py  |  9 +--
 frigate/data_processing/real_time/face.py     | 66 +++++++++++---------
 web/src/pages/FaceLibrary.tsx                 | 25 ++++++--
 web/src/types/frigateConfig.ts                | 15 +++--
 8 files changed, 97 insertions(+), 65 deletions(-)

diff --git a/docs/docs/configuration/face_recognition.md b/docs/docs/configuration/face_recognition.md
index a3cb4e308..af6fd1eff 100644
--- a/docs/docs/configuration/face_recognition.md
+++ b/docs/docs/configuration/face_recognition.md
@@ -22,16 +22,16 @@ Frigate needs to first detect a `face` before it can recognize a face.
 ### Face Recognition
 
 Frigate has support for two face recognition model types:
 
-- **small**: Frigate will use CV2 Local Binary Pattern Face Recognizer to recognize faces, which runs locally on the CPU.
-- **large**: Frigate will run a face embedding model, this is only recommended to be run when an integrated or dedicated GPU is available.
-In both cases a lightweight face landmark detection model is also used to align faces before running them through the face recognizer.
+- **small**: Frigate will use the CV2 Local Binary Pattern Face Recognizer to recognize faces, which runs locally on the CPU. This model is optimized for efficiency but is less accurate.
+- **large**: Frigate will run a face embedding model that is optimized for accuracy. It is only recommended when an integrated or dedicated GPU is available.
+
+In both cases a lightweight face landmark detection model is also used to align faces before running the recognition model.
 
 ## Minimum System Requirements
 
-Face recognition is lightweight and runs on the CPU, there are no significantly different system requirements than running Frigate itself when using the `small` model.
-
-When using the `large` model an integrated or discrete GPU is recommended.
+The `small` model is optimized for efficiency and runs on the CPU; it does not require significantly more resources than running Frigate itself.
+The `large` model is optimized for accuracy, and an integrated or discrete GPU is highly recommended.
 
 ## Configuration
 
@@ -58,6 +58,8 @@ Fine-tune face recognition with these optional parameters:
 ### Recognition
 
 - `model_size`: Which model size to use, options are `small` or `large`
+- `unknown_score`: Minimum score required to mark a person as a potential match; matches below this are marked as unknown.
+  - Default: `0.8`.
 - `recognition_threshold`: Recognition confidence score required to add the face to the object as a sub label.
   - Default: `0.9`.
 - `blur_confidence_filter`: Enables a filter that calculates how blurry the face is and adjusts the confidence based on this.
@@ -108,13 +110,14 @@ Once straight-on images are performing well, start choosing slightly off-angle images
 ## FAQ
 
-### Why is every face tagged as a known face and not unknown?
+### Why can't I bulk upload photos?
 
-Any recognized face with a score >= `min_score` will show in the `Train` tab along with the recognition score. A low scoring face is effectively the same as `unknown`, but includes more information. This does not mean the recognition is not working well, and is part of the importance of choosing the correct `recognition_threshold`.
+It is important to add photos to the library methodically; bulk importing photos (especially from a general photo library) will lead to overfitting on those particular scenarios and hurt recognition performance.
 
 ### Why do unknown people score similarly to known people?
 
 This can happen for a few different reasons, but this is usually an indicator that the training set needs to be improved. This is often related to overfitting:
+
 - If you train with only a few images per person, especially if those images are very similar, the recognition model becomes overly specialized to those specific images.
 - When you provide images with different poses, lighting, and expressions, the algorithm extracts features that are consistent across those variations.
 - By training on a diverse set of images, the algorithm becomes less sensitive to minor variations and noise in the input image.
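Taken together, the two thresholds documented above partition every recognition attempt into three buckets. A minimal sketch of that decision using the documented defaults (the function and constant names are illustrative, not Frigate's API):

```python
# Illustrative only: mirrors the documented semantics of unknown_score (0.8)
# and recognition_threshold (0.9); not Frigate's internal code.
UNKNOWN_SCORE = 0.8
RECOGNITION_THRESHOLD = 0.9

def classify_attempt(score: float) -> str:
    if score >= RECOGNITION_THRESHOLD:
        return "match"      # applied to the object as a sub label
    if score >= UNKNOWN_SCORE:
        return "potential"  # kept for review in the Train tab
    return "unknown"

assert classify_attempt(0.93) == "match"
assert classify_attempt(0.84) == "potential"
assert classify_attempt(0.41) == "unknown"
```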
diff --git a/docs/docs/configuration/reference.md b/docs/docs/configuration/reference.md
index 8e24db0b7..e30ee1619 100644
--- a/docs/docs/configuration/reference.md
+++ b/docs/docs/configuration/reference.md
@@ -547,8 +547,8 @@ semantic_search:
 face_recognition:
   # Optional: Enable face recognition (default: shown below)
   enabled: False
-  # Optional: Minimum face distance score required to save the attempt (default: shown below)
-  min_score: 0.8
+  # Optional: Minimum face distance score required to mark as a potential match (default: shown below)
+  unknown_score: 0.8
   # Optional: Minimum face detection score required to detect a face (default: shown below)
   # NOTE: This only applies when not running a Frigate+ model
   detection_threshold: 0.7
diff --git a/frigate/camera/state.py b/frigate/camera/state.py
index 98f808bf6..65a3dcf5d 100644
--- a/frigate/camera/state.py
+++ b/frigate/camera/state.py
@@ -5,7 +5,7 @@ import logging
 import os
 import threading
 from collections import defaultdict
-from typing import Callable
+from typing import Any, Callable
 
 import cv2
 import numpy as np
@@ -53,8 +53,19 @@ class CameraState:
         self.callbacks = defaultdict(list)
         self.ptz_autotracker_thread = ptz_autotracker_thread
         self.prev_enabled = self.camera_config.enabled
+        self.requires_face_detection = (
+            self.config.face_recognition.enabled
+            and "face" not in self.config.objects.all_objects
+        )
 
-    def get_current_frame(self, draw_options={}):
+    def get_max_update_frequency(self, obj: TrackedObject) -> int:
+        return (
+            1
+            if self.requires_face_detection and obj.obj_data["label"] == "person"
+            else 5
+        )
+
+    def get_current_frame(self, draw_options: dict[str, Any] = {}):
         with self.current_frame_lock:
             frame_copy = np.copy(self._current_frame)
             frame_time = self.current_frame_time
@@ -283,11 +294,12 @@ class CameraState:
 
             updated_obj.last_updated = frame_time
 
-            # if it has been more than 5 seconds since the last thumb update
+            # if it has been more than max_update_frequency seconds since the last thumb update
             # and the last update is greater than the last publish or
             # the object has changed significantly
            if (
-                frame_time - updated_obj.last_published > 5
+                frame_time - updated_obj.last_published
+                > self.get_max_update_frequency(updated_obj)
                 and updated_obj.last_updated > updated_obj.last_published
             ) or significant_update:
                # call event handlers
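This camera/state.py change is the heart of the first commit bullet: when Frigate has to find faces inside `person` crops itself (no dedicated `face` object from the detector), snapshots must be published more often so the face processor sees fresh crops. A standalone sketch of that gate, with hypothetical names and timestamps:

```python
# Standalone sketch of the publish gate above; names and timestamps are
# illustrative, not Frigate's actual classes.
def max_update_interval(requires_face_detection: bool, label: str) -> int:
    # persons get 1s updates when Frigate must detect faces itself,
    # everything else keeps the original 5s cadence
    return 1 if requires_face_detection and label == "person" else 5

def should_publish(frame_time: float, last_published: float,
                   last_updated: float, interval: int) -> bool:
    return (frame_time - last_published > interval
            and last_updated > last_published)

# a person updated 2s after the last publish is re-published when internal
# face detection is in use, but would wait out the 5s window otherwise
assert should_publish(12.0, 10.0, 11.5, max_update_interval(True, "person"))
assert not should_publish(12.0, 10.0, 11.5, max_update_interval(False, "person"))
```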
diff --git a/frigate/config/classification.py b/frigate/config/classification.py
index 25c379546..aecbf6537 100644
--- a/frigate/config/classification.py
+++ b/frigate/config/classification.py
@@ -54,8 +54,8 @@ class FaceRecognitionConfig(FrigateBaseModel):
     model_size: str = Field(
         default="small", title="The size of the embeddings model used."
     )
-    min_score: float = Field(
-        title="Minimum face distance score required to save the attempt.",
+    unknown_score: float = Field(
+        title="Minimum face distance score required to be marked as a potential match.",
         default=0.8,
         gt=0.0,
         le=1.0,
diff --git a/frigate/data_processing/common/face/model.py b/frigate/data_processing/common/face/model.py
index a006734ce..5e15a2441 100644
--- a/frigate/data_processing/common/face/model.py
+++ b/frigate/data_processing/common/face/model.py
@@ -164,9 +164,7 @@ class LBPHRecognizer(FaceRecognizer):
             return
 
         self.recognizer: cv2.face.LBPHFaceRecognizer = (
-            cv2.face.LBPHFaceRecognizer_create(
-                radius=2, threshold=(1 - self.config.face_recognition.min_score) * 1000
-            )
+            cv2.face.LBPHFaceRecognizer_create(radius=2, threshold=400)
         )
         self.recognizer.train(faces, np.array(labels))
 
@@ -243,6 +241,8 @@ class ArcFaceRecognizer(FaceRecognizer):
         for name, embs in face_embeddings_map.items():
             self.mean_embs[name] = stats.trim_mean(embs, 0.15)
 
+        logger.debug("Finished building ArcFace model")
+
     def similarity_to_confidence(
         self, cosine_similarity: float, median=0.3, range_width=0.6, slope_factor=12
     ):
@@ -302,7 +302,4 @@ class ArcFaceRecognizer(FaceRecognizer):
             score = confidence
             label = name
 
-        if score < self.config.face_recognition.min_score:
-            return None
-
         return label, round(score * blur_factor, 2)
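With `min_score` gone, the LBPH recognizer above switches from a config-derived cutoff to a fixed `threshold=400`. Reading the removed expression `(1 - min_score) * 1000` as the inverse of a `score ≈ 1 - distance / 1000` mapping (my inference from that expression, not a documented contract), the effect is that lower-scoring matches now leave the recognizer and are classified downstream instead of being silently dropped:

```python
# Assumes score ~= 1 - distance / 1000, inferred from the removed expression
# (1 - min_score) * 1000; illustrative only.
def lbph_distance_to_score(distance: float) -> float:
    return max(0.0, 1.0 - distance / 1000.0)

# before: threshold = (1 - 0.8) * 1000 = 200, so matches scoring under 0.8
# never left the recognizer
# after: the fixed threshold of 400 admits matches down to a 0.6 score,
# which the processor can then mark as "unknown" or a potential match
assert round(lbph_distance_to_score(200), 2) == 0.8
assert round(lbph_distance_to_score(400), 2) == 0.6
```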
diff --git a/frigate/data_processing/real_time/face.py b/frigate/data_processing/real_time/face.py
index e20dad633..9b479a527 100644
--- a/frigate/data_processing/real_time/face.py
+++ b/frigate/data_processing/real_time/face.py
@@ -36,36 +36,6 @@ MAX_DETECTION_HEIGHT = 1080
 MIN_MATCHING_FACES = 2
 
 
-def weighted_average_by_area(results_list: list[tuple[str, float, int]]):
-    if len(results_list) < 3:
-        return "unknown", 0.0
-
-    score_count = {}
-    weighted_scores = {}
-    total_face_areas = {}
-
-    for name, score, face_area in results_list:
-        if name not in weighted_scores:
-            score_count[name] = 1
-            weighted_scores[name] = 0.0
-            total_face_areas[name] = 0.0
-        else:
-            score_count[name] += 1
-
-        weighted_scores[name] += score * face_area
-        total_face_areas[name] += face_area
-
-    prominent_name = max(score_count)
-
-    # if a single name is not prominent in the history then we are not confident
-    if score_count[prominent_name] / len(results_list) < 0.65:
-        return "unknown", 0.0
-
-    return prominent_name, weighted_scores[prominent_name] / total_face_areas[
-        prominent_name
-    ]
-
-
 class FaceRealTimeProcessor(RealTimeProcessorApi):
     def __init__(
         self,
@@ -271,6 +241,9 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
 
         sub_label, score = res
 
+        if score < self.face_config.unknown_score:
+            sub_label = "unknown"
+
         logger.debug(
             f"Detected best face for person as: {sub_label} with probability {score}"
         )
@@ -288,7 +261,7 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
         self.person_face_history[id].append(
             (sub_label, score, face_frame.shape[0] * face_frame.shape[1])
         )
-        (weighted_sub_label, weighted_score) = weighted_average_by_area(
+        (weighted_sub_label, weighted_score) = self.weighted_average_by_area(
             self.person_face_history[id]
        )
@@ -415,3 +388,34 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
     def expire_object(self, object_id: str):
         if object_id in self.person_face_history:
             self.person_face_history.pop(object_id)
+
+    def weighted_average_by_area(
+        self, results_list: list[tuple[str, float, int]]
+    ) -> tuple[str, float]:
+        min_faces = 1 if self.requires_face_detection else 3
+
+        if len(results_list) < min_faces:
+            return "unknown", 0.0
+
+        score_count = {}
+        weighted_scores = {}
+        total_face_areas = {}
+
+        for name, score, face_area in results_list:
+            if name not in weighted_scores:
+                score_count[name] = 1
+                weighted_scores[name] = 0.0
+                total_face_areas[name] = 0.0
+            else:
+                score_count[name] += 1
+
+            weighted_scores[name] += score * face_area
+            total_face_areas[name] += face_area
+
+        # pick the most frequently seen name in the history
+        prominent_name = max(score_count, key=score_count.get)
+
+        # if a single name is not prominent in the history then we are not confident
+        if score_count[prominent_name] / len(results_list) < 0.65:
+            return "unknown", 0.0
+
+        return prominent_name, weighted_scores[prominent_name] / total_face_areas[
+            prominent_name
+        ]
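The relocated method now scales its minimum history length with the detection type (one result when Frigate runs its own face detection, three when the object detector provides `face`), and it still weights scores by face area so large close-up faces count more than small distant ones. A worked example of that math, with invented numbers:

```python
# Worked example of weighted_average_by_area's core math; values invented.
history = [
    ("jane", 0.90, 10_000),  # large close-up face
    ("jane", 0.70, 2_500),   # small distant face
    ("john", 0.95, 4_000),
]

# "jane" holds 2 of 3 entries (0.67 >= 0.65), so she is the prominent name.
weighted = (0.90 * 10_000 + 0.70 * 2_500) / (10_000 + 2_500)
print(round(weighted, 3))  # 0.86, vs 0.80 for a plain unweighted average
```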
diff --git a/web/src/pages/FaceLibrary.tsx b/web/src/pages/FaceLibrary.tsx
index 0ac937283..696691997 100644
--- a/web/src/pages/FaceLibrary.tsx
+++ b/web/src/pages/FaceLibrary.tsx
@@ -33,7 +33,7 @@ import useKeyboardListener from "@/hooks/use-keyboard-listener";
 import useOptimisticState from "@/hooks/use-optimistic-state";
 import { cn } from "@/lib/utils";
 import { FaceLibraryData, RecognizedFaceData } from "@/types/face";
-import { FrigateConfig } from "@/types/frigateConfig";
+import { FaceRecognitionConfig, FrigateConfig } from "@/types/frigateConfig";
 import axios from "axios";
 import { useCallback, useEffect, useMemo, useRef, useState } from "react";
 import { isDesktop, isMobile } from "react-device-detect";
@@ -451,7 +451,7 @@ function TrainingGrid({
               key={image}
               image={image}
               faceNames={faceNames}
-              threshold={config.face_recognition.recognition_threshold}
+              recognitionConfig={config.face_recognition}
               selected={selectedFaces.includes(image)}
               onClick={(data, meta) => {
                 if (meta) {
@@ -471,7 +471,7 @@ function TrainingGrid({
 type FaceAttemptProps = {
   image: string;
   faceNames: string[];
-  threshold: number;
+  recognitionConfig: FaceRecognitionConfig;
   selected: boolean;
   onClick: (data: RecognizedFaceData, meta: boolean) => void;
   onRefresh: () => void;
 };
@@ -479,7 +479,7 @@ function FaceAttempt({
   image,
   faceNames,
-  threshold,
+  recognitionConfig,
   selected,
   onClick,
   onRefresh,
@@ -496,6 +496,16 @@ function FaceAttempt({
     };
   }, [image]);
 
+  const scoreStatus = useMemo(() => {
+    if (data.score >= recognitionConfig.recognition_threshold) {
+      return "match";
+    } else if (data.score >= recognitionConfig.unknown_score) {
+      return "potential";
+    } else {
+      return "unknown";
+    }
+  }, [data, recognitionConfig]);
+
   // interaction
 
   const imgRef = useRef(null);
@@ -579,10 +589,13 @@ function FaceAttempt({
           {data.name}
           <div
             className={cn(
-              data.score >= threshold ? "text-success" : "text-danger",
+              "",
+              scoreStatus == "match" && "text-success",
+              scoreStatus == "potential" && "text-orange-400",
+              scoreStatus == "unknown" && "text-danger",
             )}
           >
-            {data.score * 100}%
+            {Math.round(data.score * 100)}%
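In the `className` above, each `scoreStatus == ... && "class"` expression evaluates to either the class string or `false`, and `cn` (conventionally `clsx` plus `tailwind-merge` in this kind of setup) drops the falsy entries. A Python analogue of that resolution, purely illustrative:

```python
# Python analogue of the cn()/clsx conditional-class idiom; illustrative only.
def cn(*parts) -> str:
    # keep only truthy strings, mirroring how clsx skips false/empty entries
    return " ".join(p for p in parts if isinstance(p, str) and p)

score_status = "potential"
assert cn(
    "",
    score_status == "match" and "text-success",
    score_status == "potential" and "text-orange-400",
    score_status == "unknown" and "text-danger",
) == "text-orange-400"
```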
diff --git a/web/src/types/frigateConfig.ts b/web/src/types/frigateConfig.ts
index 5312bed8c..d66d5edcb 100644
--- a/web/src/types/frigateConfig.ts
+++ b/web/src/types/frigateConfig.ts
@@ -20,6 +20,14 @@ export interface BirdseyeConfig {
   width: number;
 }
 
+export interface FaceRecognitionConfig {
+  enabled: boolean;
+  model_size: SearchModelSize;
+  unknown_score: number;
+  detection_threshold: number;
+  recognition_threshold: number;
+}
+
 export type SearchModel = "jinav1" | "jinav2";
 export type SearchModelSize = "small" | "large";
 
@@ -331,12 +339,7 @@ export interface FrigateConfig {
 
   environment_vars: Record<string, string>;
 
-  face_recognition: {
-    enabled: boolean;
-    model_size: SearchModelSize;
-    detection_threshold: number;
-    recognition_threshold: number;
-  };
+  face_recognition: FaceRecognitionConfig;
 
   ffmpeg: {
     global_args: string[];
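One upgrade note implied by the rename: configs that still set `face_recognition.min_score` will no longer match the pydantic field, so the key needs to be renamed to `unknown_score`. A hypothetical helper illustrating the change (this patch does not ship any such migration; the real parsing happens in `FaceRecognitionConfig` in frigate/config/classification.py):

```python
# Hypothetical upgrade helper -- not part of this patch; illustrative only.
def migrate_face_config(cfg: dict) -> dict:
    """Rename the pre-#17387 min_score key to unknown_score."""
    if "min_score" in cfg and "unknown_score" not in cfg:
        cfg["unknown_score"] = cfg.pop("min_score")
    return cfg

old = {"enabled": True, "model_size": "small", "min_score": 0.85}
assert migrate_face_config(old)["unknown_score"] == 0.85
```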