From 95e141ed15f1eb4b5d6a049fb8157a5933fc68bb Mon Sep 17 00:00:00 2001
From: Nicolas Mowen <nickmowen213@gmail.com>
Date: Mon, 17 Mar 2025 08:05:53 -0600
Subject: [PATCH] Improve face detection (#17202)

---
 frigate/data_processing/real_time/face.py | 25 +++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/frigate/data_processing/real_time/face.py b/frigate/data_processing/real_time/face.py
index 7d97f8586..e70801812 100644
--- a/frigate/data_processing/real_time/face.py
+++ b/frigate/data_processing/real_time/face.py
@@ -27,6 +27,7 @@ from .api import RealTimeProcessorApi
 logger = logging.getLogger(__name__)
 
 
+MAX_DETECTION_HEIGHT = 1080
 MIN_MATCHING_FACES = 2
 
 
@@ -88,7 +89,7 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
             os.path.join(MODEL_CACHE_DIR, "facedet/facedet.onnx"),
             config="",
             input_size=(320, 320),
-            score_threshold=self.face_config.detection_threshold,
+            score_threshold=0.5,
             nms_threshold=0.3,
         )
         self.landmark_detector = cv2.face.createFacemarkLBF()
@@ -212,11 +213,21 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
         self.face_recognizer = None
         self.label_map = {}
 
-    def __detect_face(self, input: np.ndarray) -> tuple[int, int, int, int]:
+    def __detect_face(
+        self, input: np.ndarray, threshold: float
+    ) -> tuple[int, int, int, int]:
         """Detect faces in input image."""
         if not self.face_detector:
             return None
 
+        # YN face detector fails at very high resolutions,
+        # so rescale to a size where faces are detected reliably
+        # while still retaining plenty of detail
+        if input.shape[0] > MAX_DETECTION_HEIGHT:
+            scale_factor = MAX_DETECTION_HEIGHT / input.shape[0]
+            new_width = int(scale_factor * input.shape[1])
+            input = cv2.resize(input, (new_width, MAX_DETECTION_HEIGHT))
+
         self.face_detector.setInputSize((input.shape[1], input.shape[0]))
         faces = self.face_detector.detect(input)
 
@@ -226,6 +237,9 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
         face = None
 
         for _, potential_face in enumerate(faces[1]):
+            if potential_face[-1] < threshold:
+                continue
+
             raw_bbox = potential_face[0:4].astype(np.uint16)
             x: int = max(raw_bbox[0], 0)
             y: int = max(raw_bbox[1], 0)
@@ -300,7 +314,7 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
             rgb = cv2.cvtColor(frame, cv2.COLOR_YUV2RGB_I420)
             left, top, right, bottom = person_box
             person = rgb[top:bottom, left:right]
-            face_box = self.__detect_face(person)
+            face_box = self.__detect_face(person, self.face_config.detection_threshold)
 
             if not face_box:
                 logger.debug("Detected no faces for person object.")
@@ -406,7 +420,10 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
                     ),
                     cv2.IMREAD_COLOR,
                 )
-                face_box = self.__detect_face(img)
+
+                # use a lower detection threshold since we expect a face
+                # to be visible in uploaded images
+                face_box = self.__detect_face(img, 0.5)
 
                 if not face_box:
                     return {