# blakeblackshear.frigate/frigate/data_processing/common/face/model.py

import logging
import os
from abc import ABC, abstractmethod

import cv2
import numpy as np
from scipy import stats

from frigate.config import FrigateConfig
from frigate.const import MODEL_CACHE_DIR
from frigate.embeddings.onnx.facenet import ArcfaceEmbedding

logger = logging.getLogger(__name__)


class FaceRecognizer(ABC):
    """Face recognition runner."""

    def __init__(self, config: FrigateConfig) -> None:
        self.config = config
        self.landmark_detector = cv2.face.createFacemarkLBF()
        self.landmark_detector.loadModel(
            os.path.join(MODEL_CACHE_DIR, "facedet/landmarkdet.yaml")
        )
    @abstractmethod
    def build(self) -> None:
        """Build face recognition model."""
        pass

    @abstractmethod
    def clear(self) -> None:
        """Clear current built model."""
        pass

    @abstractmethod
    def classify(self, face_image: np.ndarray) -> tuple[str, float] | None:
        pass
    def align_face(
        self,
        image: np.ndarray,
        output_width: int,
        output_height: int,
    ) -> np.ndarray:
        # landmark detection is run on grayscale images
        if image.ndim == 3:
            land_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            land_image = image

        _, lands = self.landmark_detector.fit(
            land_image, np.array([(0, 0, land_image.shape[1], land_image.shape[0])])
        )
        landmarks: np.ndarray = lands[0][0]

        # get landmarks for eyes
        leftEyePts = landmarks[42:48]
        rightEyePts = landmarks[36:42]

        # compute the center of mass for each eye
        leftEyeCenter = leftEyePts.mean(axis=0).astype("int")
        rightEyeCenter = rightEyePts.mean(axis=0).astype("int")

        # compute the angle between the eye centroids
        dY = rightEyeCenter[1] - leftEyeCenter[1]
        dX = rightEyeCenter[0] - leftEyeCenter[0]
        angle = np.degrees(np.arctan2(dY, dX)) - 180

        # compute the desired right eye x-coordinate based on the
        # desired x-coordinate of the left eye
        desiredRightEyeX = 1.0 - 0.35

        # determine the scale of the new resulting image by taking
        # the ratio of the distance between the eyes in the *current*
        # image to the distance between the eyes in the *desired* image
        dist = np.sqrt((dX**2) + (dY**2))
        desiredDist = desiredRightEyeX - 0.35
        desiredDist *= output_width
        scale = desiredDist / dist

        # compute the center (x, y)-coordinates (i.e., the median point)
        # between the two eyes in the input image
        eyesCenter = (
            int((leftEyeCenter[0] + rightEyeCenter[0]) // 2),
            int((leftEyeCenter[1] + rightEyeCenter[1]) // 2),
        )

        # grab the rotation matrix for rotating and scaling the face
        M = cv2.getRotationMatrix2D(eyesCenter, angle, scale)

        # update the translation component of the matrix
        tX = output_width * 0.5
        tY = output_height * 0.35
        M[0, 2] += tX - eyesCenter[0]
        M[1, 2] += tY - eyesCenter[1]

        # apply the affine transformation
        return cv2.warpAffine(
            image, M, (output_width, output_height), flags=cv2.INTER_CUBIC
        )
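
    # Worked example (illustrative numbers, not from the source): with the eye
    # centers detected 100 px apart and output_width = 112, the desired
    # inter-eye distance is (0.65 - 0.35) * 112 = 33.6 px, so scale =
    # 33.6 / 100 = 0.336; the affine matrix then rotates and scales about the
    # eye midpoint and translates it to (0.5 * width, 0.35 * height) in the
    # output crop.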
    def get_blur_factor(self, input: np.ndarray) -> float:
        """Calculates the factor for the confidence based on the blur of the image."""
        if not self.config.face_recognition.blur_confidence_filter:
            return 1.0

        variance = cv2.Laplacian(input, cv2.CV_64F).var()

        if variance < 60:  # image is very blurry
            return 0.96
        elif variance < 70:  # image is moderately blurry
            return 0.98
        elif variance < 80:  # image is slightly blurry
            return 0.99
        else:
            return 1.0
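
    # Example (assumed numbers): a crop whose Laplacian variance is 65 falls in
    # the "moderately blurry" band above, so the recognition score is
    # multiplied by 0.98 before being reported.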


class LBPHRecognizer(FaceRecognizer):
    def __init__(self, config: FrigateConfig):
        super().__init__(config)
        self.label_map: dict[int, str] = {}
        self.recognizer: cv2.face.LBPHFaceRecognizer | None = None

    def clear(self) -> None:
        self.recognizer = None
        self.label_map = {}

    def build(self) -> None:
        if not self.landmark_detector:
            return None

        labels = []
        faces = []

        idx = 0
        dir = "/media/frigate/clips/faces"
        for name in os.listdir(dir):
            if name == "train":
                continue

            face_folder = os.path.join(dir, name)

            if not os.path.isdir(face_folder):
                continue

            self.label_map[idx] = name

            for image in os.listdir(face_folder):
                img = cv2.imread(os.path.join(face_folder, image))

                if img is None:
                    continue

                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                img = self.align_face(img, img.shape[1], img.shape[0])
                faces.append(img)
                labels.append(idx)

            idx += 1

        if not faces:
            return

        self.recognizer: cv2.face.LBPHFaceRecognizer = (
            cv2.face.LBPHFaceRecognizer_create(
                radius=2, threshold=(1 - self.config.face_recognition.min_score) * 1000
            )
        )
        self.recognizer.train(faces, np.array(labels))
    def classify(self, face_image: np.ndarray) -> tuple[str, float] | None:
        if not self.landmark_detector:
            return None

        if not self.label_map or not self.recognizer:
            self.build()

        if not self.recognizer:
            return None

        # face recognition is best run on grayscale images
        img = cv2.cvtColor(face_image, cv2.COLOR_BGR2GRAY)

        # get blur factor before aligning face
        blur_factor = self.get_blur_factor(img)
        logger.debug(f"face detected with blurriness {blur_factor}")

        # align face and run recognition
        img = self.align_face(img, img.shape[1], img.shape[0])
        index, distance = self.recognizer.predict(img)

        if index == -1:
            return None

        score = (1.0 - (distance / 1000)) * blur_factor
        return self.label_map[index], round(score, 2)
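
    # Example (assumed numbers): LBPH predict() returns a distance, not a
    # similarity, so a distance of 300 maps to (1.0 - 300 / 1000) = 0.70, and a
    # slightly blurry crop (blur_factor 0.99) yields a final score of 0.69.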


class ArcFaceRecognizer(FaceRecognizer):
    def __init__(self, config: FrigateConfig):
        super().__init__(config)
        self.mean_embs: dict[str, np.ndarray] = {}
        self.face_embedder: ArcfaceEmbedding = ArcfaceEmbedding()

    def clear(self) -> None:
        self.mean_embs = {}

    def build(self) -> None:
        if not self.landmark_detector:
            return None

        face_embeddings_map: dict[str, list[np.ndarray]] = {}
        dir = "/media/frigate/clips/faces"
        for name in os.listdir(dir):
            if name == "train":
                continue

            face_folder = os.path.join(dir, name)

            if not os.path.isdir(face_folder):
                continue

            face_embeddings_map[name] = []

            for image in os.listdir(face_folder):
                img = cv2.imread(os.path.join(face_folder, image))

                if img is None:
                    continue

                img = self.align_face(img, img.shape[1], img.shape[0])
                emb = self.face_embedder([img])[0].squeeze()
                face_embeddings_map[name].append(emb)

        if not face_embeddings_map:
            return

        for name, embs in face_embeddings_map.items():
            self.mean_embs[name] = stats.trim_mean(embs, 0.15)
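
    # Note: stats.trim_mean(embs, 0.15) discards the lowest and highest 15% of
    # values per embedding dimension before averaging, so a few badly aligned
    # or mislabeled training crops pull the class centroid less than a plain
    # mean would.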
    def similarity_to_confidence(
        self, cosine_similarity: float, median=0.3, range_width=0.6, slope_factor=12
    ):
        """
        Default sigmoid function to map cosine similarity to confidence.

        Args:
            cosine_similarity (float): The input cosine similarity.
            median (float): Assumed median of cosine similarity distribution.
            range_width (float): Assumed range of cosine similarity distribution
                (90th percentile - 10th percentile).
            slope_factor (float): Adjusts the steepness of the curve.

        Returns:
            float: The confidence score.
        """
        # Calculate slope and bias
        slope = slope_factor / range_width
        bias = median

        # Calculate confidence
        confidence = 1 / (1 + np.exp(-slope * (cosine_similarity - bias)))
        return confidence
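
    # Worked example with the defaults above (slope = 12 / 0.6 = 20, bias = 0.3):
    #   cosine_similarity = 0.30 -> 1 / (1 + e^0)  = 0.50
    #   cosine_similarity = 0.40 -> 1 / (1 + e^-2) ~= 0.88
    #   cosine_similarity = 0.20 -> 1 / (1 + e^2)  ~= 0.12
    # Similarities near the assumed median map to ~0.5 confidence, and the
    # curve saturates quickly on either side.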
    def classify(self, face_image: np.ndarray) -> tuple[str, float] | None:
        if not self.landmark_detector:
            return None

        if not self.mean_embs:
            self.build()

        if not self.mean_embs:
            return None

        # get blur factor before aligning face
        blur_factor = self.get_blur_factor(face_image)
        logger.debug(f"face detected with blurriness {blur_factor}")

        # align face and run recognition
        img = self.align_face(face_image, face_image.shape[1], face_image.shape[0])
        embedding = self.face_embedder([img])[0].squeeze()

        score = 0
        label = ""

        # compare against each person's trimmed-mean embedding and keep the
        # best match by confidence (the sigmoid is monotonic in similarity)
        for name, mean_emb in self.mean_embs.items():
            dot_product = np.dot(embedding, mean_emb)
            magnitude_A = np.linalg.norm(embedding)
            magnitude_B = np.linalg.norm(mean_emb)

            cosine_similarity = dot_product / (magnitude_A * magnitude_B)
            confidence = self.similarity_to_confidence(cosine_similarity)

            if confidence > score:
                score = confidence
                label = name

        if score < self.config.face_recognition.min_score:
            return None

        return label, round(score * blur_factor, 2)
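

# Usage sketch (illustrative only; names like `config` and `bgr_face_crop` are
# assumptions, not part of this module). Both recognizers share the same
# lifecycle: build() trains/embeds from /media/frigate/clips/faces, classify()
# scores a single BGR face crop, and clear() drops the built model so the next
# classify() call rebuilds it.
#
#     recognizer = ArcFaceRecognizer(config)   # or LBPHRecognizer(config)
#     recognizer.build()
#     result = recognizer.classify(bgr_face_crop)
#     if result is not None:
#         name, score = result
#     recognizer.clear()  # e.g. after new training images are added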