From f39475a383151e7dd97e11537b90d47302aa457a Mon Sep 17 00:00:00 2001
From: Nicolas Mowen <nickmowen213@gmail.com>
Date: Thu, 21 Aug 2025 06:18:55 -0600
Subject: [PATCH] Support face recognition via RKNN (#19687)

* Add support for face recognition via RKNN

* Fix crash when adding a camera via the UI

* Update docs regarding support for face recognition

* Formatting
---
 docs/docs/configuration/face_recognition.md   |  4 +--
 .../hardware_acceleration_enrichments.md      |  2 +-
 frigate/embeddings/onnx/runner.py             | 26 +++----------------
 frigate/util/rknn_converter.py                | 14 ++++++++++
 web/src/hooks/use-camera-activity.ts          |  4 ++-
 5 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/docs/docs/configuration/face_recognition.md b/docs/docs/configuration/face_recognition.md
index 118008aa6..d14946eaf 100644
--- a/docs/docs/configuration/face_recognition.md
+++ b/docs/docs/configuration/face_recognition.md
@@ -24,7 +24,7 @@ Frigate needs to first detect a `person` before it can detect and recognize a fa
 Frigate has support for two face recognition model types:
 
 - **small**: Frigate will run a FaceNet embedding model to recognize faces, which runs locally on the CPU. This model is optimized for efficiency and is not as accurate.
-- **large**: Frigate will run a large ArcFace embedding model that is optimized for accuracy. It is only recommended to be run when an integrated or dedicated GPU is available.
+- **large**: Frigate will run a large ArcFace embedding model that is optimized for accuracy. It is recommended only when an integrated or dedicated GPU or NPU is available.
 
 In both cases, a lightweight face landmark detection model is also used to align faces before running recognition.
 
@@ -34,7 +34,7 @@ All of these features run locally on your system.
 
 The `small` model is optimized for efficiency and runs on the CPU, most CPUs should run the model efficiently.
 
-The `large` model is optimized for accuracy, an integrated or discrete GPU is required. See the [Hardware Accelerated Enrichments](/configuration/hardware_acceleration_enrichments.md) documentation.
+The `large` model is optimized for accuracy; an integrated or discrete GPU or NPU is required. See the [Hardware Accelerated Enrichments](/configuration/hardware_acceleration_enrichments.md) documentation.
 
 ## Configuration
 
diff --git a/docs/docs/configuration/hardware_acceleration_enrichments.md b/docs/docs/configuration/hardware_acceleration_enrichments.md
index 552dbd2e7..84688b8b4 100644
--- a/docs/docs/configuration/hardware_acceleration_enrichments.md
+++ b/docs/docs/configuration/hardware_acceleration_enrichments.md
@@ -24,7 +24,7 @@ Object detection and enrichments (like Semantic Search, Face Recognition, and Li
   - Jetson devices will automatically be detected and used for enrichments in the `-tensorrt-jp6` Frigate image.
 
 - **RockChip**
-  - RockChip NPU will automatically be detected and used for semantic search (v1 only) in the `-rk` Frigate image.
+  - RockChip NPU will automatically be detected and used for semantic search (v1 only) and face recognition in the `-rk` Frigate image.
 
 Utilizing a GPU for enrichments does not require you to use the same GPU for object detection. For example, you can run the `tensorrt` Docker image for enrichments and still use other dedicated hardware like a Coral or Hailo for object detection. However, one combination that is not supported is TensorRT for object detection and OpenVINO for enrichments.
 
diff --git a/frigate/embeddings/onnx/runner.py b/frigate/embeddings/onnx/runner.py
index 3a2acc7f6..9158be9f4 100644
--- a/frigate/embeddings/onnx/runner.py
+++ b/frigate/embeddings/onnx/runner.py
@@ -184,6 +184,8 @@ class RKNNModelRunner:
 
         if "vision" in model_name:
             return ["pixel_values"]
+        elif "arcface" in model_name:
+            return ["data"]
         else:
             # Default fallback - try to infer from model type
             if self.model_type and "jina-clip" in self.model_type:
@@ -199,6 +201,8 @@ class RKNNModelRunner:
         model_name = os.path.basename(self.model_path).lower()
         if "vision" in model_name:
             return 224  # CLIP V1 uses 224x224
+        elif "arcface" in model_name:
+            return 112
         return -1
 
     def run(self, inputs: dict[str, Any]) -> Any:
@@ -222,28 +226,6 @@ class RKNNModelRunner:
                         rknn_inputs.append(pixel_data)
                     else:
                         rknn_inputs.append(inputs[name])
-                else:
-                    logger.warning(f"Input '{name}' not found in inputs, using default")
-
-                    if name == "pixel_values":
-                        batch_size = 1
-                        if inputs:
-                            for val in inputs.values():
-                                if hasattr(val, "shape") and len(val.shape) > 0:
-                                    batch_size = val.shape[0]
-                                    break
-                        # Create default in NHWC format as expected by RKNN
-                        rknn_inputs.append(
-                            np.zeros((batch_size, 224, 224, 3), dtype=np.float32)
-                        )
-                    else:
-                        batch_size = 1
-                        if inputs:
-                            for val in inputs.values():
-                                if hasattr(val, "shape") and len(val.shape) > 0:
-                                    batch_size = val.shape[0]
-                                    break
-                        rknn_inputs.append(np.zeros((batch_size, 1), dtype=np.float32))
 
             outputs = self.rknn.inference(inputs=rknn_inputs)
             return outputs
diff --git a/frigate/util/rknn_converter.py b/frigate/util/rknn_converter.py
index e42547320..d9c42bf0b 100644
--- a/frigate/util/rknn_converter.py
+++ b/frigate/util/rknn_converter.py
@@ -32,6 +32,11 @@ MODEL_TYPE_CONFIGS = {
         "std_values": [[0.26862954 * 255, 0.26130258 * 255, 0.27577711 * 255]],
         "target_platform": None,  # Will be set dynamically
     },
+    "arcface-r100": {
+        "mean_values": [[127.5, 127.5, 127.5]],
+        "std_values": [[127.5, 127.5, 127.5]],
+        "target_platform": None,  # Will be set dynamically
+    },
 }
 
 
@@ -41,6 +46,9 @@ def get_rknn_model_type(model_path: str) -> str | None:
 
     model_name = os.path.basename(str(model_path)).lower()
 
+    if "arcface" in model_name:
+        return "arcface-r100"
+
     if any(keyword in model_name for keyword in ["yolo", "yolox", "yolonas"]):
         return model_name
 
@@ -184,6 +192,12 @@ def convert_onnx_to_rknn(
                 inputs=["pixel_values"],
                 input_size_list=[[1, 3, 224, 224]],
             )
+        elif model_type == "arcface-r100":
+            load_output = rknn.load_onnx(
+                model=onnx_path,
+                inputs=["data"],
+                input_size_list=[[1, 3, 112, 112]],
+            )
         else:
             load_output = rknn.load_onnx(model=onnx_path)
 
diff --git a/web/src/hooks/use-camera-activity.ts b/web/src/hooks/use-camera-activity.ts
index b81ad54b0..baec4e228 100644
--- a/web/src/hooks/use-camera-activity.ts
+++ b/web/src/hooks/use-camera-activity.ts
@@ -144,7 +144,9 @@ export function useCameraActivity(
       return false;
     }
 
-    return cameras[camera.name].camera_fps == 0 && stats["service"].uptime > 60;
+    return (
+      cameras[camera.name]?.camera_fps == 0 && stats["service"].uptime > 60
+    );
   }, [camera, stats]);
 
   const isCameraEnabled = cameraEnabled ? cameraEnabled === "ON" : true;