From 33f3ea3b592a0c39c2f8a2f9daba4e53e545b875 Mon Sep 17 00:00:00 2001
From: baudneo <86508179+baudneo@users.noreply.github.com>
Date: Mon, 18 Aug 2025 17:43:53 -0600
Subject: [PATCH] Enrichments: Allow targeting a specific GPU ID (#19342)

---
 docs/docs/configuration/face_recognition.md   |  3 +++
 .../license_plate_recognition.md              |  4 ++--
 docs/docs/configuration/reference.md          |  7 +++++++
 docs/docs/configuration/semantic_search.md    |  8 ++++++--
 frigate/config/classification.py              | 19 +++++++++++++++----
 frigate/data_processing/common/face/model.py  |  2 +-
 frigate/data_processing/real_time/face.py     |  2 +-
 frigate/embeddings/embeddings.py              |  8 ++++----
 frigate/embeddings/onnx/face_embedding.py     |  6 ++++--
 frigate/embeddings/onnx/jina_v1_embedding.py  |  2 --
 frigate/embeddings/onnx/jina_v2_embedding.py  |  1 -
 11 files changed, 43 insertions(+), 19 deletions(-)

diff --git a/docs/docs/configuration/face_recognition.md b/docs/docs/configuration/face_recognition.md
index 3026615d4..118008aa6 100644
--- a/docs/docs/configuration/face_recognition.md
+++ b/docs/docs/configuration/face_recognition.md
@@ -73,6 +73,9 @@ Fine-tune face recognition with these optional parameters at the global level of
   - Default: `100`.
 - `blur_confidence_filter`: Enables a filter that calculates how blurry the face is and adjusts the confidence based on this.
   - Default: `True`.
+- `device`: Target a specific device to run the face recognition model on (useful for multi-GPU installations).
+  - Default: `None`.
+  - Note: This setting is only applicable when using the `large` model. See [onnxruntime's provider options](https://onnxruntime.ai/docs/execution-providers/).
 
 ## Usage
 
diff --git a/docs/docs/configuration/license_plate_recognition.md b/docs/docs/configuration/license_plate_recognition.md
index 933fd72d3..cbc3781f9 100644
--- a/docs/docs/configuration/license_plate_recognition.md
+++ b/docs/docs/configuration/license_plate_recognition.md
@@ -67,9 +67,9 @@ Fine-tune the LPR feature using these optional parameters at the global level of
 - **`min_area`**: Defines the minimum area (in pixels) a license plate must be before recognition runs.
   - Default: `1000` pixels. Note: this is intentionally set very low as it is an _area_ measurement (length x width). For reference, 1000 pixels represents a ~32x32 pixel square in your camera image.
   - Depending on the resolution of your camera's `detect` stream, you can increase this value to ignore small or distant plates.
-- **`device`**: Device to use to run license plate recognition models.
+- **`device`**: Device to use to run license plate detection *and* recognition models.
   - Default: `CPU`
-  - This can be `CPU` or `GPU`. For users without a model that detects license plates natively, using a GPU may increase performance of the models, especially the YOLOv9 license plate detector model. See the [Hardware Accelerated Enrichments](/configuration/hardware_acceleration_enrichments.md) documentation.
+  - This can be `CPU` or one of [onnxruntime's provider options](https://onnxruntime.ai/docs/execution-providers/). For users without a model that detects license plates natively, using a GPU may increase performance of the models, especially the YOLOv9 license plate detector model. See the [Hardware Accelerated Enrichments](/configuration/hardware_acceleration_enrichments.md) documentation.
 - **`model_size`**: The size of the model used to detect text on plates.
   - Default: `small`
   - This can be `small` or `large`. The `large` model uses an enhanced text detector and is more accurate at finding text on plates but slower than the `small` model. For most users, the small model is recommended. For users in countries with multiple lines of text on plates, the large model is recommended. Note that using the large model does not improve _text recognition_, but it may improve _text detection_.
diff --git a/docs/docs/configuration/reference.md b/docs/docs/configuration/reference.md
index c4c024853..341d0cdc1 100644
--- a/docs/docs/configuration/reference.md
+++ b/docs/docs/configuration/reference.md
@@ -592,6 +592,9 @@ semantic_search:
   # Optional: Set the model size used for embeddings. (default: shown below)
   # NOTE: small model runs on CPU and large model runs on GPU
   model_size: "small"
+  # Optional: Target a specific device to run the model (default: shown below)
+  # NOTE: See https://onnxruntime.ai/docs/execution-providers/ for more information
+  device: None
 
 # Optional: Configuration for face recognition capability
 # NOTE: enabled, min_area can be overridden at the camera level
@@ -615,6 +618,9 @@ face_recognition:
   blur_confidence_filter: True
   # Optional: Set the model size used face recognition. (default: shown below)
   model_size: small
+  # Optional: Target a specific device to run the model (default: shown below)
+  # NOTE: See https://onnxruntime.ai/docs/execution-providers/ for more information
+  device: None
 
 # Optional: Configuration for license plate recognition capability
 # NOTE: enabled, min_area, and enhancement can be overridden at the camera level
@@ -622,6 +628,7 @@ lpr:
   # Optional: Enable license plate recognition (default: shown below)
   enabled: False
   # Optional: The device to run the models on (default: shown below)
+  # NOTE: See https://onnxruntime.ai/docs/execution-providers/ for more information
   device: CPU
   # Optional: Set the model size used for text detection. (default: shown below)
   model_size: small
diff --git a/docs/docs/configuration/semantic_search.md b/docs/docs/configuration/semantic_search.md
index fc85ef259..e04df3a56 100644
--- a/docs/docs/configuration/semantic_search.md
+++ b/docs/docs/configuration/semantic_search.md
@@ -78,17 +78,21 @@ Switching between V1 and V2 requires reindexing your embeddings. The embeddings
 
 ### GPU Acceleration
 
-The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU hardware, when available. This depends on the Docker build that is used.
+The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU hardware, when available. This depends on the Docker build that is used. You can also target a specific device in a multi-GPU installation.
 
 ```yaml
 semantic_search:
   enabled: True
   model_size: large
+  # Optional: if using the 'large' model in a multi-GPU installation
+  device: 0
 ```
 
 :::info
 
-If the correct build is used for your GPU and the `large` model is configured, then the GPU will be detected and used automatically.
+If the correct build is used for your GPU and the `large` model is configured, then the GPU will be detected and used automatically.
+Specify the `device` option to target a specific GPU in a multi-GPU system (see [onnxruntime's provider options](https://onnxruntime.ai/docs/execution-providers/)).
+If you do not specify a device, the first available GPU will be used.
 
 See the [Hardware Accelerated Enrichments](/configuration/hardware_acceleration_enrichments.md) documentation.
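Taken together, the documentation changes above add the same optional `device` override to all three enrichments. A minimal combined sketch of how the overrides might look in one config (the `0` is illustrative; valid values depend on the execution providers available in your onnxruntime build):

```yaml
semantic_search:
  enabled: True
  model_size: large
  device: 0 # illustrative: target a specific GPU id

face_recognition:
  enabled: True
  model_size: large
  device: 0 # only applies to the `large` model, per the face_recognition docs above

lpr:
  enabled: True
  device: 0 # replaces the old CPU/GPU-only value, per the classification.py change below
```

If `device` is omitted, each model keeps the existing behavior shown in the code diffs below: GPU for `large` models, CPU otherwise.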
diff --git a/frigate/config/classification.py b/frigate/config/classification.py
index 234113dd2..63e89421c 100644
--- a/frigate/config/classification.py
+++ b/frigate/config/classification.py
@@ -130,6 +130,11 @@ class SemanticSearchConfig(FrigateBaseModel):
     model_size: str = Field(
         default="small", title="The size of the embeddings model used."
     )
+    device: Optional[str] = Field(
+        default=None,
+        title="The device key to use for semantic search.",
+        description="This is an override to target a specific device. See https://onnxruntime.ai/docs/execution-providers/ for more information.",
+    )
 
 
 class TriggerConfig(FrigateBaseModel):
@@ -196,6 +201,11 @@ class FaceRecognitionConfig(FrigateBaseModel):
     blur_confidence_filter: bool = Field(
         default=True, title="Apply blur quality filter to face confidence."
     )
+    device: Optional[str] = Field(
+        default=None,
+        title="The device key to use for face recognition.",
+        description="This is an override to target a specific device. See https://onnxruntime.ai/docs/execution-providers/ for more information.",
+    )
 
 
 class CameraFaceRecognitionConfig(FrigateBaseModel):
@@ -209,10 +219,6 @@ class CameraFaceRecognitionConfig(FrigateBaseModel):
 
 class LicensePlateRecognitionConfig(FrigateBaseModel):
     enabled: bool = Field(default=False, title="Enable license plate recognition.")
-    device: Optional[EnrichmentsDeviceEnum] = Field(
-        default=EnrichmentsDeviceEnum.CPU,
-        title="The device used for license plate recognition.",
-    )
     model_size: str = Field(
         default="small", title="The size of the embeddings model used."
     )
@@ -258,6 +264,11 @@ class LicensePlateRecognitionConfig(FrigateBaseModel):
         default=False,
         title="Save plates captured for LPR for debugging purposes.",
     )
+    device: Optional[str] = Field(
+        default=None,
+        title="The device key to use for LPR.",
+        description="This is an override to target a specific device. See https://onnxruntime.ai/docs/execution-providers/ for more information.",
+    )
 
 
 class CameraLicensePlateRecognitionConfig(FrigateBaseModel):
diff --git a/frigate/data_processing/common/face/model.py b/frigate/data_processing/common/face/model.py
index f230a1b2c..21de37768 100644
--- a/frigate/data_processing/common/face/model.py
+++ b/frigate/data_processing/common/face/model.py
@@ -269,7 +269,7 @@ class ArcFaceRecognizer(FaceRecognizer):
     def __init__(self, config: FrigateConfig):
         super().__init__(config)
         self.mean_embs: dict[int, np.ndarray] = {}
-        self.face_embedder: ArcfaceEmbedding = ArcfaceEmbedding()
+        self.face_embedder: ArcfaceEmbedding = ArcfaceEmbedding(config.face_recognition)
         self.model_builder_queue: queue.Queue | None = None
 
     def clear(self) -> None:
diff --git a/frigate/data_processing/real_time/face.py b/frigate/data_processing/real_time/face.py
index a9e94ac92..c0cd50894 100644
--- a/frigate/data_processing/real_time/face.py
+++ b/frigate/data_processing/real_time/face.py
@@ -171,7 +171,7 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
 
         # don't run for non person objects
         if obj_data.get("label") != "person":
-            logger.debug("Not a processing face for non person object.")
+            logger.debug("Not processing face for a non person object.")
             return
 
         # don't overwrite sub label for objects that have a sub label
diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py
index a0981f669..788e3e6db 100644
--- a/frigate/embeddings/embeddings.py
+++ b/frigate/embeddings/embeddings.py
@@ -112,9 +112,8 @@ class Embeddings:
             self.embedding = JinaV2Embedding(
                 model_size=self.config.semantic_search.model_size,
                 requestor=self.requestor,
-                device="GPU"
-                if self.config.semantic_search.model_size == "large"
-                else "CPU",
+                device=config.semantic_search.device
+                or ("GPU" if config.semantic_search.model_size == "large" else "CPU"),
             )
             self.text_embedding = lambda input_data: self.embedding(
                 input_data, embedding_type="text"
             )
@@ -131,7 +130,8 @@ class Embeddings:
             self.vision_embedding = JinaV1ImageEmbedding(
                 model_size=config.semantic_search.model_size,
                 requestor=self.requestor,
-                device="GPU" if config.semantic_search.model_size == "large" else "CPU",
+                device=config.semantic_search.device
+                or ("GPU" if config.semantic_search.model_size == "large" else "CPU"),
             )
 
     def update_stats(self) -> None:
diff --git a/frigate/embeddings/onnx/face_embedding.py b/frigate/embeddings/onnx/face_embedding.py
index acb4507a2..10d5627d9 100644
--- a/frigate/embeddings/onnx/face_embedding.py
+++ b/frigate/embeddings/onnx/face_embedding.py
@@ -9,6 +9,7 @@ from frigate.const import MODEL_CACHE_DIR
 from frigate.log import redirect_output_to_logger
 from frigate.util.downloader import ModelDownloader
 
+from ...config import FaceRecognitionConfig
 from .base_embedding import BaseEmbedding
 from .runner import ONNXModelRunner
 
@@ -111,7 +112,7 @@ class FaceNetEmbedding(BaseEmbedding):
 
 
 class ArcfaceEmbedding(BaseEmbedding):
-    def __init__(self):
+    def __init__(self, config: FaceRecognitionConfig):
         super().__init__(
             model_name="facedet",
             model_file="arcface.onnx",
             download_urls={
                 "arcface.onnx": "https://github.com/NickM-27/facenet-onnx/releases/download/v1.0/arcface.onnx",
             },
         )
+        self.config = config
         self.download_path = os.path.join(MODEL_CACHE_DIR, self.model_name)
         self.tokenizer = None
         self.feature_extractor = None
@@ -148,7 +150,7 @@ class ArcfaceEmbedding(BaseEmbedding):
 
         self.runner = ONNXModelRunner(
             os.path.join(self.download_path, self.model_file),
-            "GPU",
+            device=self.config.device or "GPU",
         )
 
     def _preprocess_inputs(self, raw_inputs):
diff --git a/frigate/embeddings/onnx/jina_v1_embedding.py b/frigate/embeddings/onnx/jina_v1_embedding.py
index b448ec816..d327fa8ba 100644
--- a/frigate/embeddings/onnx/jina_v1_embedding.py
+++ b/frigate/embeddings/onnx/jina_v1_embedding.py
@@ -128,7 +128,6 @@ class JinaV1TextEmbedding(BaseEmbedding):
         self.runner = ONNXModelRunner(
             os.path.join(self.download_path, self.model_file),
             self.device,
-            self.model_size,
         )
 
     def _preprocess_inputs(self, raw_inputs):
@@ -207,7 +206,6 @@ class JinaV1ImageEmbedding(BaseEmbedding):
         self.runner = ONNXModelRunner(
             os.path.join(self.download_path, self.model_file),
             self.device,
-            self.model_size,
         )
 
     def _preprocess_inputs(self, raw_inputs):
diff --git a/frigate/embeddings/onnx/jina_v2_embedding.py b/frigate/embeddings/onnx/jina_v2_embedding.py
index e9def9a07..50b503d76 100644
--- a/frigate/embeddings/onnx/jina_v2_embedding.py
+++ b/frigate/embeddings/onnx/jina_v2_embedding.py
@@ -128,7 +128,6 @@ class JinaV2Embedding(BaseEmbedding):
         self.runner = ONNXModelRunner(
             os.path.join(self.download_path, self.model_file),
             self.device,
-            self.model_size,
         )
 
     def _preprocess_image(self, image_data: bytes | Image.Image) -> np.ndarray:
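Across all of these call sites the patch applies the same precedence: an explicit `device` override wins, otherwise the model size picks the provider. A minimal standalone sketch of that logic (the `resolve_device` helper below is illustrative, not a function in the patch):

```python
from typing import Optional


def resolve_device(override: Optional[str], model_size: str) -> str:
    """Pick the device passed to ONNXModelRunner: an explicit override
    wins; otherwise large models default to GPU and small ones to CPU."""
    return override or ("GPU" if model_size == "large" else "CPU")


# No override: model size decides, mirroring the embeddings.py change.
assert resolve_device(None, "large") == "GPU"
assert resolve_device(None, "small") == "CPU"
# With an override (e.g. a specific GPU id), the override always wins.
assert resolve_device("0", "large") == "0"
```

Note that `ArcfaceEmbedding` falls back to `"GPU"` unconditionally rather than by model size, since it is only used when face recognition is configured with the `large` model.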