Mirror of https://github.com/blakeblackshear/frigate.git, synced 2025-09-14 17:52:10 +02:00
Enrichments: Allow targeting a specific GPU ID (#19342)
Commit 33f3ea3b59 (parent 83e9ae616a)
@@ -73,6 +73,9 @@ Fine-tune face recognition with these optional parameters at the global level of
   - Default: `100`.
 - `blur_confidence_filter`: Enables a filter that calculates how blurry the face is and adjusts the confidence based on this.
   - Default: `True`.
+- `device`: Target a specific device to run the face recognition model on (multi-GPU installation).
+  - Default: `None`.
+  - Note: This setting is only applicable when using the `large` model. See [onnxruntime's provider options](https://onnxruntime.ai/docs/execution-providers/)
 
 ## Usage
 
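A minimal sketch of the option added above, assuming the `large` face recognition model on a multi-GPU host (the `0` GPU ID is illustrative; it mirrors the semantic search example later in this commit):

```yaml
face_recognition:
  enabled: True
  # device is only applicable with the large model
  model_size: large
  # Optional: target the first GPU in a multi-GPU installation
  device: 0
```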
@@ -67,9 +67,9 @@ Fine-tune the LPR feature using these optional parameters at the global level of
 - **`min_area`**: Defines the minimum area (in pixels) a license plate must be before recognition runs.
   - Default: `1000` pixels. Note: this is intentionally set very low as it is an _area_ measurement (length x width). For reference, 1000 pixels represents a ~32x32 pixel square in your camera image.
   - Depending on the resolution of your camera's `detect` stream, you can increase this value to ignore small or distant plates.
-- **`device`**: Device to use to run license plate recognition models.
+- **`device`**: Device to use to run license plate detection *and* recognition models.
   - Default: `CPU`
-  - This can be `CPU` or `GPU`. For users without a model that detects license plates natively, using a GPU may increase performance of the models, especially the YOLOv9 license plate detector model. See the [Hardware Accelerated Enrichments](/configuration/hardware_acceleration_enrichments.md) documentation.
+  - This can be `CPU` or one of [onnxruntime's provider options](https://onnxruntime.ai/docs/execution-providers/). For users without a model that detects license plates natively, using a GPU may increase performance of the models, especially the YOLOv9 license plate detector model. See the [Hardware Accelerated Enrichments](/configuration/hardware_acceleration_enrichments.md) documentation.
 - **`model_size`**: The size of the model used to detect text on plates.
   - Default: `small`
   - This can be `small` or `large`. The `large` model uses an enhanced text detector and is more accurate at finding text on plates but slower than the `small` model. For most users, the small model is recommended. For users in countries with multiple lines of text on plates, the large model is recommended. Note that using the large model does not improve _text recognition_, but it may improve _text detection_.
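A hedged sketch of the updated option (values illustrative; `CUDAExecutionProvider` is one of the onnxruntime provider options linked above):

```yaml
lpr:
  enabled: True
  # CPU (default) or one of onnxruntime's provider options
  device: CUDAExecutionProvider
```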
@@ -592,6 +592,9 @@ semantic_search:
   # Optional: Set the model size used for embeddings. (default: shown below)
   # NOTE: small model runs on CPU and large model runs on GPU
   model_size: "small"
+  # Optional: Target a specific device to run the model (default: shown below)
+  # NOTE: See https://onnxruntime.ai/docs/execution-providers/ for more information
+  device: None
 
 # Optional: Configuration for face recognition capability
 # NOTE: enabled, min_area can be overridden at the camera level
@@ -615,6 +618,9 @@ face_recognition:
   blur_confidence_filter: True
   # Optional: Set the model size used for face recognition. (default: shown below)
   model_size: small
+  # Optional: Target a specific device to run the model (default: shown below)
+  # NOTE: See https://onnxruntime.ai/docs/execution-providers/ for more information
+  device: None
 
 # Optional: Configuration for license plate recognition capability
 # NOTE: enabled, min_area, and enhancement can be overridden at the camera level
@@ -622,6 +628,7 @@ lpr:
   # Optional: Enable license plate recognition (default: shown below)
   enabled: False
   # Optional: The device to run the models on (default: shown below)
+  # NOTE: See https://onnxruntime.ai/docs/execution-providers/ for more information
   device: CPU
   # Optional: Set the model size used for text detection. (default: shown below)
   model_size: small
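Putting the three reference snippets above together, a hedged sketch of a multi-GPU setup that pins each enrichment to a device (the GPU IDs and provider name are illustrative, not defaults):

```yaml
semantic_search:
  enabled: True
  model_size: large
  device: 0 # first GPU
face_recognition:
  enabled: True
  model_size: large
  device: 1 # second GPU
lpr:
  enabled: True
  device: CUDAExecutionProvider # or CPU (default)
```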
@@ -78,17 +78,21 @@ Switching between V1 and V2 requires reindexing your embeddings. The embeddings
 
 ### GPU Acceleration
 
-The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU hardware, when available. This depends on the Docker build that is used.
+The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU hardware, when available. This depends on the Docker build that is used. You can also target a specific device in a multi-GPU installation.
 
 ```yaml
 semantic_search:
   enabled: True
   model_size: large
+  # Optional, if using the 'large' model in a multi-GPU installation
+  device: 0
 ```
 
 :::info
 
 If the correct build is used for your GPU and the `large` model is configured, then the GPU will be detected and used automatically.
+Specify the `device` option to target a specific GPU in a multi-GPU system (see [onnxruntime's provider options](https://onnxruntime.ai/docs/execution-providers/)).
+If you do not specify a device, the first available GPU will be used.
 
 See the [Hardware Accelerated Enrichments](/configuration/hardware_acceleration_enrichments.md) documentation.
 
@@ -130,6 +130,11 @@ class SemanticSearchConfig(FrigateBaseModel):
     model_size: str = Field(
         default="small", title="The size of the embeddings model used."
     )
+    device: Optional[str] = Field(
+        default=None,
+        title="The device key to use for semantic search.",
+        description="This is an override, to target a specific device. See https://onnxruntime.ai/docs/execution-providers/ for more information",
+    )
 
 
 class TriggerConfig(FrigateBaseModel):
@@ -196,6 +201,11 @@ class FaceRecognitionConfig(FrigateBaseModel):
     blur_confidence_filter: bool = Field(
         default=True, title="Apply blur quality filter to face confidence."
     )
+    device: Optional[str] = Field(
+        default=None,
+        title="The device key to use for face recognition.",
+        description="This is an override, to target a specific device. See https://onnxruntime.ai/docs/execution-providers/ for more information",
+    )
 
 
 class CameraFaceRecognitionConfig(FrigateBaseModel):
@@ -209,10 +219,6 @@ class CameraFaceRecognitionConfig(FrigateBaseModel):
 
 class LicensePlateRecognitionConfig(FrigateBaseModel):
     enabled: bool = Field(default=False, title="Enable license plate recognition.")
-    device: Optional[EnrichmentsDeviceEnum] = Field(
-        default=EnrichmentsDeviceEnum.CPU,
-        title="The device used for license plate recognition.",
-    )
     model_size: str = Field(
         default="small", title="The size of the embeddings model used."
     )
@@ -258,6 +264,11 @@ class LicensePlateRecognitionConfig(FrigateBaseModel):
         default=False,
         title="Save plates captured for LPR for debugging purposes.",
     )
+    device: Optional[str] = Field(
+        default=None,
+        title="The device key to use for LPR.",
+        description="This is an override, to target a specific device. See https://onnxruntime.ai/docs/execution-providers/ for more information",
+    )
 
 
 class CameraLicensePlateRecognitionConfig(FrigateBaseModel):
@@ -269,7 +269,7 @@ class ArcFaceRecognizer(FaceRecognizer):
     def __init__(self, config: FrigateConfig):
         super().__init__(config)
         self.mean_embs: dict[int, np.ndarray] = {}
-        self.face_embedder: ArcfaceEmbedding = ArcfaceEmbedding()
+        self.face_embedder: ArcfaceEmbedding = ArcfaceEmbedding(config.face_recognition)
         self.model_builder_queue: queue.Queue | None = None
 
     def clear(self) -> None:
@@ -171,7 +171,7 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
 
         # don't run for non person objects
         if obj_data.get("label") != "person":
-            logger.debug("Not a processing face for non person object.")
+            logger.debug("Not processing face for a non person object.")
             return
 
         # don't overwrite sub label for objects that have a sub label
@@ -112,9 +112,8 @@ class Embeddings:
         self.embedding = JinaV2Embedding(
             model_size=self.config.semantic_search.model_size,
             requestor=self.requestor,
-            device="GPU"
-            if self.config.semantic_search.model_size == "large"
-            else "CPU",
+            device=config.semantic_search.device
+            or ("GPU" if config.semantic_search.model_size == "large" else "CPU"),
         )
         self.text_embedding = lambda input_data: self.embedding(
             input_data, embedding_type="text"
@@ -131,7 +130,8 @@ class Embeddings:
         self.vision_embedding = JinaV1ImageEmbedding(
             model_size=config.semantic_search.model_size,
             requestor=self.requestor,
-            device="GPU" if config.semantic_search.model_size == "large" else "CPU",
+            device=config.semantic_search.device
+            or ("GPU" if config.semantic_search.model_size == "large" else "CPU"),
         )
 
     def update_stats(self) -> None:
@@ -9,6 +9,7 @@ from frigate.const import MODEL_CACHE_DIR
 from frigate.log import redirect_output_to_logger
 from frigate.util.downloader import ModelDownloader
 
+from ...config import FaceRecognitionConfig
 from .base_embedding import BaseEmbedding
 from .runner import ONNXModelRunner
 
@@ -111,7 +112,7 @@ class FaceNetEmbedding(BaseEmbedding):
 
 
 class ArcfaceEmbedding(BaseEmbedding):
-    def __init__(self):
+    def __init__(self, config: FaceRecognitionConfig):
         super().__init__(
             model_name="facedet",
             model_file="arcface.onnx",
@@ -119,6 +120,7 @@ class ArcfaceEmbedding(BaseEmbedding):
                 "arcface.onnx": "https://github.com/NickM-27/facenet-onnx/releases/download/v1.0/arcface.onnx",
             },
         )
+        self.config = config
         self.download_path = os.path.join(MODEL_CACHE_DIR, self.model_name)
         self.tokenizer = None
         self.feature_extractor = None
@@ -148,7 +150,7 @@ class ArcfaceEmbedding(BaseEmbedding):
 
         self.runner = ONNXModelRunner(
             os.path.join(self.download_path, self.model_file),
-            "GPU",
+            device=self.config.device or "GPU",
         )
 
     def _preprocess_inputs(self, raw_inputs):
@@ -128,7 +128,6 @@ class JinaV1TextEmbedding(BaseEmbedding):
         self.runner = ONNXModelRunner(
             os.path.join(self.download_path, self.model_file),
             self.device,
-            self.model_size,
         )
 
     def _preprocess_inputs(self, raw_inputs):
@@ -207,7 +206,6 @@ class JinaV1ImageEmbedding(BaseEmbedding):
         self.runner = ONNXModelRunner(
             os.path.join(self.download_path, self.model_file),
             self.device,
-            self.model_size,
         )
 
     def _preprocess_inputs(self, raw_inputs):
@@ -128,7 +128,6 @@ class JinaV2Embedding(BaseEmbedding):
         self.runner = ONNXModelRunner(
             os.path.join(self.download_path, self.model_file),
             self.device,
-            self.model_size,
         )
 
     def _preprocess_image(self, image_data: bytes | Image.Image) -> np.ndarray: