write prompts for genai at the camera level (#13767)

Josh Hawkins 2024-09-16 09:46:11 -05:00 committed by GitHub
parent 06ccf7e9e9
commit e3edcf057c
5 changed files with 46 additions and 10 deletions


@@ -124,13 +124,25 @@ genai:
   model: llava
   prompt: "Describe the {label} in these images from the {camera} security camera."
   object_prompts:
-    person: "Describe the main person in these images (gender, age, clothing, activity, etc). Do not include where the activity is occurring (sidewalk, concrete, driveway, etc). If delivering a package, include the company the package is from."
+    person: "Describe the main person in these images (gender, age, clothing, activity, etc). Do not include where the activity is occurring (sidewalk, concrete, driveway, etc)."
     car: "Label the primary vehicle in these images with just the name of the company if it is a delivery vehicle, or the color make and model."
 ```
 
+Prompts can also be overridden at the camera level to provide a more detailed prompt to the model about your specific camera, if you desire.
+
+```yaml
+cameras:
+  front_door:
+    genai:
+      prompt: "Describe the {label} in these images from the {camera} security camera at the front door of a house, aimed outward toward the street."
+      object_prompts:
+        person: "Describe the main person in these images (gender, age, clothing, activity, etc). Do not include where the activity is occurring (sidewalk, concrete, driveway, etc). If delivering a package, include the company the package is from."
+        cat: "Describe the cat in these images (color, size, tail). Indicate whether or not the cat is by the flower pots. If the cat is chasing a mouse, make up a name for the mouse."
+```
+
 ### Experiment with prompts
 
-Providers also has a public facing chat interface for their models. Download a couple different thumbnails or snapshots from Frigate and try new things in the playground to get descriptions to your liking before updating the prompt in Frigate.
+Many providers also have a public facing chat interface for their models. Download a couple of different thumbnails or snapshots from Frigate and try new things in the playground to get descriptions to your liking before updating the prompt in Frigate.
 
 - OpenAI - [ChatGPT](https://chatgpt.com)
 - Gemini - [Google AI Studio](https://aistudio.google.com)
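
To make collecting test images for those playgrounds easier, here is a minimal sketch that downloads an event's thumbnail and snapshot from a running Frigate instance. The base URL and event id are placeholder assumptions, and it presumes the standard Frigate HTTP API endpoints `/api/events/<id>/thumbnail.jpg` and `/api/events/<id>/snapshot.jpg`; adjust both for your setup.

```python
import requests

# Placeholder values -- substitute your Frigate host and a real event id.
FRIGATE_URL = "http://frigate.local:5000"
EVENT_ID = "1726500000.123456-abc123"


def save_event_images(event_id: str) -> None:
    """Download the thumbnail and snapshot for an event so they can be
    uploaded to a provider playground (ChatGPT, Google AI Studio, etc.)."""
    for kind in ("thumbnail", "snapshot"):
        resp = requests.get(
            f"{FRIGATE_URL}/api/events/{event_id}/{kind}.jpg", timeout=10
        )
        resp.raise_for_status()
        with open(f"{event_id}-{kind}.jpg", "wb") as f:
            f.write(resp.content)


if __name__ == "__main__":
    save_event_images(EVENT_ID)
```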


@@ -504,7 +504,7 @@ semantic_search:
 # to Google or OpenAI's LLMs to generate descriptions. It can be overridden at
 # the camera level (enabled: False) to enhance privacy for indoor cameras.
 genai:
-  # Optional: Enable Google Gemini description generation (default: shown below)
+  # Optional: Enable AI description generation (default: shown below)
   enabled: False
   # Required if enabled: Provider must be one of ollama, gemini, or openai
   provider: ollama
@@ -712,6 +712,18 @@ cameras:
       # By default the cameras are sorted alphabetically.
       order: 0
 
+    # Optional: Configuration for AI generated tracked object descriptions
+    genai:
+      # Optional: Enable AI description generation (default: shown below)
+      enabled: False
+      # Optional: The default prompt for generating descriptions. Can use replacement
+      # variables like "label", "sub_label", "camera" to make more dynamic. (default: shown below)
+      prompt: "Describe the {label} in the sequence of images with as much detail as possible. Do not describe the background."
+      # Optional: Object specific prompts to customize description results
+      # Format: {label}: {prompt}
+      object_prompts:
+        person: "My special person prompt."
+
 # Optional
 ui:
   # Optional: Set a timezone to use in the UI (default: use browser local time)
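
The `prompt` comment above mentions replacement variables such as `label`, `sub_label`, and `camera`. These are ordinary `str.format` placeholders, so a quick sketch with made-up metadata values (only the three variable names come from the comment; the values here are invented for illustration):

```python
# Invented metadata for a tracked object; "label", "sub_label", and "camera"
# are the replacement variables named in the comment above.
metadata = {"label": "person", "sub_label": "ups", "camera": "front_door"}

prompt = "Describe the {label} in these images from the {camera} security camera."

# str.format(**metadata) fills only the placeholders the template actually
# uses; unused keys (like sub_label here) are simply ignored.
print(prompt.format(**metadata))
# Describe the person in these images from the front_door security camera.
```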


@@ -763,8 +763,14 @@ class GenAIConfig(FrigateBaseModel):
     object_prompts: Dict[str, str] = Field(default={}, title="Object specific prompts.")
 
 
-class GenAICameraConfig(FrigateBaseModel):
+# uses BaseModel because some global attributes are not available at the camera level
+class GenAICameraConfig(BaseModel):
     enabled: bool = Field(default=False, title="Enable GenAI for camera.")
+    prompt: str = Field(
+        default="Describe the {label} in the sequence of images with as much detail as possible. Do not describe the background.",
+        title="Default caption prompt.",
+    )
+    object_prompts: Dict[str, str] = Field(default={}, title="Object specific prompts.")
 
 
 class AudioConfig(FrigateBaseModel):

@@ -1520,7 +1526,7 @@ class FrigateConfig(FrigateBaseModel):
                         "live": ...,
                         "objects": ...,
                         "review": ...,
-                        "genai": {"enabled"},
+                        "genai": ...,
                         "motion": ...,
                         "detect": ...,
                         "ffmpeg": ...,

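As a quick illustration of the new camera-level model, the standalone sketch below copies the fields from the hunk above into a plain pydantic `BaseModel` and validates a partial camera `genai` block. The example values are hypothetical; in Frigate the class lives with the rest of the config models and is attached to each camera's config.

```python
from typing import Dict

from pydantic import BaseModel, Field


# Standalone copy of the model added above, for illustration only.
class GenAICameraConfig(BaseModel):
    enabled: bool = Field(default=False, title="Enable GenAI for camera.")
    prompt: str = Field(
        default="Describe the {label} in the sequence of images with as much detail as possible. Do not describe the background.",
        title="Default caption prompt.",
    )
    object_prompts: Dict[str, str] = Field(default={}, title="Object specific prompts.")


# A camera that only overrides the person prompt keeps the defaults for
# everything else (hypothetical values).
cfg = GenAICameraConfig(object_prompts={"person": "Describe the person at the door."})
print(cfg.enabled)                        # False
print(cfg.prompt.startswith("Describe"))  # True
print(cfg.object_prompts["person"])       # Describe the person at the door.
```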

@@ -171,8 +171,11 @@ class EmbeddingMaintainer(threading.Thread):
         self, event: Event, thumbnails: list[bytes], metadata: dict
     ) -> None:
         """Embed the description for an event."""
+        camera_config = self.config.cameras[event.camera]
 
-        description = self.genai_client.generate_description(thumbnails, metadata)
+        description = self.genai_client.generate_description(
+            camera_config, thumbnails, metadata
+        )
 
         if description is None:
             logger.debug("Failed to generate description for %s", event.id)


@@ -4,7 +4,7 @@ import importlib
 import os
 from typing import Optional
 
-from frigate.config import GenAIConfig, GenAIProviderEnum
+from frigate.config import CameraConfig, GenAIConfig, GenAIProviderEnum
 
 PROVIDERS = {}

@@ -28,11 +28,14 @@ class GenAIClient:
         self.provider = self._init_provider()
 
     def generate_description(
-        self, thumbnails: list[bytes], metadata: dict[str, any]
+        self,
+        camera_config: CameraConfig,
+        thumbnails: list[bytes],
+        metadata: dict[str, any],
     ) -> Optional[str]:
         """Generate a description for the frame."""
-        prompt = self.genai_config.object_prompts.get(
-            metadata["label"], self.genai_config.prompt
+        prompt = camera_config.genai.object_prompts.get(
+            metadata["label"], camera_config.genai.prompt
         ).format(**metadata)
         return self._send(prompt, thumbnails)
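
The lookup in `generate_description` above is the heart of the per-camera behavior: use an object-specific prompt when one exists for the tracked object's label, otherwise fall back to the camera's default prompt, then substitute the metadata. A minimal standalone sketch of that selection, with hypothetical prompts and metadata:

```python
# Hypothetical per-camera prompts, mirroring camera_config.genai above.
object_prompts = {
    "person": "Describe the main person in these images from the {camera} camera.",
}
default_prompt = (
    "Describe the {label} in the sequence of images with as much detail as possible."
)


def build_prompt(metadata: dict) -> str:
    """Prefer the object-specific prompt for this label, fall back to the
    default prompt, then fill in the replacement variables."""
    return object_prompts.get(metadata["label"], default_prompt).format(**metadata)


print(build_prompt({"label": "person", "camera": "front_door"}))
# Describe the main person in these images from the front_door camera.
print(build_prompt({"label": "car", "camera": "front_door"}))
# Describe the car in the sequence of images with as much detail as possible.
```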