diff --git a/docs/docs/configuration/genai.md b/docs/docs/configuration/genai.md index 623cf588c..26954effe 100644 --- a/docs/docs/configuration/genai.md +++ b/docs/docs/configuration/genai.md @@ -124,13 +124,25 @@ genai: model: llava prompt: "Describe the {label} in these images from the {camera} security camera." object_prompts: - person: "Describe the main person in these images (gender, age, clothing, activity, etc). Do not include where the activity is occurring (sidewalk, concrete, driveway, etc). If delivering a package, include the company the package is from." + person: "Describe the main person in these images (gender, age, clothing, activity, etc). Do not include where the activity is occurring (sidewalk, concrete, driveway, etc)." car: "Label the primary vehicle in these images with just the name of the company if it is a delivery vehicle, or the color make and model." ``` +Prompts can also be overridden at the camera level to provide a more detailed prompt to the model about your specific camera, if you desire. + +```yaml +cameras: + front_door: + genai: + prompt: "Describe the {label} in these images from the {camera} security camera at the front door of a house, aimed outward toward the street." + object_prompts: + person: "Describe the main person in these images (gender, age, clothing, activity, etc). Do not include where the activity is occurring (sidewalk, concrete, driveway, etc). If delivering a package, include the company the package is from." + cat: "Describe the cat in these images (color, size, tail). Indicate whether or not the cat is by the flower pots. If the cat is chasing a mouse, make up a name for the mouse." +``` + ### Experiment with prompts -Providers also has a public facing chat interface for their models. Download a couple different thumbnails or snapshots from Frigate and try new things in the playground to get descriptions to your liking before updating the prompt in Frigate. 
+Many providers also have a public-facing chat interface for their models. Download a couple of different thumbnails or snapshots from Frigate and try new things in the playground to get descriptions to your liking before updating the prompt in Frigate. - OpenAI - [ChatGPT](https://chatgpt.com) - Gemini - [Google AI Studio](https://aistudio.google.com) diff --git a/docs/docs/configuration/reference.md b/docs/docs/configuration/reference.md index 1ae0739e7..020e59979 100644 --- a/docs/docs/configuration/reference.md +++ b/docs/docs/configuration/reference.md @@ -504,7 +504,7 @@ semantic_search: # to Google or OpenAI's LLMs to generate descriptions. It can be overridden at # the camera level (enabled: False) to enhance privacy for indoor cameras. genai: - # Optional: Enable Google Gemini description generation (default: shown below) + # Optional: Enable AI description generation (default: shown below) enabled: False # Required if enabled: Provider must be one of ollama, gemini, or openai provider: ollama @@ -712,6 +712,18 @@ cameras: # By default the cameras are sorted alphabetically. order: 0 + # Optional: Configuration for AI generated tracked object descriptions + genai: + # Optional: Enable AI description generation (default: shown below) + enabled: False + # Optional: The default prompt for generating descriptions. Can use replacement + # variables like "label", "sub_label", "camera" to make more dynamic. (default: shown below) + prompt: "Describe the {label} in the sequence of images with as much detail as possible. Do not describe the background." + # Optional: Object specific prompts to customize description results + # Format: {label}: {prompt} + object_prompts: + person: "My special person prompt." 
+ # Optional ui: # Optional: Set a timezone to use in the UI (default: use browser local time) diff --git a/frigate/config.py b/frigate/config.py index 8c66931c3..79d6c2343 100644 --- a/frigate/config.py +++ b/frigate/config.py @@ -763,8 +763,14 @@ class GenAIConfig(FrigateBaseModel): object_prompts: Dict[str, str] = Field(default={}, title="Object specific prompts.") -class GenAICameraConfig(FrigateBaseModel): +# uses BaseModel because some global attributes are not available at the camera level +class GenAICameraConfig(BaseModel): enabled: bool = Field(default=False, title="Enable GenAI for camera.") + prompt: str = Field( + default="Describe the {label} in the sequence of images with as much detail as possible. Do not describe the background.", + title="Default caption prompt.", + ) + object_prompts: Dict[str, str] = Field(default={}, title="Object specific prompts.") class AudioConfig(FrigateBaseModel): @@ -1520,7 +1526,7 @@ class FrigateConfig(FrigateBaseModel): "live": ..., "objects": ..., "review": ..., - "genai": {"enabled"}, + "genai": ..., "motion": ..., "detect": ..., "ffmpeg": ..., diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index a60663e7d..8e4309d5e 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -171,8 +171,11 @@ class EmbeddingMaintainer(threading.Thread): self, event: Event, thumbnails: list[bytes], metadata: dict ) -> None: """Embed the description for an event.""" + camera_config = self.config.cameras[event.camera] - description = self.genai_client.generate_description(thumbnails, metadata) + description = self.genai_client.generate_description( + camera_config, thumbnails, metadata + ) if description is None: logger.debug("Failed to generate description for %s", event.id) diff --git a/frigate/genai/__init__.py b/frigate/genai/__init__.py index 3761fa62f..afc783021 100644 --- a/frigate/genai/__init__.py +++ b/frigate/genai/__init__.py @@ -4,7 +4,7 @@ import importlib 
import os from typing import Optional -from frigate.config import GenAIConfig, GenAIProviderEnum +from frigate.config import CameraConfig, GenAIConfig, GenAIProviderEnum PROVIDERS = {} @@ -28,11 +28,14 @@ class GenAIClient: self.provider = self._init_provider() def generate_description( - self, thumbnails: list[bytes], metadata: dict[str, any] + self, + camera_config: CameraConfig, + thumbnails: list[bytes], + metadata: dict[str, any], ) -> Optional[str]: """Generate a description for the frame.""" - prompt = self.genai_config.object_prompts.get( - metadata["label"], self.genai_config.prompt + prompt = camera_config.genai.object_prompts.get( + metadata["label"], camera_config.genai.prompt ).format(**metadata) return self._send(prompt, thumbnails)