write prompts for genai at the camera level (#13767)

Josh Hawkins 2024-09-16 09:46:11 -05:00 committed by GitHub
parent 06ccf7e9e9
commit e3edcf057c
5 changed files with 46 additions and 10 deletions


@@ -124,13 +124,25 @@ genai:
   model: llava
   prompt: "Describe the {label} in these images from the {camera} security camera."
   object_prompts:
-    person: "Describe the main person in these images (gender, age, clothing, activity, etc). Do not include where the activity is occurring (sidewalk, concrete, driveway, etc). If delivering a package, include the company the package is from."
+    person: "Describe the main person in these images (gender, age, clothing, activity, etc). Do not include where the activity is occurring (sidewalk, concrete, driveway, etc)."
     car: "Label the primary vehicle in these images with just the name of the company if it is a delivery vehicle, or the color make and model."
 ```
 
+Prompts can also be overridden at the camera level to provide a more detailed prompt to the model about your specific camera, if you desire.
+
+```yaml
+cameras:
+  front_door:
+    genai:
+      prompt: "Describe the {label} in these images from the {camera} security camera at the front door of a house, aimed outward toward the street."
+      object_prompts:
+        person: "Describe the main person in these images (gender, age, clothing, activity, etc). Do not include where the activity is occurring (sidewalk, concrete, driveway, etc). If delivering a package, include the company the package is from."
+        cat: "Describe the cat in these images (color, size, tail). Indicate whether or not the cat is by the flower pots. If the cat is chasing a mouse, make up a name for the mouse."
+```
+
 ### Experiment with prompts
 
-Providers also has a public facing chat interface for their models. Download a couple different thumbnails or snapshots from Frigate and try new things in the playground to get descriptions to your liking before updating the prompt in Frigate.
+Many providers also have a public facing chat interface for their models. Download a couple of different thumbnails or snapshots from Frigate and try new things in the playground to get descriptions to your liking before updating the prompt in Frigate.
 
 - OpenAI - [ChatGPT](https://chatgpt.com)
 - Gemini - [Google AI Studio](https://aistudio.google.com)
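
To make collecting test images for those playgrounds easier, here is a minimal sketch that downloads an event's thumbnail and snapshot from a running Frigate instance. The base URL and event id are placeholder assumptions, and it presumes the standard Frigate HTTP API endpoints `/api/events/<id>/thumbnail.jpg` and `/api/events/<id>/snapshot.jpg`; adjust both for your setup.

```python
import requests

# Placeholder values -- substitute your Frigate host and a real event id.
FRIGATE_URL = "http://frigate.local:5000"
EVENT_ID = "1726500000.123456-abc123"


def save_event_images(event_id: str) -> None:
    """Download the thumbnail and snapshot for an event so they can be
    uploaded to a provider playground (ChatGPT, Google AI Studio, etc.)."""
    for kind in ("thumbnail", "snapshot"):
        resp = requests.get(
            f"{FRIGATE_URL}/api/events/{event_id}/{kind}.jpg", timeout=10
        )
        resp.raise_for_status()
        with open(f"{event_id}-{kind}.jpg", "wb") as f:
            f.write(resp.content)


if __name__ == "__main__":
    save_event_images(EVENT_ID)
```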


@@ -504,7 +504,7 @@ semantic_search:
 # to Google or OpenAI's LLMs to generate descriptions. It can be overridden at
 # the camera level (enabled: False) to enhance privacy for indoor cameras.
 genai:
-  # Optional: Enable Google Gemini description generation (default: shown below)
+  # Optional: Enable AI description generation (default: shown below)
   enabled: False
   # Required if enabled: Provider must be one of ollama, gemini, or openai
   provider: ollama
@@ -712,6 +712,18 @@ cameras:
       # By default the cameras are sorted alphabetically.
       order: 0
 
+    # Optional: Configuration for AI generated tracked object descriptions
+    genai:
+      # Optional: Enable AI description generation (default: shown below)
+      enabled: False
+      # Optional: The default prompt for generating descriptions. Can use replacement
+      # variables like "label", "sub_label", "camera" to make more dynamic. (default: shown below)
+      prompt: "Describe the {label} in the sequence of images with as much detail as possible. Do not describe the background."
+      # Optional: Object specific prompts to customize description results
+      # Format: {label}: {prompt}
+      object_prompts:
+        person: "My special person prompt."
+
 # Optional
 ui:
   # Optional: Set a timezone to use in the UI (default: use browser local time)
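
The `prompt` comment above mentions replacement variables such as `label`, `sub_label`, and `camera`. These are ordinary `str.format` placeholders, so a quick sketch with made-up metadata values (only the three variable names come from the comment; the values here are invented for illustration):

```python
# Invented metadata for a tracked object; "label", "sub_label", and "camera"
# are the replacement variables named in the comment above.
metadata = {"label": "person", "sub_label": "ups", "camera": "front_door"}

prompt = "Describe the {label} in these images from the {camera} security camera."

# str.format(**metadata) fills only the placeholders the template actually
# uses; unused keys (like sub_label here) are simply ignored.
print(prompt.format(**metadata))
# Describe the person in these images from the front_door security camera.
```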


@@ -763,8 +763,14 @@ class GenAIConfig(FrigateBaseModel):
     object_prompts: Dict[str, str] = Field(default={}, title="Object specific prompts.")
 
 
-class GenAICameraConfig(FrigateBaseModel):
+# uses BaseModel because some global attributes are not available at the camera level
+class GenAICameraConfig(BaseModel):
     enabled: bool = Field(default=False, title="Enable GenAI for camera.")
+    prompt: str = Field(
+        default="Describe the {label} in the sequence of images with as much detail as possible. Do not describe the background.",
+        title="Default caption prompt.",
+    )
+    object_prompts: Dict[str, str] = Field(default={}, title="Object specific prompts.")
 
 
 class AudioConfig(FrigateBaseModel):

@@ -1520,7 +1526,7 @@ class FrigateConfig(FrigateBaseModel):
                         "live": ...,
                         "objects": ...,
                         "review": ...,
-                        "genai": {"enabled"},
+                        "genai": ...,
                         "motion": ...,
                         "detect": ...,
                         "ffmpeg": ...,

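As a quick illustration of the new camera-level model, the standalone sketch below copies the fields from the hunk above into a plain pydantic `BaseModel` and validates a partial camera `genai` block. The example values are hypothetical; in Frigate the class lives with the rest of the config models and is attached to each camera's config.

```python
from typing import Dict

from pydantic import BaseModel, Field


# Standalone copy of the model added above, for illustration only.
class GenAICameraConfig(BaseModel):
    enabled: bool = Field(default=False, title="Enable GenAI for camera.")
    prompt: str = Field(
        default="Describe the {label} in the sequence of images with as much detail as possible. Do not describe the background.",
        title="Default caption prompt.",
    )
    object_prompts: Dict[str, str] = Field(default={}, title="Object specific prompts.")


# A camera that only overrides the person prompt keeps the defaults for
# everything else (hypothetical values).
cfg = GenAICameraConfig(object_prompts={"person": "Describe the person at the door."})
print(cfg.enabled)                        # False
print(cfg.prompt.startswith("Describe"))  # True
print(cfg.object_prompts["person"])       # Describe the person at the door.
```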

@@ -171,8 +171,11 @@ class EmbeddingMaintainer(threading.Thread):
         self, event: Event, thumbnails: list[bytes], metadata: dict
     ) -> None:
         """Embed the description for an event."""
+        camera_config = self.config.cameras[event.camera]
 
-        description = self.genai_client.generate_description(thumbnails, metadata)
+        description = self.genai_client.generate_description(
+            camera_config, thumbnails, metadata
+        )
 
         if description is None:
             logger.debug("Failed to generate description for %s", event.id)


@@ -4,7 +4,7 @@ import importlib
 import os
 from typing import Optional
 
-from frigate.config import GenAIConfig, GenAIProviderEnum
+from frigate.config import CameraConfig, GenAIConfig, GenAIProviderEnum
 
 PROVIDERS = {}

@@ -28,11 +28,14 @@ class GenAIClient:
         self.provider = self._init_provider()
 
     def generate_description(
-        self, thumbnails: list[bytes], metadata: dict[str, any]
+        self,
+        camera_config: CameraConfig,
+        thumbnails: list[bytes],
+        metadata: dict[str, any],
     ) -> Optional[str]:
         """Generate a description for the frame."""
-        prompt = self.genai_config.object_prompts.get(
-            metadata["label"], self.genai_config.prompt
+        prompt = camera_config.genai.object_prompts.get(
+            metadata["label"], camera_config.genai.prompt
         ).format(**metadata)
         return self._send(prompt, thumbnails)
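
The lookup in `generate_description` above is the heart of the per-camera behavior: use an object-specific prompt when one exists for the tracked object's label, otherwise fall back to the camera's default prompt, then substitute the metadata. A minimal standalone sketch of that selection, with hypothetical prompts and metadata:

```python
# Hypothetical per-camera prompts, mirroring camera_config.genai above.
object_prompts = {
    "person": "Describe the main person in these images from the {camera} camera.",
}
default_prompt = (
    "Describe the {label} in the sequence of images with as much detail as possible."
)


def build_prompt(metadata: dict) -> str:
    """Prefer the object-specific prompt for this label, fall back to the
    default prompt, then fill in the replacement variables."""
    return object_prompts.get(metadata["label"], default_prompt).format(**metadata)


print(build_prompt({"label": "person", "camera": "front_door"}))
# Describe the main person in these images from the front_door camera.
print(build_prompt({"label": "car", "camera": "front_door"}))
# Describe the car in the sequence of images with as much detail as possible.
```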