diff --git a/docs/docs/configuration/genai.md b/docs/docs/configuration/genai.md index 874f45069..7041bb8eb 100644 --- a/docs/docs/configuration/genai.md +++ b/docs/docs/configuration/genai.md @@ -130,10 +130,13 @@ genai: Prompts can also be overriden at the camera level to provide a more detailed prompt to the model about your specific camera, if you desire. By default, descriptions will be generated for all tracked objects and all zones. But you can also optionally specify `objects` and `required_zones` to only generate descriptions for certain tracked objects or zones. +Optionally, you can generate the description using a snapshot (if enabled) by setting `use_snapshot` to `True`. By default, this is set to `False`, which sends the thumbnails collected over the object's lifetime to the model. Using a snapshot provides the AI with a higher-resolution image (typically downscaled by the AI itself), but the trade-off is that only a single image is used, which might limit the model's ability to determine object movement or direction. + ```yaml cameras: front_door: genai: + use_snapshot: True prompt: "Describe the {label} in these images from the {camera} security camera at the front door of a house, aimed outward toward the street." object_prompts: person: "Describe the main person in these images (gender, age, clothing, activity, etc). Do not include where the activity is occurring (sidewalk, concrete, driveway, etc). If delivering a package, include the company the package is from." 
diff --git a/frigate/api/defs/regenerate_query_parameters.py b/frigate/api/defs/regenerate_query_parameters.py new file mode 100644 index 000000000..bcce47b1b --- /dev/null +++ b/frigate/api/defs/regenerate_query_parameters.py @@ -0,0 +1,9 @@ +from typing import Optional + +from pydantic import BaseModel + +from frigate.events.types import RegenerateDescriptionEnum + + +class RegenerateQueryParameters(BaseModel): + source: Optional[RegenerateDescriptionEnum] = RegenerateDescriptionEnum.thumbnails diff --git a/frigate/api/event.py b/frigate/api/event.py index 4e45b10de..3c861f901 100644 --- a/frigate/api/event.py +++ b/frigate/api/event.py @@ -31,6 +31,9 @@ from frigate.api.defs.events_query_parameters import ( EventsSearchQueryParams, EventsSummaryQueryParams, ) +from frigate.api.defs.regenerate_query_parameters import ( + RegenerateQueryParameters, +) from frigate.api.defs.tags import Tags from frigate.const import ( CLIPS_DIR, @@ -996,7 +999,9 @@ def set_description( @router.put("/events/{event_id}/description/regenerate") -def regenerate_description(request: Request, event_id: str): +def regenerate_description( + request: Request, event_id: str, params: RegenerateQueryParameters = Depends() +): try: event: Event = Event.get(Event.id == event_id) except DoesNotExist: @@ -1009,7 +1014,7 @@ def regenerate_description(request: Request, event_id: str): request.app.frigate_config.semantic_search.enabled and request.app.frigate_config.genai.enabled ): - request.app.event_metadata_updater.publish(event.id) + request.app.event_metadata_updater.publish((event.id, params.source)) return JSONResponse( content=( @@ -1017,7 +1022,8 @@ def regenerate_description(request: Request, event_id: str): "success": True, "message": "Event " + event_id - + " description regeneration has been requested.", + + " description regeneration has been requested using " + + params.source, } ), status_code=200, diff --git a/frigate/comms/event_metadata_updater.py 
b/frigate/comms/event_metadata_updater.py index d435b149e..aeede6d8e 100644 --- a/frigate/comms/event_metadata_updater.py +++ b/frigate/comms/event_metadata_updater.py @@ -4,6 +4,8 @@ import logging from enum import Enum from typing import Optional +from frigate.events.types import RegenerateDescriptionEnum + from .zmq_proxy import Publisher, Subscriber logger = logging.getLogger(__name__) @@ -23,6 +25,9 @@ class EventMetadataPublisher(Publisher): topic = topic.value super().__init__(topic) + def publish(self, payload: tuple[str, RegenerateDescriptionEnum]) -> None: + super().publish(payload) + class EventMetadataSubscriber(Subscriber): """Simplifies receiving event metadata.""" @@ -35,10 +40,12 @@ class EventMetadataSubscriber(Subscriber): def check_for_update( self, timeout: float = None - ) -> Optional[tuple[EventMetadataTypeEnum, any]]: + ) -> Optional[tuple[EventMetadataTypeEnum, str, RegenerateDescriptionEnum]]: return super().check_for_update(timeout) def _return_object(self, topic: str, payload: any) -> any: if payload is None: - return (None, None) - return (EventMetadataTypeEnum[topic[len(self.topic_base) :]], payload) + return (None, None, None) + topic = EventMetadataTypeEnum[topic[len(self.topic_base) :]] + event_id, source = payload + return (topic, event_id, RegenerateDescriptionEnum(source)) diff --git a/frigate/config/camera/genai.py b/frigate/config/camera/genai.py index 736974535..12ff37adb 100644 --- a/frigate/config/camera/genai.py +++ b/frigate/config/camera/genai.py @@ -18,6 +18,9 @@ class GenAIProviderEnum(str, Enum): # uses BaseModel because some global attributes are not available at the camera level class GenAICameraConfig(BaseModel): enabled: bool = Field(default=False, title="Enable GenAI for camera.") + use_snapshot: bool = Field( + default=False, title="Use snapshots for generating descriptions." + ) prompt: str = Field( default="Describe the {label} in the sequence of images with as much detail as possible. 
Do not describe the background.", title="Default caption prompt.", diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index cbe4554ce..ab60bd5ba 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -3,6 +3,7 @@ import base64 import io import logging +import os import threading from multiprocessing.synchronize import Event as MpEvent from typing import Optional @@ -19,7 +20,7 @@ from frigate.comms.event_metadata_updater import ( from frigate.comms.events_updater import EventEndSubscriber, EventUpdateSubscriber from frigate.comms.inter_process import InterProcessRequestor from frigate.config import FrigateConfig -from frigate.const import UPDATE_EVENT_DESCRIPTION +from frigate.const import CLIPS_DIR, UPDATE_EVENT_DESCRIPTION from frigate.events.types import EventTypeEnum from frigate.genai import get_genai_client from frigate.models import Event @@ -136,6 +137,41 @@ class EmbeddingMaintainer(threading.Thread): or set(event.zones) & set(camera_config.genai.required_zones) ) ): + if event.has_snapshot and camera_config.genai.use_snapshot: + with open( + os.path.join(CLIPS_DIR, f"{event.camera}-{event.id}.jpg"), + "rb", + ) as image_file: + snapshot_image = image_file.read() + + img = cv2.imdecode( + np.frombuffer(snapshot_image, dtype=np.uint8), + cv2.IMREAD_COLOR, + ) + + # crop snapshot based on region before sending off to genai + height, width = img.shape[:2] + x1_rel, y1_rel, width_rel, height_rel = event.data["region"] + + x1, y1 = int(x1_rel * width), int(y1_rel * height) + cropped_image = img[ + y1 : y1 + int(height_rel * height), + x1 : x1 + int(width_rel * width), + ] + + _, buffer = cv2.imencode(".jpg", cropped_image) + snapshot_image = buffer.tobytes() + + embed_image = ( + [snapshot_image] + if event.has_snapshot and camera_config.genai.use_snapshot + else ( + [data["thumbnail"] for data in self.tracked_events[event_id]] + if len(self.tracked_events.get(event_id, [])) > 0 + else [thumbnail] + ) + ) + 
# Generate the description. Call happens in a thread since it is network bound. threading.Thread( target=self._embed_description, @@ -143,12 +179,7 @@ class EmbeddingMaintainer(threading.Thread): daemon=True, args=( event, - [ - data["thumbnail"] - for data in self.tracked_events[event_id] - ] - if len(self.tracked_events.get(event_id, [])) > 0 - else [thumbnail], + embed_image, metadata, ), ).start() @@ -159,13 +190,15 @@ class EmbeddingMaintainer(threading.Thread): def _process_event_metadata(self): # Check for regenerate description requests - (topic, event_id) = self.event_metadata_subscriber.check_for_update(timeout=1) + (topic, event_id, source) = self.event_metadata_subscriber.check_for_update( + timeout=1 + ) if topic is None: return if event_id: - self.handle_regenerate_description(event_id) + self.handle_regenerate_description(event_id, source) def _create_thumbnail(self, yuv_frame, box, height=500) -> Optional[bytes]: """Return jpg thumbnail of a region of the frame.""" @@ -228,7 +261,7 @@ class EmbeddingMaintainer(threading.Thread): description, ) - def handle_regenerate_description(self, event_id: str) -> None: + def handle_regenerate_description(self, event_id: str, source: str) -> None: try: event: Event = Event.get(Event.id == event_id) except DoesNotExist: @@ -243,4 +276,38 @@ class EmbeddingMaintainer(threading.Thread): metadata = get_metadata(event) thumbnail = base64.b64decode(event.thumbnail) - self._embed_description(event, [thumbnail], metadata) + logger.debug(f"Using {source} regeneration for {event.id}") + + if event.has_snapshot and source == "snapshot": + with open( + os.path.join(CLIPS_DIR, f"{event.camera}-{event.id}.jpg"), + "rb", + ) as image_file: + snapshot_image = image_file.read() + img = cv2.imdecode( + np.frombuffer(snapshot_image, dtype=np.uint8), cv2.IMREAD_COLOR + ) + + # crop snapshot based on region before sending off to genai + height, width = img.shape[:2] + x1_rel, y1_rel, width_rel, height_rel = event.data["region"] + + 
x1, y1 = int(x1_rel * width), int(y1_rel * height) + cropped_image = img[ + y1 : y1 + int(height_rel * height), x1 : x1 + int(width_rel * width) + ] + + _, buffer = cv2.imencode(".jpg", cropped_image) + snapshot_image = buffer.tobytes() + + embed_image = ( + [snapshot_image] + if event.has_snapshot and source == "snapshot" + else ( + [data["thumbnail"] for data in self.tracked_events[event_id]] + if len(self.tracked_events.get(event_id, [])) > 0 + else [thumbnail] + ) + ) + + self._embed_description(event, embed_image, metadata) diff --git a/frigate/events/types.py b/frigate/events/types.py index 1750b3e7b..1461c1f28 100644 --- a/frigate/events/types.py +++ b/frigate/events/types.py @@ -12,3 +12,8 @@ class EventStateEnum(str, Enum): start = "start" update = "update" end = "end" + + +class RegenerateDescriptionEnum(str, Enum): + thumbnails = "thumbnails" + snapshot = "snapshot" diff --git a/web/src/components/overlay/detail/SearchDetailDialog.tsx b/web/src/components/overlay/detail/SearchDetailDialog.tsx index 6b9de06db..ac4b80e46 100644 --- a/web/src/components/overlay/detail/SearchDetailDialog.tsx +++ b/web/src/components/overlay/detail/SearchDetailDialog.tsx @@ -27,7 +27,13 @@ import { baseUrl } from "@/api/baseUrl"; import { cn } from "@/lib/utils"; import ActivityIndicator from "@/components/indicators/activity-indicator"; import { ASPECT_VERTICAL_LAYOUT, ASPECT_WIDE_LAYOUT } from "@/types/record"; -import { FaHistory, FaImage, FaRegListAlt, FaVideo } from "react-icons/fa"; +import { + FaChevronDown, + FaHistory, + FaImage, + FaRegListAlt, + FaVideo, +} from "react-icons/fa"; import { FaRotate } from "react-icons/fa6"; import ObjectLifecycle from "./ObjectLifecycle"; import { @@ -47,6 +53,12 @@ import { useNavigate } from "react-router-dom"; import Chip from "@/components/indicators/Chip"; import { capitalizeFirstLetter } from "@/utils/stringUtil"; import useGlobalMutation from "@/hooks/use-global-mutate"; +import { + DropdownMenu, + DropdownMenuContent, + 
DropdownMenuItem, + DropdownMenuTrigger, +} from "@/components/ui/dropdown-menu"; const SEARCH_TABS = [ "details", @@ -309,33 +321,36 @@ function ObjectDetailsTab({ }); }, [desc, search, mutate]); - const regenerateDescription = useCallback(() => { - if (!search) { - return; - } + const regenerateDescription = useCallback( + (source: "snapshot" | "thumbnails") => { + if (!search) { + return; + } - axios - .put(`events/${search.id}/description/regenerate`) - .then((resp) => { - if (resp.status == 200) { - toast.success( - `A new description has been requested from ${capitalizeFirstLetter(config?.genai.provider ?? "Generative AI")}. Depending on the speed of your provider, the new description may take some time to regenerate.`, + axios + .put(`events/${search.id}/description/regenerate?source=${source}`) + .then((resp) => { + if (resp.status == 200) { + toast.success( + `A new description has been requested from ${capitalizeFirstLetter(config?.genai.provider ?? "Generative AI")}. Depending on the speed of your provider, the new description may take some time to regenerate.`, + { + position: "top-center", + duration: 7000, + }, + ); + } + }) + .catch(() => { + toast.error( + `Failed to call ${capitalizeFirstLetter(config?.genai.provider ?? "Generative AI")} for a new description`, { position: "top-center", - duration: 7000, }, ); - } - }) - .catch(() => { - toast.error( - `Failed to call ${capitalizeFirstLetter(config?.genai.provider ?? "Generative AI")} for a new description`, - { - position: "top-center", - }, - ); - }); - }, [search, config]); + }); + }, + [search, config], + ); return (