Add ability to configure genai to use snapshot instead of thumbnails (#14077)

* Allow embedding of snapshot for description via config option

* docs

* frontend button

* Backend

* crop snapshot to region

* only show dropdown when event has snapshot

* fix cursor on dropdown

* crop on initial generation as well

* use enum for type

* fix type
Josh Hawkins 2024-09-30 16:54:53 -05:00 committed by GitHub
parent 4dc4704bb4
commit 95d6da3111
8 changed files with 186 additions and 41 deletions

View File

@@ -130,10 +130,13 @@ genai:
Prompts can also be overridden at the camera level to give the model more detail about your specific camera, if you desire. By default, descriptions are generated for all tracked objects in all zones, but you can optionally specify `objects` and `required_zones` to limit descriptions to certain tracked objects or zones.
Optionally, you can generate the description using a snapshot (if enabled) by setting `use_snapshot` to `True`. By default, this is set to `False`, which sends the thumbnails collected over the object's lifetime to the model. Using a snapshot provides the AI with a higher-resolution image (typically downscaled by the AI itself), but the trade-off is that only a single image is used, which might limit the model's ability to determine object movement or direction.
```yaml
cameras:
  front_door:
    genai:
      use_snapshot: True
      prompt: "Describe the {label} in these images from the {camera} security camera at the front door of a house, aimed outward toward the street."
      object_prompts:
        person: "Describe the main person in these images (gender, age, clothing, activity, etc). Do not include where the activity is occurring (sidewalk, concrete, driveway, etc). If delivering a package, include the company the package is from."
```

View File

@@ -0,0 +1,9 @@
from typing import Optional
from pydantic import BaseModel
from frigate.events.types import RegenerateDescriptionEnum
class RegenerateQueryParameters(BaseModel):
source: Optional[RegenerateDescriptionEnum] = RegenerateDescriptionEnum.thumbnails
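
For context, a minimal standalone sketch of how FastAPI resolves this model from the query string via `Depends()` (the app wiring and endpoint body here are illustrative; only the enum and model mirror the diff):

```python
from enum import Enum
from typing import Optional

from fastapi import Depends, FastAPI
from pydantic import BaseModel


class RegenerateDescriptionEnum(str, Enum):
    thumbnails = "thumbnails"
    snapshot = "snapshot"


class RegenerateQueryParameters(BaseModel):
    source: Optional[RegenerateDescriptionEnum] = RegenerateDescriptionEnum.thumbnails


app = FastAPI()


@app.put("/events/{event_id}/description/regenerate")
def regenerate(event_id: str, params: RegenerateQueryParameters = Depends()):
    # ?source=snapshot -> params.source == RegenerateDescriptionEnum.snapshot;
    # omitting ?source falls back to the thumbnails default on the model.
    return {"event_id": event_id, "source": params.source}
```

An invalid `source` value never reaches the handler; FastAPI rejects it with a 422.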

View File

@@ -31,6 +31,9 @@ from frigate.api.defs.events_query_parameters import (
EventsSearchQueryParams,
EventsSummaryQueryParams,
)
from frigate.api.defs.regenerate_query_parameters import (
RegenerateQueryParameters,
)
from frigate.api.defs.tags import Tags
from frigate.const import (
CLIPS_DIR,
@@ -996,7 +999,9 @@ def set_description(
@router.put("/events/{event_id}/description/regenerate")
def regenerate_description(request: Request, event_id: str):
def regenerate_description(
request: Request, event_id: str, params: RegenerateQueryParameters = Depends()
):
try:
event: Event = Event.get(Event.id == event_id)
except DoesNotExist:
@@ -1009,7 +1014,7 @@ def regenerate_description(request: Request, event_id: str):
request.app.frigate_config.semantic_search.enabled
and request.app.frigate_config.genai.enabled
):
request.app.event_metadata_updater.publish(event.id)
request.app.event_metadata_updater.publish((event.id, params.source))
return JSONResponse(
content=(
@@ -1017,7 +1022,8 @@ def regenerate_description(request: Request, event_id: str):
"success": True,
"message": "Event "
+ event_id
+ " description regeneration has been requested.",
+ " description regeneration has been requested using "
+ params.source,
}
),
status_code=200,
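
As a usage sketch (the host, port, `/api` prefix, and event id below are assumptions for illustration, not part of the diff):

```python
import requests

# Ask Frigate to regenerate this event's description from its snapshot
# instead of the default thumbnails.
resp = requests.put(
    "http://frigate.local:5000/api/events/1727734493.123456-abcdef/description/regenerate",
    params={"source": "snapshot"},  # or "thumbnails" (the default)
)
print(resp.status_code, resp.json())
```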

View File

@@ -4,6 +4,8 @@ import logging
from enum import Enum
from typing import Optional
from frigate.events.types import RegenerateDescriptionEnum
from .zmq_proxy import Publisher, Subscriber
logger = logging.getLogger(__name__)
@@ -23,6 +25,9 @@ class EventMetadataPublisher(Publisher):
topic = topic.value
super().__init__(topic)
def publish(self, payload: tuple[str, RegenerateDescriptionEnum]) -> None:
super().publish(payload)
class EventMetadataSubscriber(Subscriber):
"""Simplifies receiving event metadata."""
@@ -35,10 +40,12 @@ class EventMetadataSubscriber(Subscriber):
def check_for_update(
self, timeout: float = None
) -> Optional[tuple[EventMetadataTypeEnum, any]]:
) -> Optional[tuple[EventMetadataTypeEnum, str, RegenerateDescriptionEnum]]:
return super().check_for_update(timeout)
def _return_object(self, topic: str, payload: any) -> any:
if payload is None:
return (None, None)
return (EventMetadataTypeEnum[topic[len(self.topic_base) :]], payload)
return (None, None, None)
topic = EventMetadataTypeEnum[topic[len(self.topic_base) :]]
event_id, source = payload
return (topic, event_id, RegenerateDescriptionEnum(source))
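
A quick sketch of the payload contract this sets up between publisher and subscriber. Because the tuple is presumably serialized in transit, `source` can arrive as a plain string, which is why `_return_object` re-wraps it in the enum (the enum is redefined here so the snippet stands alone):

```python
from enum import Enum


class RegenerateDescriptionEnum(str, Enum):  # mirrors frigate.events.types
    thumbnails = "thumbnails"
    snapshot = "snapshot"


# Publisher side: the payload is an (event_id, source) tuple.
payload = ("1727734493.123456-abcdef", "snapshot")

# Subscriber side: unpack and coerce, as _return_object does above.
event_id, source = payload
assert RegenerateDescriptionEnum(source) is RegenerateDescriptionEnum.snapshot
```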

View File

@@ -18,6 +18,9 @@ class GenAIProviderEnum(str, Enum):
# uses BaseModel because some global attributes are not available at the camera level
class GenAICameraConfig(BaseModel):
enabled: bool = Field(default=False, title="Enable GenAI for camera.")
use_snapshot: bool = Field(
default=False, title="Use snapshots for generating descriptions."
)
prompt: str = Field(
default="Describe the {label} in the sequence of images with as much detail as possible. Do not describe the background.",
title="Default caption prompt.",

View File

@@ -3,6 +3,7 @@
import base64
import io
import logging
import os
import threading
from multiprocessing.synchronize import Event as MpEvent
from typing import Optional
@@ -19,7 +20,7 @@ from frigate.comms.event_metadata_updater import (
from frigate.comms.events_updater import EventEndSubscriber, EventUpdateSubscriber
from frigate.comms.inter_process import InterProcessRequestor
from frigate.config import FrigateConfig
from frigate.const import UPDATE_EVENT_DESCRIPTION
from frigate.const import CLIPS_DIR, UPDATE_EVENT_DESCRIPTION
from frigate.events.types import EventTypeEnum
from frigate.genai import get_genai_client
from frigate.models import Event
@@ -136,6 +137,41 @@ class EmbeddingMaintainer(threading.Thread):
or set(event.zones) & set(camera_config.genai.required_zones)
)
):
if event.has_snapshot and camera_config.genai.use_snapshot:
with open(
os.path.join(CLIPS_DIR, f"{event.camera}-{event.id}.jpg"),
"rb",
) as image_file:
snapshot_image = image_file.read()
img = cv2.imdecode(
np.frombuffer(snapshot_image, dtype=np.uint8),
cv2.IMREAD_COLOR,
)
# crop snapshot based on region before sending off to genai
height, width = img.shape[:2]
x1_rel, y1_rel, width_rel, height_rel = event.data["region"]
x1, y1 = int(x1_rel * width), int(y1_rel * height)
cropped_image = img[
y1 : y1 + int(height_rel * height),
x1 : x1 + int(width_rel * width),
]
_, buffer = cv2.imencode(".jpg", cropped_image)
snapshot_image = buffer.tobytes()
embed_image = (
[snapshot_image]
if event.has_snapshot and camera_config.genai.use_snapshot
else (
[data["thumbnail"] for data in self.tracked_events[event_id]]
if len(self.tracked_events.get(event_id, [])) > 0
else [thumbnail]
)
)
# Generate the description. Call happens in a thread since it is network bound.
threading.Thread(
target=self._embed_description,
@@ -143,12 +179,7 @@
daemon=True,
args=(
event,
[
data["thumbnail"]
for data in self.tracked_events[event_id]
]
if len(self.tracked_events.get(event_id, [])) > 0
else [thumbnail],
embed_image,
metadata,
),
).start()
@@ -159,13 +190,15 @@
def _process_event_metadata(self):
# Check for regenerate description requests
(topic, event_id) = self.event_metadata_subscriber.check_for_update(timeout=1)
(topic, event_id, source) = self.event_metadata_subscriber.check_for_update(
timeout=1
)
if topic is None:
return
if event_id:
self.handle_regenerate_description(event_id)
self.handle_regenerate_description(event_id, source)
def _create_thumbnail(self, yuv_frame, box, height=500) -> Optional[bytes]:
"""Return jpg thumbnail of a region of the frame."""
@@ -228,7 +261,7 @@
description,
)
def handle_regenerate_description(self, event_id: str) -> None:
def handle_regenerate_description(self, event_id: str, source: str) -> None:
try:
event: Event = Event.get(Event.id == event_id)
except DoesNotExist:
@@ -243,4 +276,38 @@
metadata = get_metadata(event)
thumbnail = base64.b64decode(event.thumbnail)
self._embed_description(event, [thumbnail], metadata)
logger.debug(f"Using ${source} regeneration for ${event}")
if event.has_snapshot and source == "snapshot":
with open(
os.path.join(CLIPS_DIR, f"{event.camera}-{event.id}.jpg"),
"rb",
) as image_file:
snapshot_image = image_file.read()
img = cv2.imdecode(
np.frombuffer(snapshot_image, dtype=np.uint8), cv2.IMREAD_COLOR
)
# crop snapshot based on region before sending off to genai
height, width = img.shape[:2]
x1_rel, y1_rel, width_rel, height_rel = event.data["region"]
x1, y1 = int(x1_rel * width), int(y1_rel * height)
cropped_image = img[
y1 : y1 + int(height_rel * height), x1 : x1 + int(width_rel * width)
]
_, buffer = cv2.imencode(".jpg", cropped_image)
snapshot_image = buffer.tobytes()
embed_image = (
[snapshot_image]
if event.has_snapshot and source == "snapshot"
else (
[data["thumbnail"] for data in self.tracked_events[event_id]]
if len(self.tracked_events.get(event_id, [])) > 0
else [thumbnail]
)
)
self._embed_description(event, embed_image, metadata)
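
The crop logic above appears twice (initial generation and regeneration); as a standalone sketch it amounts to the helper below, which assumes, as the variable names suggest, that `event.data["region"]` stores `[x, y, width, height]` relative to the frame size:

```python
import cv2
import numpy as np


def crop_to_region(jpg_bytes: bytes, region: list[float]) -> bytes:
    """Crop a JPEG to a relative [x, y, w, h] region and re-encode it."""
    img = cv2.imdecode(np.frombuffer(jpg_bytes, dtype=np.uint8), cv2.IMREAD_COLOR)
    height, width = img.shape[:2]
    x1_rel, y1_rel, width_rel, height_rel = region
    x1, y1 = int(x1_rel * width), int(y1_rel * height)
    cropped = img[
        y1 : y1 + int(height_rel * height), x1 : x1 + int(width_rel * width)
    ]
    _, buffer = cv2.imencode(".jpg", cropped)
    return buffer.tobytes()
```

Extracting a helper like this would also remove the duplication between the two call sites.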

View File

@@ -12,3 +12,8 @@ class EventStateEnum(str, Enum):
start = "start"
update = "update"
end = "end"
class RegenerateDescriptionEnum(str, Enum):
thumbnails = "thumbnails"
snapshot = "snapshot"

View File

@@ -27,7 +27,13 @@ import { baseUrl } from "@/api/baseUrl";
import { cn } from "@/lib/utils";
import ActivityIndicator from "@/components/indicators/activity-indicator";
import { ASPECT_VERTICAL_LAYOUT, ASPECT_WIDE_LAYOUT } from "@/types/record";
import { FaHistory, FaImage, FaRegListAlt, FaVideo } from "react-icons/fa";
import {
FaChevronDown,
FaHistory,
FaImage,
FaRegListAlt,
FaVideo,
} from "react-icons/fa";
import { FaRotate } from "react-icons/fa6";
import ObjectLifecycle from "./ObjectLifecycle";
import {
@@ -47,6 +53,12 @@ import { useNavigate } from "react-router-dom";
import Chip from "@/components/indicators/Chip";
import { capitalizeFirstLetter } from "@/utils/stringUtil";
import useGlobalMutation from "@/hooks/use-global-mutate";
import {
DropdownMenu,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu";
const SEARCH_TABS = [
"details",
@@ -309,13 +321,14 @@
});
}, [desc, search, mutate]);
const regenerateDescription = useCallback(() => {
const regenerateDescription = useCallback(
(source: "snapshot" | "thumbnails") => {
if (!search) {
return;
}
axios
.put(`events/${search.id}/description/regenerate`)
.put(`events/${search.id}/description/regenerate?source=${source}`)
.then((resp) => {
if (resp.status == 200) {
toast.success(
@@ -335,7 +348,9 @@
},
);
});
}, [search, config]);
},
[search, config],
);
return (
<div className="flex flex-col gap-5">
@@ -403,7 +418,37 @@
/>
<div className="flex w-full flex-row justify-end gap-2">
{config?.genai.enabled && (
<Button onClick={regenerateDescription}>Regenerate</Button>
<div className="flex items-center">
<Button
className="rounded-r-none border-r-0"
onClick={() => regenerateDescription("thumbnails")}
>
Regenerate
</Button>
{search.has_snapshot && (
<DropdownMenu>
<DropdownMenuTrigger asChild>
<Button className="rounded-l-none border-l-0 px-2">
<FaChevronDown className="size-3" />
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent>
<DropdownMenuItem
className="cursor-pointer"
onClick={() => regenerateDescription("snapshot")}
>
Regenerate from Snapshot
</DropdownMenuItem>
<DropdownMenuItem
className="cursor-pointer"
onClick={() => regenerateDescription("thumbnails")}
>
Regenerate from Thumbnails
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
)}
</div>
)}
<Button variant="select" onClick={updateDescription}>
Save