Add ability to configure genai to use snapshot instead of thumbnails (#14077)

* Allow embedding of snapshot for description via config option

* docs

* frontend button

* Backend

* crop snapshot to region

* only show dropdown when event has snapshot

* fix cursor on dropdown

* crop on initial generation as well

* use enum for type

* fix type
Josh Hawkins 2024-09-30 16:54:53 -05:00 committed by GitHub
parent 4dc4704bb4
commit 95d6da3111
8 changed files with 186 additions and 41 deletions

View File

@@ -130,10 +130,13 @@ genai:
Prompts can also be overridden at the camera level to give the model more detail about your specific camera, if you desire. By default, descriptions are generated for all tracked objects in all zones, but you can optionally specify `objects` and `required_zones` to limit descriptions to certain tracked objects or zones.
Optionally, you can generate the description using a snapshot (if enabled) by setting `use_snapshot` to `True`. By default, this is set to `False`, which sends the thumbnails collected over the object's lifetime to the model. Using a snapshot provides the AI with a higher-resolution image (typically downscaled by the AI itself), but the trade-off is that only a single image is used, which might limit the model's ability to determine object movement or direction.
```yaml
cameras:
  front_door:
    genai:
      use_snapshot: True
      prompt: "Describe the {label} in these images from the {camera} security camera at the front door of a house, aimed outward toward the street."
      object_prompts:
        person: "Describe the main person in these images (gender, age, clothing, activity, etc). Do not include where the activity is occurring (sidewalk, concrete, driveway, etc). If delivering a package, include the company the package is from."
```

View File

@@ -0,0 +1,9 @@
from typing import Optional
from pydantic import BaseModel
from frigate.events.types import RegenerateDescriptionEnum
class RegenerateQueryParameters(BaseModel):
source: Optional[RegenerateDescriptionEnum] = RegenerateDescriptionEnum.thumbnails
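
For context, a minimal standalone sketch of how FastAPI resolves this model from the query string via `Depends()` (the app wiring and endpoint body here are illustrative; only the enum and model mirror the diff):

```python
from enum import Enum
from typing import Optional

from fastapi import Depends, FastAPI
from pydantic import BaseModel


class RegenerateDescriptionEnum(str, Enum):
    thumbnails = "thumbnails"
    snapshot = "snapshot"


class RegenerateQueryParameters(BaseModel):
    source: Optional[RegenerateDescriptionEnum] = RegenerateDescriptionEnum.thumbnails


app = FastAPI()


@app.put("/events/{event_id}/description/regenerate")
def regenerate(event_id: str, params: RegenerateQueryParameters = Depends()):
    # ?source=snapshot -> params.source == RegenerateDescriptionEnum.snapshot;
    # omitting ?source falls back to the thumbnails default on the model.
    return {"event_id": event_id, "source": params.source}
```

An invalid `source` value never reaches the handler; FastAPI rejects it with a 422.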

View File

@@ -31,6 +31,9 @@ from frigate.api.defs.events_query_parameters import (
EventsSearchQueryParams,
EventsSummaryQueryParams,
)
from frigate.api.defs.regenerate_query_parameters import (
RegenerateQueryParameters,
)
from frigate.api.defs.tags import Tags
from frigate.const import (
CLIPS_DIR,
@@ -996,7 +999,9 @@ def set_description(
@router.put("/events/{event_id}/description/regenerate")
def regenerate_description(request: Request, event_id: str):
def regenerate_description(
request: Request, event_id: str, params: RegenerateQueryParameters = Depends()
):
try:
event: Event = Event.get(Event.id == event_id)
except DoesNotExist:
@@ -1009,7 +1014,7 @@ def regenerate_description(request: Request, event_id: str):
request.app.frigate_config.semantic_search.enabled
and request.app.frigate_config.genai.enabled
):
request.app.event_metadata_updater.publish(event.id)
request.app.event_metadata_updater.publish((event.id, params.source))
return JSONResponse(
content=(
@@ -1017,7 +1022,8 @@ def regenerate_description(request: Request, event_id: str):
"success": True,
"message": "Event "
+ event_id
+ " description regeneration has been requested.",
+ " description regeneration has been requested using "
+ params.source,
}
),
status_code=200,
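
As a usage sketch (the host, port, `/api` prefix, and event id below are assumptions for illustration, not part of the diff):

```python
import requests

# Ask Frigate to regenerate this event's description from its snapshot
# instead of the default thumbnails.
resp = requests.put(
    "http://frigate.local:5000/api/events/1727734493.123456-abcdef/description/regenerate",
    params={"source": "snapshot"},  # or "thumbnails" (the default)
)
print(resp.status_code, resp.json())
```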

View File

@@ -4,6 +4,8 @@ import logging
from enum import Enum
from typing import Optional
from frigate.events.types import RegenerateDescriptionEnum
from .zmq_proxy import Publisher, Subscriber
logger = logging.getLogger(__name__)
@@ -23,6 +25,9 @@ class EventMetadataPublisher(Publisher):
topic = topic.value
super().__init__(topic)
def publish(self, payload: tuple[str, RegenerateDescriptionEnum]) -> None:
super().publish(payload)
class EventMetadataSubscriber(Subscriber):
"""Simplifies receiving event metadata."""
@@ -35,10 +40,12 @@ class EventMetadataSubscriber(Subscriber):
def check_for_update(
self, timeout: float = None
) -> Optional[tuple[EventMetadataTypeEnum, any]]:
) -> Optional[tuple[EventMetadataTypeEnum, str, RegenerateDescriptionEnum]]:
return super().check_for_update(timeout)
def _return_object(self, topic: str, payload: any) -> any:
if payload is None:
return (None, None)
return (EventMetadataTypeEnum[topic[len(self.topic_base) :]], payload)
return (None, None, None)
topic = EventMetadataTypeEnum[topic[len(self.topic_base) :]]
event_id, source = payload
return (topic, event_id, RegenerateDescriptionEnum(source))
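
A quick sketch of the payload contract this sets up between publisher and subscriber. Because the tuple is presumably serialized in transit, `source` can arrive as a plain string, which is why `_return_object` re-wraps it in the enum (the enum is redefined here so the snippet stands alone):

```python
from enum import Enum


class RegenerateDescriptionEnum(str, Enum):  # mirrors frigate.events.types
    thumbnails = "thumbnails"
    snapshot = "snapshot"


# Publisher side: the payload is an (event_id, source) tuple.
payload = ("1727734493.123456-abcdef", "snapshot")

# Subscriber side: unpack and coerce, as _return_object does above.
event_id, source = payload
assert RegenerateDescriptionEnum(source) is RegenerateDescriptionEnum.snapshot
```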

View File

@@ -18,6 +18,9 @@ class GenAIProviderEnum(str, Enum):
# uses BaseModel because some global attributes are not available at the camera level
class GenAICameraConfig(BaseModel):
enabled: bool = Field(default=False, title="Enable GenAI for camera.")
use_snapshot: bool = Field(
default=False, title="Use snapshots for generating descriptions."
)
prompt: str = Field(
default="Describe the {label} in the sequence of images with as much detail as possible. Do not describe the background.",
title="Default caption prompt.",

View File

@@ -3,6 +3,7 @@
import base64
import io
import logging
import os
import threading
from multiprocessing.synchronize import Event as MpEvent
from typing import Optional
@@ -19,7 +20,7 @@ from frigate.comms.event_metadata_updater import (
from frigate.comms.events_updater import EventEndSubscriber, EventUpdateSubscriber
from frigate.comms.inter_process import InterProcessRequestor
from frigate.config import FrigateConfig
from frigate.const import UPDATE_EVENT_DESCRIPTION
from frigate.const import CLIPS_DIR, UPDATE_EVENT_DESCRIPTION
from frigate.events.types import EventTypeEnum
from frigate.genai import get_genai_client
from frigate.models import Event
@@ -136,6 +137,41 @@ class EmbeddingMaintainer(threading.Thread):
or set(event.zones) & set(camera_config.genai.required_zones)
)
):
if event.has_snapshot and camera_config.genai.use_snapshot:
with open(
os.path.join(CLIPS_DIR, f"{event.camera}-{event.id}.jpg"),
"rb",
) as image_file:
snapshot_image = image_file.read()
img = cv2.imdecode(
np.frombuffer(snapshot_image, dtype=np.uint8),
cv2.IMREAD_COLOR,
)
# crop snapshot based on region before sending off to genai
height, width = img.shape[:2]
x1_rel, y1_rel, width_rel, height_rel = event.data["region"]
x1, y1 = int(x1_rel * width), int(y1_rel * height)
cropped_image = img[
y1 : y1 + int(height_rel * height),
x1 : x1 + int(width_rel * width),
]
_, buffer = cv2.imencode(".jpg", cropped_image)
snapshot_image = buffer.tobytes()
embed_image = (
[snapshot_image]
if event.has_snapshot and camera_config.genai.use_snapshot
else (
[data["thumbnail"] for data in self.tracked_events[event_id]]
if len(self.tracked_events.get(event_id, [])) > 0
else [thumbnail]
)
)
# Generate the description. Call happens in a thread since it is network bound.
threading.Thread(
target=self._embed_description,
@@ -143,12 +179,7 @@
daemon=True,
args=(
event,
[
data["thumbnail"]
for data in self.tracked_events[event_id]
]
if len(self.tracked_events.get(event_id, [])) > 0
else [thumbnail],
embed_image,
metadata,
),
).start()
@@ -159,13 +190,15 @@
def _process_event_metadata(self):
# Check for regenerate description requests
(topic, event_id) = self.event_metadata_subscriber.check_for_update(timeout=1)
(topic, event_id, source) = self.event_metadata_subscriber.check_for_update(
timeout=1
)
if topic is None:
return
if event_id:
self.handle_regenerate_description(event_id)
self.handle_regenerate_description(event_id, source)
def _create_thumbnail(self, yuv_frame, box, height=500) -> Optional[bytes]:
"""Return jpg thumbnail of a region of the frame."""
@@ -228,7 +261,7 @@
description,
)
def handle_regenerate_description(self, event_id: str) -> None:
def handle_regenerate_description(self, event_id: str, source: str) -> None:
try:
event: Event = Event.get(Event.id == event_id)
except DoesNotExist:
@@ -243,4 +276,38 @@
metadata = get_metadata(event)
thumbnail = base64.b64decode(event.thumbnail)
self._embed_description(event, [thumbnail], metadata)
logger.debug(f"Using ${source} regeneration for ${event}")
if event.has_snapshot and source == "snapshot":
with open(
os.path.join(CLIPS_DIR, f"{event.camera}-{event.id}.jpg"),
"rb",
) as image_file:
snapshot_image = image_file.read()
img = cv2.imdecode(
np.frombuffer(snapshot_image, dtype=np.uint8), cv2.IMREAD_COLOR
)
# crop snapshot based on region before sending off to genai
height, width = img.shape[:2]
x1_rel, y1_rel, width_rel, height_rel = event.data["region"]
x1, y1 = int(x1_rel * width), int(y1_rel * height)
cropped_image = img[
y1 : y1 + int(height_rel * height), x1 : x1 + int(width_rel * width)
]
_, buffer = cv2.imencode(".jpg", cropped_image)
snapshot_image = buffer.tobytes()
embed_image = (
[snapshot_image]
if event.has_snapshot and source == "snapshot"
else (
[data["thumbnail"] for data in self.tracked_events[event_id]]
if len(self.tracked_events.get(event_id, [])) > 0
else [thumbnail]
)
)
self._embed_description(event, embed_image, metadata)
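
The crop logic above appears twice (initial generation and regeneration); as a standalone sketch it amounts to the helper below, which assumes, as the variable names suggest, that `event.data["region"]` stores `[x, y, width, height]` relative to the frame size:

```python
import cv2
import numpy as np


def crop_to_region(jpg_bytes: bytes, region: list[float]) -> bytes:
    """Crop a JPEG to a relative [x, y, w, h] region and re-encode it."""
    img = cv2.imdecode(np.frombuffer(jpg_bytes, dtype=np.uint8), cv2.IMREAD_COLOR)
    height, width = img.shape[:2]
    x1_rel, y1_rel, width_rel, height_rel = region
    x1, y1 = int(x1_rel * width), int(y1_rel * height)
    cropped = img[
        y1 : y1 + int(height_rel * height), x1 : x1 + int(width_rel * width)
    ]
    _, buffer = cv2.imencode(".jpg", cropped)
    return buffer.tobytes()
```

Extracting a helper like this would also remove the duplication between the two call sites.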

View File

@@ -12,3 +12,8 @@ class EventStateEnum(str, Enum):
start = "start"
update = "update"
end = "end"
class RegenerateDescriptionEnum(str, Enum):
thumbnails = "thumbnails"
snapshot = "snapshot"

View File

@@ -27,7 +27,13 @@ import { baseUrl } from "@/api/baseUrl";
import { cn } from "@/lib/utils";
import ActivityIndicator from "@/components/indicators/activity-indicator";
import { ASPECT_VERTICAL_LAYOUT, ASPECT_WIDE_LAYOUT } from "@/types/record";
import { FaHistory, FaImage, FaRegListAlt, FaVideo } from "react-icons/fa";
import {
FaChevronDown,
FaHistory,
FaImage,
FaRegListAlt,
FaVideo,
} from "react-icons/fa";
import { FaRotate } from "react-icons/fa6";
import ObjectLifecycle from "./ObjectLifecycle";
import {
@@ -47,6 +53,12 @@ import { useNavigate } from "react-router-dom";
import Chip from "@/components/indicators/Chip";
import { capitalizeFirstLetter } from "@/utils/stringUtil";
import useGlobalMutation from "@/hooks/use-global-mutate";
import {
DropdownMenu,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu";
const SEARCH_TABS = [
"details",
@@ -309,13 +321,14 @@
});
}, [desc, search, mutate]);
const regenerateDescription = useCallback(() => {
const regenerateDescription = useCallback(
(source: "snapshot" | "thumbnails") => {
if (!search) {
return;
}
axios
.put(`events/${search.id}/description/regenerate`)
.put(`events/${search.id}/description/regenerate?source=${source}`)
.then((resp) => {
if (resp.status == 200) {
toast.success(
@@ -335,7 +348,9 @@
},
);
});
}, [search, config]);
},
[search, config],
);
return (
<div className="flex flex-col gap-5">
@@ -403,7 +418,37 @@
/>
<div className="flex w-full flex-row justify-end gap-2">
{config?.genai.enabled && (
<Button onClick={regenerateDescription}>Regenerate</Button>
<div className="flex items-center">
<Button
className="rounded-r-none border-r-0"
onClick={() => regenerateDescription("thumbnails")}
>
Regenerate
</Button>
{search.has_snapshot && (
<DropdownMenu>
<DropdownMenuTrigger asChild>
<Button className="rounded-l-none border-l-0 px-2">
<FaChevronDown className="size-3" />
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent>
<DropdownMenuItem
className="cursor-pointer"
onClick={() => regenerateDescription("snapshot")}
>
Regenerate from Snapshot
</DropdownMenuItem>
<DropdownMenuItem
className="cursor-pointer"
onClick={() => regenerateDescription("thumbnails")}
>
Regenerate from Thumbnails
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
)}
</div>
)}
<Button variant="select" onClick={updateDescription}>
Save