mirror of
https://github.com/blakeblackshear/frigate.git
synced 2024-11-30 19:09:13 +01:00
Add ability to configure genai to use snapshot instead of thumbnails (#14077)
* Allow embedding of snapshot for description via config option
* docs
* frontend button
* Backend
* crop snapshot to region
* only show dropdown when event has snapshot
* fix cursor on dropdown
* crop on initial generation as well
* use enum for type
* fix type
This commit is contained in:
parent
4dc4704bb4
commit
95d6da3111
@ -130,10 +130,13 @@ genai:
|
|||||||
|
|
||||||
Prompts can also be overridden at the camera level to provide a more detailed prompt to the model about your specific camera, if you desire. By default, descriptions will be generated for all tracked objects and all zones. You can optionally specify `objects` and `required_zones` to only generate descriptions for certain tracked objects or zones.
|
Prompts can also be overridden at the camera level to provide a more detailed prompt to the model about your specific camera, if you desire. By default, descriptions will be generated for all tracked objects and all zones. You can optionally specify `objects` and `required_zones` to only generate descriptions for certain tracked objects or zones.
|
||||||
|
|
||||||
|
Optionally, you can generate the description using a snapshot (if enabled) by setting `use_snapshot` to `True`. By default, this is set to `False`, which sends the thumbnails collected over the object's lifetime to the model. Using a snapshot provides the AI with a higher-resolution image (typically downscaled by the AI itself), but the trade-off is that only a single image is used, which might limit the model's ability to determine object movement or direction.
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
cameras:
|
cameras:
|
||||||
front_door:
|
front_door:
|
||||||
genai:
|
genai:
|
||||||
|
use_snapshot: True
|
||||||
prompt: "Describe the {label} in these images from the {camera} security camera at the front door of a house, aimed outward toward the street."
|
prompt: "Describe the {label} in these images from the {camera} security camera at the front door of a house, aimed outward toward the street."
|
||||||
object_prompts:
|
object_prompts:
|
||||||
person: "Describe the main person in these images (gender, age, clothing, activity, etc). Do not include where the activity is occurring (sidewalk, concrete, driveway, etc). If delivering a package, include the company the package is from."
|
person: "Describe the main person in these images (gender, age, clothing, activity, etc). Do not include where the activity is occurring (sidewalk, concrete, driveway, etc). If delivering a package, include the company the package is from."
|
||||||
|
9
frigate/api/defs/regenerate_query_parameters.py
Normal file
9
frigate/api/defs/regenerate_query_parameters.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from frigate.events.types import RegenerateDescriptionEnum
|
||||||
|
|
||||||
|
|
||||||
|
# Query parameters accepted by the description-regenerate endpoint.
# `source` selects which image source the description is generated from and
# defaults to thumbnails when the parameter is omitted.
# NOTE(review): the Optional annotation also admits None, but the endpoint
# concatenates params.source into its response message -- confirm None can
# never reach it, or drop Optional.
class RegenerateQueryParameters(BaseModel):
|
||||||
|
source: Optional[RegenerateDescriptionEnum] = RegenerateDescriptionEnum.thumbnails
|
@ -31,6 +31,9 @@ from frigate.api.defs.events_query_parameters import (
|
|||||||
EventsSearchQueryParams,
|
EventsSearchQueryParams,
|
||||||
EventsSummaryQueryParams,
|
EventsSummaryQueryParams,
|
||||||
)
|
)
|
||||||
|
from frigate.api.defs.regenerate_query_parameters import (
|
||||||
|
RegenerateQueryParameters,
|
||||||
|
)
|
||||||
from frigate.api.defs.tags import Tags
|
from frigate.api.defs.tags import Tags
|
||||||
from frigate.const import (
|
from frigate.const import (
|
||||||
CLIPS_DIR,
|
CLIPS_DIR,
|
||||||
@ -996,7 +999,9 @@ def set_description(
|
|||||||
|
|
||||||
|
|
||||||
@router.put("/events/{event_id}/description/regenerate")
|
@router.put("/events/{event_id}/description/regenerate")
|
||||||
def regenerate_description(request: Request, event_id: str):
|
def regenerate_description(
|
||||||
|
request: Request, event_id: str, params: RegenerateQueryParameters = Depends()
|
||||||
|
):
|
||||||
try:
|
try:
|
||||||
event: Event = Event.get(Event.id == event_id)
|
event: Event = Event.get(Event.id == event_id)
|
||||||
except DoesNotExist:
|
except DoesNotExist:
|
||||||
@ -1009,7 +1014,7 @@ def regenerate_description(request: Request, event_id: str):
|
|||||||
request.app.frigate_config.semantic_search.enabled
|
request.app.frigate_config.semantic_search.enabled
|
||||||
and request.app.frigate_config.genai.enabled
|
and request.app.frigate_config.genai.enabled
|
||||||
):
|
):
|
||||||
request.app.event_metadata_updater.publish(event.id)
|
request.app.event_metadata_updater.publish((event.id, params.source))
|
||||||
|
|
||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
content=(
|
content=(
|
||||||
@ -1017,7 +1022,8 @@ def regenerate_description(request: Request, event_id: str):
|
|||||||
"success": True,
|
"success": True,
|
||||||
"message": "Event "
|
"message": "Event "
|
||||||
+ event_id
|
+ event_id
|
||||||
+ " description regeneration has been requested.",
|
+ " description regeneration has been requested using "
|
||||||
|
+ params.source,
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
status_code=200,
|
status_code=200,
|
||||||
|
@ -4,6 +4,8 @@ import logging
|
|||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
from frigate.events.types import RegenerateDescriptionEnum
|
||||||
|
|
||||||
from .zmq_proxy import Publisher, Subscriber
|
from .zmq_proxy import Publisher, Subscriber
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@ -23,6 +25,9 @@ class EventMetadataPublisher(Publisher):
|
|||||||
topic = topic.value
|
topic = topic.value
|
||||||
super().__init__(topic)
|
super().__init__(topic)
|
||||||
|
|
||||||
|
# Forward an (event_id, source) tuple to the base class's publish(); this
# override only adds the narrower payload annotation.
def publish(self, payload: tuple[str, RegenerateDescriptionEnum]) -> None:
|
||||||
|
super().publish(payload)
|
||||||
|
|
||||||
|
|
||||||
class EventMetadataSubscriber(Subscriber):
|
class EventMetadataSubscriber(Subscriber):
|
||||||
"""Simplifies receiving event metadata."""
|
"""Simplifies receiving event metadata."""
|
||||||
@ -35,10 +40,12 @@ class EventMetadataSubscriber(Subscriber):
|
|||||||
|
|
||||||
def check_for_update(
|
def check_for_update(
|
||||||
self, timeout: float = None
|
self, timeout: float = None
|
||||||
) -> Optional[tuple[EventMetadataTypeEnum, any]]:
|
) -> Optional[tuple[EventMetadataTypeEnum, str, RegenerateDescriptionEnum]]:
|
||||||
return super().check_for_update(timeout)
|
return super().check_for_update(timeout)
|
||||||
|
|
||||||
def _return_object(self, topic: str, payload: any) -> any:
|
def _return_object(self, topic: str, payload: any) -> any:
|
||||||
if payload is None:
|
if payload is None:
|
||||||
return (None, None)
|
return (None, None, None)
|
||||||
return (EventMetadataTypeEnum[topic[len(self.topic_base) :]], payload)
|
topic = EventMetadataTypeEnum[topic[len(self.topic_base) :]]
|
||||||
|
event_id, source = payload
|
||||||
|
return (topic, event_id, RegenerateDescriptionEnum(source))
|
||||||
|
@ -18,6 +18,9 @@ class GenAIProviderEnum(str, Enum):
|
|||||||
# uses BaseModel because some global attributes are not available at the camera level
|
# uses BaseModel because some global attributes are not available at the camera level
|
||||||
class GenAICameraConfig(BaseModel):
|
class GenAICameraConfig(BaseModel):
|
||||||
enabled: bool = Field(default=False, title="Enable GenAI for camera.")
|
enabled: bool = Field(default=False, title="Enable GenAI for camera.")
|
||||||
|
use_snapshot: bool = Field(
|
||||||
|
default=False, title="Use snapshots for generating descriptions."
|
||||||
|
)
|
||||||
prompt: str = Field(
|
prompt: str = Field(
|
||||||
default="Describe the {label} in the sequence of images with as much detail as possible. Do not describe the background.",
|
default="Describe the {label} in the sequence of images with as much detail as possible. Do not describe the background.",
|
||||||
title="Default caption prompt.",
|
title="Default caption prompt.",
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
import base64
|
import base64
|
||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import threading
|
import threading
|
||||||
from multiprocessing.synchronize import Event as MpEvent
|
from multiprocessing.synchronize import Event as MpEvent
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
@ -19,7 +20,7 @@ from frigate.comms.event_metadata_updater import (
|
|||||||
from frigate.comms.events_updater import EventEndSubscriber, EventUpdateSubscriber
|
from frigate.comms.events_updater import EventEndSubscriber, EventUpdateSubscriber
|
||||||
from frigate.comms.inter_process import InterProcessRequestor
|
from frigate.comms.inter_process import InterProcessRequestor
|
||||||
from frigate.config import FrigateConfig
|
from frigate.config import FrigateConfig
|
||||||
from frigate.const import UPDATE_EVENT_DESCRIPTION
|
from frigate.const import CLIPS_DIR, UPDATE_EVENT_DESCRIPTION
|
||||||
from frigate.events.types import EventTypeEnum
|
from frigate.events.types import EventTypeEnum
|
||||||
from frigate.genai import get_genai_client
|
from frigate.genai import get_genai_client
|
||||||
from frigate.models import Event
|
from frigate.models import Event
|
||||||
@ -136,6 +137,41 @@ class EmbeddingMaintainer(threading.Thread):
|
|||||||
or set(event.zones) & set(camera_config.genai.required_zones)
|
or set(event.zones) & set(camera_config.genai.required_zones)
|
||||||
)
|
)
|
||||||
):
|
):
|
||||||
|
if event.has_snapshot and camera_config.genai.use_snapshot:
|
||||||
|
with open(
|
||||||
|
os.path.join(CLIPS_DIR, f"{event.camera}-{event.id}.jpg"),
|
||||||
|
"rb",
|
||||||
|
) as image_file:
|
||||||
|
snapshot_image = image_file.read()
|
||||||
|
|
||||||
|
img = cv2.imdecode(
|
||||||
|
np.frombuffer(snapshot_image, dtype=np.int8),
|
||||||
|
cv2.IMREAD_COLOR,
|
||||||
|
)
|
||||||
|
|
||||||
|
# crop snapshot based on region before sending off to genai
|
||||||
|
height, width = img.shape[:2]
|
||||||
|
x1_rel, y1_rel, width_rel, height_rel = event.data["region"]
|
||||||
|
|
||||||
|
x1, y1 = int(x1_rel * width), int(y1_rel * height)
|
||||||
|
cropped_image = img[
|
||||||
|
y1 : y1 + int(height_rel * height),
|
||||||
|
x1 : x1 + int(width_rel * width),
|
||||||
|
]
|
||||||
|
|
||||||
|
_, buffer = cv2.imencode(".jpg", cropped_image)
|
||||||
|
snapshot_image = buffer.tobytes()
|
||||||
|
|
||||||
|
embed_image = (
|
||||||
|
[snapshot_image]
|
||||||
|
if event.has_snapshot and camera_config.genai.use_snapshot
|
||||||
|
else (
|
||||||
|
[thumbnail for data in self.tracked_events[event_id]]
|
||||||
|
if len(self.tracked_events.get(event_id, [])) > 0
|
||||||
|
else [thumbnail]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
# Generate the description. Call happens in a thread since it is network bound.
|
# Generate the description. Call happens in a thread since it is network bound.
|
||||||
threading.Thread(
|
threading.Thread(
|
||||||
target=self._embed_description,
|
target=self._embed_description,
|
||||||
@ -143,12 +179,7 @@ class EmbeddingMaintainer(threading.Thread):
|
|||||||
daemon=True,
|
daemon=True,
|
||||||
args=(
|
args=(
|
||||||
event,
|
event,
|
||||||
[
|
embed_image,
|
||||||
data["thumbnail"]
|
|
||||||
for data in self.tracked_events[event_id]
|
|
||||||
]
|
|
||||||
if len(self.tracked_events.get(event_id, [])) > 0
|
|
||||||
else [thumbnail],
|
|
||||||
metadata,
|
metadata,
|
||||||
),
|
),
|
||||||
).start()
|
).start()
|
||||||
@ -159,13 +190,15 @@ class EmbeddingMaintainer(threading.Thread):
|
|||||||
|
|
||||||
def _process_event_metadata(self):
|
def _process_event_metadata(self):
|
||||||
# Check for regenerate description requests
|
# Check for regenerate description requests
|
||||||
(topic, event_id) = self.event_metadata_subscriber.check_for_update(timeout=1)
|
(topic, event_id, source) = self.event_metadata_subscriber.check_for_update(
|
||||||
|
timeout=1
|
||||||
|
)
|
||||||
|
|
||||||
if topic is None:
|
if topic is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
if event_id:
|
if event_id:
|
||||||
self.handle_regenerate_description(event_id)
|
self.handle_regenerate_description(event_id, source)
|
||||||
|
|
||||||
def _create_thumbnail(self, yuv_frame, box, height=500) -> Optional[bytes]:
|
def _create_thumbnail(self, yuv_frame, box, height=500) -> Optional[bytes]:
|
||||||
"""Return jpg thumbnail of a region of the frame."""
|
"""Return jpg thumbnail of a region of the frame."""
|
||||||
@ -228,7 +261,7 @@ class EmbeddingMaintainer(threading.Thread):
|
|||||||
description,
|
description,
|
||||||
)
|
)
|
||||||
|
|
||||||
def handle_regenerate_description(self, event_id: str) -> None:
|
def handle_regenerate_description(self, event_id: str, source: str) -> None:
|
||||||
try:
|
try:
|
||||||
event: Event = Event.get(Event.id == event_id)
|
event: Event = Event.get(Event.id == event_id)
|
||||||
except DoesNotExist:
|
except DoesNotExist:
|
||||||
@ -243,4 +276,38 @@ class EmbeddingMaintainer(threading.Thread):
|
|||||||
metadata = get_metadata(event)
|
metadata = get_metadata(event)
|
||||||
thumbnail = base64.b64decode(event.thumbnail)
|
thumbnail = base64.b64decode(event.thumbnail)
|
||||||
|
|
||||||
self._embed_description(event, [thumbnail], metadata)
|
logger.debug(f"Using ${source} regeneration for ${event}")
|
||||||
|
|
||||||
|
if event.has_snapshot and source == "snapshot":
|
||||||
|
with open(
|
||||||
|
os.path.join(CLIPS_DIR, f"{event.camera}-{event.id}.jpg"),
|
||||||
|
"rb",
|
||||||
|
) as image_file:
|
||||||
|
snapshot_image = image_file.read()
|
||||||
|
img = cv2.imdecode(
|
||||||
|
np.frombuffer(snapshot_image, dtype=np.int8), cv2.IMREAD_COLOR
|
||||||
|
)
|
||||||
|
|
||||||
|
# crop snapshot based on region before sending off to genai
|
||||||
|
height, width = img.shape[:2]
|
||||||
|
x1_rel, y1_rel, width_rel, height_rel = event.data["region"]
|
||||||
|
|
||||||
|
x1, y1 = int(x1_rel * width), int(y1_rel * height)
|
||||||
|
cropped_image = img[
|
||||||
|
y1 : y1 + int(height_rel * height), x1 : x1 + int(width_rel * width)
|
||||||
|
]
|
||||||
|
|
||||||
|
_, buffer = cv2.imencode(".jpg", cropped_image)
|
||||||
|
snapshot_image = buffer.tobytes()
|
||||||
|
|
||||||
|
embed_image = (
|
||||||
|
[snapshot_image]
|
||||||
|
if event.has_snapshot and source == "snapshot"
|
||||||
|
else (
|
||||||
|
[thumbnail for data in self.tracked_events[event_id]]
|
||||||
|
if len(self.tracked_events.get(event_id, [])) > 0
|
||||||
|
else [thumbnail]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
self._embed_description(event, embed_image, metadata)
|
||||||
|
@ -12,3 +12,8 @@ class EventStateEnum(str, Enum):
|
|||||||
start = "start"
|
start = "start"
|
||||||
update = "update"
|
update = "update"
|
||||||
end = "end"
|
end = "end"
|
||||||
|
|
||||||
|
|
||||||
|
# Image source used when regenerating an event description. The string
# values match the `source` query parameter the frontend sends
# ("thumbnails" or "snapshot").
class RegenerateDescriptionEnum(str, Enum):
|
||||||
|
thumbnails = "thumbnails"
|
||||||
|
snapshot = "snapshot"
|
||||||
|
@ -27,7 +27,13 @@ import { baseUrl } from "@/api/baseUrl";
|
|||||||
import { cn } from "@/lib/utils";
|
import { cn } from "@/lib/utils";
|
||||||
import ActivityIndicator from "@/components/indicators/activity-indicator";
|
import ActivityIndicator from "@/components/indicators/activity-indicator";
|
||||||
import { ASPECT_VERTICAL_LAYOUT, ASPECT_WIDE_LAYOUT } from "@/types/record";
|
import { ASPECT_VERTICAL_LAYOUT, ASPECT_WIDE_LAYOUT } from "@/types/record";
|
||||||
import { FaHistory, FaImage, FaRegListAlt, FaVideo } from "react-icons/fa";
|
import {
|
||||||
|
FaChevronDown,
|
||||||
|
FaHistory,
|
||||||
|
FaImage,
|
||||||
|
FaRegListAlt,
|
||||||
|
FaVideo,
|
||||||
|
} from "react-icons/fa";
|
||||||
import { FaRotate } from "react-icons/fa6";
|
import { FaRotate } from "react-icons/fa6";
|
||||||
import ObjectLifecycle from "./ObjectLifecycle";
|
import ObjectLifecycle from "./ObjectLifecycle";
|
||||||
import {
|
import {
|
||||||
@ -47,6 +53,12 @@ import { useNavigate } from "react-router-dom";
|
|||||||
import Chip from "@/components/indicators/Chip";
|
import Chip from "@/components/indicators/Chip";
|
||||||
import { capitalizeFirstLetter } from "@/utils/stringUtil";
|
import { capitalizeFirstLetter } from "@/utils/stringUtil";
|
||||||
import useGlobalMutation from "@/hooks/use-global-mutate";
|
import useGlobalMutation from "@/hooks/use-global-mutate";
|
||||||
|
import {
|
||||||
|
DropdownMenu,
|
||||||
|
DropdownMenuContent,
|
||||||
|
DropdownMenuItem,
|
||||||
|
DropdownMenuTrigger,
|
||||||
|
} from "@/components/ui/dropdown-menu";
|
||||||
|
|
||||||
const SEARCH_TABS = [
|
const SEARCH_TABS = [
|
||||||
"details",
|
"details",
|
||||||
@ -309,13 +321,14 @@ function ObjectDetailsTab({
|
|||||||
});
|
});
|
||||||
}, [desc, search, mutate]);
|
}, [desc, search, mutate]);
|
||||||
|
|
||||||
const regenerateDescription = useCallback(() => {
|
const regenerateDescription = useCallback(
|
||||||
|
(source: "snapshot" | "thumbnails") => {
|
||||||
if (!search) {
|
if (!search) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
axios
|
axios
|
||||||
.put(`events/${search.id}/description/regenerate`)
|
.put(`events/${search.id}/description/regenerate?source=${source}`)
|
||||||
.then((resp) => {
|
.then((resp) => {
|
||||||
if (resp.status == 200) {
|
if (resp.status == 200) {
|
||||||
toast.success(
|
toast.success(
|
||||||
@ -335,7 +348,9 @@ function ObjectDetailsTab({
|
|||||||
},
|
},
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
}, [search, config]);
|
},
|
||||||
|
[search, config],
|
||||||
|
);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="flex flex-col gap-5">
|
<div className="flex flex-col gap-5">
|
||||||
@ -403,7 +418,37 @@ function ObjectDetailsTab({
|
|||||||
/>
|
/>
|
||||||
<div className="flex w-full flex-row justify-end gap-2">
|
<div className="flex w-full flex-row justify-end gap-2">
|
||||||
{config?.genai.enabled && (
|
{config?.genai.enabled && (
|
||||||
<Button onClick={regenerateDescription}>Regenerate</Button>
|
<div className="flex items-center">
|
||||||
|
<Button
|
||||||
|
className="rounded-r-none border-r-0"
|
||||||
|
onClick={() => regenerateDescription("thumbnails")}
|
||||||
|
>
|
||||||
|
Regenerate
|
||||||
|
</Button>
|
||||||
|
{search.has_snapshot && (
|
||||||
|
<DropdownMenu>
|
||||||
|
<DropdownMenuTrigger asChild>
|
||||||
|
<Button className="rounded-l-none border-l-0 px-2">
|
||||||
|
<FaChevronDown className="size-3" />
|
||||||
|
</Button>
|
||||||
|
</DropdownMenuTrigger>
|
||||||
|
<DropdownMenuContent>
|
||||||
|
<DropdownMenuItem
|
||||||
|
className="cursor-pointer"
|
||||||
|
onClick={() => regenerateDescription("snapshot")}
|
||||||
|
>
|
||||||
|
Regenerate from Snapshot
|
||||||
|
</DropdownMenuItem>
|
||||||
|
<DropdownMenuItem
|
||||||
|
className="cursor-pointer"
|
||||||
|
onClick={() => regenerateDescription("thumbnails")}
|
||||||
|
>
|
||||||
|
Regenerate from Thumbnails
|
||||||
|
</DropdownMenuItem>
|
||||||
|
</DropdownMenuContent>
|
||||||
|
</DropdownMenu>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
)}
|
)}
|
||||||
<Button variant="select" onClick={updateDescription}>
|
<Button variant="select" onClick={updateDescription}>
|
||||||
Save
|
Save
|
||||||
|
Loading…
Reference in New Issue
Block a user