mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-02-20 13:54:36 +01:00
Add ability to change source of images for review descriptions (#20676)
* Add ability to change source of images for review descriptions * Undo
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
import copy
|
||||
import datetime
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
import shutil
|
||||
import threading
|
||||
@@ -10,16 +11,18 @@ from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import cv2
|
||||
from peewee import DoesNotExist
|
||||
|
||||
from frigate.comms.embeddings_updater import EmbeddingsRequestEnum
|
||||
from frigate.comms.inter_process import InterProcessRequestor
|
||||
from frigate.config import FrigateConfig
|
||||
from frigate.config.camera.review import GenAIReviewConfig
|
||||
from frigate.config.camera.review import GenAIReviewConfig, ImageSourceEnum
|
||||
from frigate.const import CACHE_DIR, CLIPS_DIR, UPDATE_REVIEW_DESCRIPTION
|
||||
from frigate.data_processing.types import PostProcessDataEnum
|
||||
from frigate.genai import GenAIClient
|
||||
from frigate.models import ReviewSegment
|
||||
from frigate.models import Recordings, ReviewSegment
|
||||
from frigate.util.builtin import EventsPerSecond, InferenceSpeed
|
||||
from frigate.util.image import get_image_from_recording
|
||||
|
||||
from ..post.api import PostProcessorApi
|
||||
from ..types import DataProcessorMetrics
|
||||
@@ -43,20 +46,35 @@ class ReviewDescriptionProcessor(PostProcessorApi):
|
||||
self.review_descs_dps = EventsPerSecond()
|
||||
self.review_descs_dps.start()
|
||||
|
||||
def calculate_frame_count(self) -> int:
|
||||
"""Calculate optimal number of frames based on context size."""
|
||||
# With our preview images (height of 180px) each image should be ~100 tokens per image
|
||||
# We want to be conservative to not have too long of query times with too many images
|
||||
def calculate_frame_count(
|
||||
self, image_source: ImageSourceEnum = ImageSourceEnum.preview
|
||||
) -> int:
|
||||
"""Calculate optimal number of frames based on context size and image source."""
|
||||
context_size = self.genai_client.get_context_size()
|
||||
|
||||
if context_size > 10000:
|
||||
return 20
|
||||
elif context_size > 6000:
|
||||
return 16
|
||||
elif context_size > 4000:
|
||||
return 12
|
||||
if image_source == ImageSourceEnum.recordings:
|
||||
# With recordings at 480p resolution (480px height), each image uses ~200-300 tokens
|
||||
# This is ~2-3x more than preview images, so we reduce frame count accordingly
|
||||
# to avoid exceeding context limits and maintain reasonable inference times
|
||||
if context_size > 10000:
|
||||
return 12
|
||||
elif context_size > 6000:
|
||||
return 10
|
||||
elif context_size > 4000:
|
||||
return 8
|
||||
else:
|
||||
return 6
|
||||
else:
|
||||
return 8
|
||||
# With preview images (180px height), each image uses ~100 tokens
|
||||
# We can send more frames since they're lower resolution
|
||||
if context_size > 10000:
|
||||
return 20
|
||||
elif context_size > 6000:
|
||||
return 16
|
||||
elif context_size > 4000:
|
||||
return 12
|
||||
else:
|
||||
return 8
|
||||
|
||||
def process_data(self, data, data_type):
|
||||
self.metrics.review_desc_dps.value = self.review_descs_dps.eps()
|
||||
@@ -88,36 +106,50 @@ class ReviewDescriptionProcessor(PostProcessorApi):
|
||||
):
|
||||
return
|
||||
|
||||
frames = self.get_cache_frames(
|
||||
camera, final_data["start_time"], final_data["end_time"]
|
||||
)
|
||||
image_source = camera_config.review.genai.image_source
|
||||
|
||||
if not frames:
|
||||
frames = [final_data["thumb_path"]]
|
||||
|
||||
thumbs = []
|
||||
|
||||
for idx, thumb_path in enumerate(frames):
|
||||
thumb_data = cv2.imread(thumb_path)
|
||||
ret, jpg = cv2.imencode(
|
||||
".jpg", thumb_data, [int(cv2.IMWRITE_JPEG_QUALITY), 100]
|
||||
if image_source == ImageSourceEnum.recordings:
|
||||
thumbs = self.get_recording_frames(
|
||||
camera,
|
||||
final_data["start_time"],
|
||||
final_data["end_time"],
|
||||
height=480, # Use 480p for good balance between quality and token usage
|
||||
)
|
||||
|
||||
if ret:
|
||||
thumbs.append(jpg.tobytes())
|
||||
|
||||
if camera_config.review.genai.debug_save_thumbnails:
|
||||
id = data["after"]["id"]
|
||||
Path(os.path.join(CLIPS_DIR, "genai-requests", f"{id}")).mkdir(
|
||||
if not thumbs:
|
||||
# Fallback to preview frames if no recordings available
|
||||
logger.warning(
|
||||
f"No recording frames found for {camera}, falling back to preview frames"
|
||||
)
|
||||
thumbs = self.get_preview_frames_as_bytes(
|
||||
camera,
|
||||
final_data["start_time"],
|
||||
final_data["end_time"],
|
||||
final_data["thumb_path"],
|
||||
id,
|
||||
camera_config.review.genai.debug_save_thumbnails,
|
||||
)
|
||||
elif camera_config.review.genai.debug_save_thumbnails:
|
||||
# Save debug thumbnails for recordings
|
||||
Path(os.path.join(CLIPS_DIR, "genai-requests", id)).mkdir(
|
||||
parents=True, exist_ok=True
|
||||
)
|
||||
shutil.copy(
|
||||
thumb_path,
|
||||
os.path.join(
|
||||
CLIPS_DIR,
|
||||
f"genai-requests/{id}/{idx}.webp",
|
||||
),
|
||||
)
|
||||
for idx, frame_bytes in enumerate(thumbs):
|
||||
with open(
|
||||
os.path.join(CLIPS_DIR, f"genai-requests/{id}/{idx}.jpg"),
|
||||
"wb",
|
||||
) as f:
|
||||
f.write(frame_bytes)
|
||||
else:
|
||||
# Use preview frames
|
||||
thumbs = self.get_preview_frames_as_bytes(
|
||||
camera,
|
||||
final_data["start_time"],
|
||||
final_data["end_time"],
|
||||
final_data["thumb_path"],
|
||||
id,
|
||||
camera_config.review.genai.debug_save_thumbnails,
|
||||
)
|
||||
|
||||
# kickoff analysis
|
||||
self.review_descs_dps.update()
|
||||
@@ -231,6 +263,122 @@ class ReviewDescriptionProcessor(PostProcessorApi):
|
||||
|
||||
return selected_frames
|
||||
|
||||
def get_recording_frames(
|
||||
self,
|
||||
camera: str,
|
||||
start_time: float,
|
||||
end_time: float,
|
||||
height: int = 480,
|
||||
) -> list[bytes]:
|
||||
"""Get frames from recordings at specified timestamps."""
|
||||
duration = end_time - start_time
|
||||
desired_frame_count = self.calculate_frame_count(ImageSourceEnum.recordings)
|
||||
|
||||
# Calculate evenly spaced timestamps throughout the duration
|
||||
if desired_frame_count == 1:
|
||||
timestamps = [start_time + duration / 2]
|
||||
else:
|
||||
step = duration / (desired_frame_count - 1)
|
||||
timestamps = [start_time + (i * step) for i in range(desired_frame_count)]
|
||||
|
||||
def extract_frame_from_recording(ts: float) -> bytes | None:
|
||||
"""Extract a single frame from recording at given timestamp."""
|
||||
try:
|
||||
recording = (
|
||||
Recordings.select(
|
||||
Recordings.path,
|
||||
Recordings.start_time,
|
||||
)
|
||||
.where((ts >= Recordings.start_time) & (ts <= Recordings.end_time))
|
||||
.where(Recordings.camera == camera)
|
||||
.order_by(Recordings.start_time.desc())
|
||||
.limit(1)
|
||||
.get()
|
||||
)
|
||||
|
||||
time_in_segment = ts - recording.start_time
|
||||
return get_image_from_recording(
|
||||
self.config.ffmpeg,
|
||||
recording.path,
|
||||
time_in_segment,
|
||||
"mjpeg",
|
||||
height=height,
|
||||
)
|
||||
except DoesNotExist:
|
||||
return None
|
||||
|
||||
frames = []
|
||||
|
||||
for timestamp in timestamps:
|
||||
try:
|
||||
# Try to extract frame at exact timestamp
|
||||
image_data = extract_frame_from_recording(timestamp)
|
||||
|
||||
if not image_data:
|
||||
# Try with rounded timestamp as fallback
|
||||
rounded_timestamp = math.ceil(timestamp)
|
||||
image_data = extract_frame_from_recording(rounded_timestamp)
|
||||
|
||||
if image_data:
|
||||
frames.append(image_data)
|
||||
else:
|
||||
logger.warning(
|
||||
f"No recording found for {camera} at timestamp {timestamp}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error extracting frame from recording for {camera} at {timestamp}: {e}"
|
||||
)
|
||||
continue
|
||||
|
||||
return frames
|
||||
|
||||
def get_preview_frames_as_bytes(
|
||||
self,
|
||||
camera: str,
|
||||
start_time: float,
|
||||
end_time: float,
|
||||
thumb_path_fallback: str,
|
||||
review_id: str,
|
||||
save_debug: bool,
|
||||
) -> list[bytes]:
|
||||
"""Get preview frames and convert them to JPEG bytes.
|
||||
|
||||
Args:
|
||||
camera: Camera name
|
||||
start_time: Start timestamp
|
||||
end_time: End timestamp
|
||||
thumb_path_fallback: Fallback thumbnail path if no preview frames found
|
||||
review_id: Review item ID for debug saving
|
||||
save_debug: Whether to save debug thumbnails
|
||||
|
||||
Returns:
|
||||
List of JPEG image bytes
|
||||
"""
|
||||
frame_paths = self.get_cache_frames(camera, start_time, end_time)
|
||||
if not frame_paths:
|
||||
frame_paths = [thumb_path_fallback]
|
||||
|
||||
thumbs = []
|
||||
for idx, thumb_path in enumerate(frame_paths):
|
||||
thumb_data = cv2.imread(thumb_path)
|
||||
ret, jpg = cv2.imencode(
|
||||
".jpg", thumb_data, [int(cv2.IMWRITE_JPEG_QUALITY), 100]
|
||||
)
|
||||
if ret:
|
||||
thumbs.append(jpg.tobytes())
|
||||
|
||||
if save_debug:
|
||||
Path(os.path.join(CLIPS_DIR, "genai-requests", review_id)).mkdir(
|
||||
parents=True, exist_ok=True
|
||||
)
|
||||
shutil.copy(
|
||||
thumb_path,
|
||||
os.path.join(CLIPS_DIR, f"genai-requests/{review_id}/{idx}.webp"),
|
||||
)
|
||||
|
||||
return thumbs
|
||||
|
||||
|
||||
@staticmethod
|
||||
def run_analysis(
|
||||
|
||||
Reference in New Issue
Block a user