Add ability to change source of images for review descriptions (#20676)

* Add ability to change source of images for review descriptions

* Undo
This commit is contained in:
Nicolas Mowen
2025-10-26 07:40:38 -06:00
committed by GitHub
parent 840d567d22
commit 094a0a6e05
4 changed files with 223 additions and 39 deletions

View File

@@ -3,6 +3,7 @@
import copy
import datetime
import logging
import math
import os
import shutil
import threading
@@ -10,16 +11,18 @@ from pathlib import Path
from typing import Any
import cv2
from peewee import DoesNotExist
from frigate.comms.embeddings_updater import EmbeddingsRequestEnum
from frigate.comms.inter_process import InterProcessRequestor
from frigate.config import FrigateConfig
from frigate.config.camera.review import GenAIReviewConfig
from frigate.config.camera.review import GenAIReviewConfig, ImageSourceEnum
from frigate.const import CACHE_DIR, CLIPS_DIR, UPDATE_REVIEW_DESCRIPTION
from frigate.data_processing.types import PostProcessDataEnum
from frigate.genai import GenAIClient
from frigate.models import ReviewSegment
from frigate.models import Recordings, ReviewSegment
from frigate.util.builtin import EventsPerSecond, InferenceSpeed
from frigate.util.image import get_image_from_recording
from ..post.api import PostProcessorApi
from ..types import DataProcessorMetrics
@@ -43,20 +46,35 @@ class ReviewDescriptionProcessor(PostProcessorApi):
self.review_descs_dps = EventsPerSecond()
self.review_descs_dps.start()
def calculate_frame_count(
    self, image_source: ImageSourceEnum = ImageSourceEnum.preview
) -> int:
    """Calculate the number of frames to send based on context size and image source.

    Args:
        image_source: Where the images come from. Recording frames are
            requested at 480px height and preview frames are 180px high, so
            recording frames cost more tokens each and fewer of them are sent.

    Returns:
        The number of frames to include in the GenAI request.
    """
    context_size = self.genai_client.get_context_size()

    if image_source == ImageSourceEnum.recordings:
        # With recordings at 480p resolution (480px height), each image uses
        # ~200-300 tokens. This is ~2-3x more than preview images, so the
        # frame count is reduced to avoid exceeding context limits and to
        # keep inference times reasonable.
        tiers = ((10000, 12), (6000, 10), (4000, 8))
        minimum_frames = 6
    else:
        # With preview images (180px height), each image uses ~100 tokens,
        # so more frames can be sent.
        tiers = ((10000, 20), (6000, 16), (4000, 12))
        minimum_frames = 8

    # Tiers are ordered from the largest context size down; first match wins.
    for threshold, frame_count in tiers:
        if context_size > threshold:
            return frame_count

    return minimum_frames
def process_data(self, data, data_type):
self.metrics.review_desc_dps.value = self.review_descs_dps.eps()
@@ -88,36 +106,50 @@ class ReviewDescriptionProcessor(PostProcessorApi):
):
return
frames = self.get_cache_frames(
camera, final_data["start_time"], final_data["end_time"]
)
image_source = camera_config.review.genai.image_source
if not frames:
frames = [final_data["thumb_path"]]
thumbs = []
for idx, thumb_path in enumerate(frames):
thumb_data = cv2.imread(thumb_path)
ret, jpg = cv2.imencode(
".jpg", thumb_data, [int(cv2.IMWRITE_JPEG_QUALITY), 100]
if image_source == ImageSourceEnum.recordings:
thumbs = self.get_recording_frames(
camera,
final_data["start_time"],
final_data["end_time"],
height=480, # Use 480p for good balance between quality and token usage
)
if ret:
thumbs.append(jpg.tobytes())
if camera_config.review.genai.debug_save_thumbnails:
id = data["after"]["id"]
Path(os.path.join(CLIPS_DIR, "genai-requests", f"{id}")).mkdir(
if not thumbs:
# Fallback to preview frames if no recordings available
logger.warning(
f"No recording frames found for {camera}, falling back to preview frames"
)
thumbs = self.get_preview_frames_as_bytes(
camera,
final_data["start_time"],
final_data["end_time"],
final_data["thumb_path"],
id,
camera_config.review.genai.debug_save_thumbnails,
)
elif camera_config.review.genai.debug_save_thumbnails:
# Save debug thumbnails for recordings
Path(os.path.join(CLIPS_DIR, "genai-requests", id)).mkdir(
parents=True, exist_ok=True
)
shutil.copy(
thumb_path,
os.path.join(
CLIPS_DIR,
f"genai-requests/{id}/{idx}.webp",
),
)
for idx, frame_bytes in enumerate(thumbs):
with open(
os.path.join(CLIPS_DIR, f"genai-requests/{id}/{idx}.jpg"),
"wb",
) as f:
f.write(frame_bytes)
else:
# Use preview frames
thumbs = self.get_preview_frames_as_bytes(
camera,
final_data["start_time"],
final_data["end_time"],
final_data["thumb_path"],
id,
camera_config.review.genai.debug_save_thumbnails,
)
# kickoff analysis
self.review_descs_dps.update()
@@ -231,6 +263,122 @@ class ReviewDescriptionProcessor(PostProcessorApi):
return selected_frames
def get_recording_frames(
    self,
    camera: str,
    start_time: float,
    end_time: float,
    height: int = 480,
) -> list[bytes]:
    """Sample evenly spaced frames from a camera's recordings.

    Args:
        camera: Camera name used to filter the recordings query.
        start_time: Timestamp of the first sample.
        end_time: Timestamp of the last sample.
        height: Height passed through to the frame extraction helper.

    Returns:
        The extracted frames, in timestamp order. Timestamps for which no
        recording is found, or for which extraction fails, are skipped, so
        the list may be shorter than the desired frame count (or empty).
    """
    duration = end_time - start_time
    frame_total = self.calculate_frame_count(ImageSourceEnum.recordings)

    # Spread the sample points evenly across the whole time window.
    if frame_total == 1:
        timestamps = [start_time + duration / 2]
    else:
        interval = duration / (frame_total - 1)
        timestamps = [start_time + (n * interval) for n in range(frame_total)]

    def grab_frame(ts: float) -> bytes | None:
        """Return the frame at ``ts``, or None when no recording covers it."""
        covers_ts = (ts >= Recordings.start_time) & (ts <= Recordings.end_time)

        try:
            query = (
                Recordings.select(Recordings.path, Recordings.start_time)
                .where(covers_ts)
                .where(Recordings.camera == camera)
                .order_by(Recordings.start_time.desc())
                .limit(1)
            )
            segment = query.get()

            # Offset of the requested timestamp within the matched segment.
            offset = ts - segment.start_time
            return get_image_from_recording(
                self.config.ffmpeg,
                segment.path,
                offset,
                "mjpeg",
                height=height,
            )
        except DoesNotExist:
            return None

    collected = []

    for timestamp in timestamps:
        try:
            # Try the exact timestamp first, then the timestamp rounded up
            # to the next whole second as a fallback.
            image_data = grab_frame(timestamp) or grab_frame(math.ceil(timestamp))
        except Exception as e:
            logger.error(
                f"Error extracting frame from recording for {camera} at {timestamp}: {e}"
            )
            continue

        if image_data:
            collected.append(image_data)
        else:
            logger.warning(
                f"No recording found for {camera} at timestamp {timestamp}"
            )

    return collected
def get_preview_frames_as_bytes(
    self,
    camera: str,
    start_time: float,
    end_time: float,
    thumb_path_fallback: str,
    review_id: str,
    save_debug: bool,
) -> list[bytes]:
    """Get preview frames and convert them to JPEG bytes.

    Frames that cannot be read from disk are skipped with a warning instead
    of aborting the whole request.

    Args:
        camera: Camera name
        start_time: Start timestamp
        end_time: End timestamp
        thumb_path_fallback: Fallback thumbnail path if no preview frames found
        review_id: Review item ID for debug saving
        save_debug: Whether to save debug thumbnails

    Returns:
        List of JPEG image bytes
    """
    frame_paths = self.get_cache_frames(camera, start_time, end_time)

    if not frame_paths:
        frame_paths = [thumb_path_fallback]

    debug_dir = os.path.join(CLIPS_DIR, "genai-requests", review_id)

    if save_debug:
        # The target directory is the same for every frame, so create it once.
        Path(debug_dir).mkdir(parents=True, exist_ok=True)

    thumbs = []

    for idx, thumb_path in enumerate(frame_paths):
        thumb_data = cv2.imread(thumb_path)

        if thumb_data is None:
            # cv2.imread signals an unreadable or missing file by returning
            # None; passing that on to cv2.imencode would raise.
            logger.warning(
                f"Unable to read preview frame {thumb_path} for {camera}, skipping"
            )
            continue

        ret, jpg = cv2.imencode(
            ".jpg", thumb_data, [int(cv2.IMWRITE_JPEG_QUALITY), 100]
        )

        if ret:
            thumbs.append(jpg.tobytes())

        if save_debug:
            # Keep a copy of the source frame alongside the request for debugging.
            shutil.copy(thumb_path, os.path.join(debug_dir, f"{idx}.webp"))

    return thumbs
@staticmethod
def run_analysis(