Add ability to change source of images for review descriptions (#20676)

* Add ability to change source of images for review descriptions
* Undo
2026-02-20 13:54:36 +01:00 · 2025-10-26 07:40:38 -06:00
parent 840d567d22
commit 094a0a6e05
4 changed files with 223 additions and 39 deletions
--- a/frigate/data_processing/post/review_descriptions.py
+++ b/frigate/data_processing/post/review_descriptions.py
@@ -3,6 +3,7 @@
 import copy
 import datetime
 import logging
+import math
 import os
 import shutil
 import threading
@@ -10,16 +11,18 @@ from pathlib import Path
 from typing import Any

 import cv2
+from peewee import DoesNotExist

 from frigate.comms.embeddings_updater import EmbeddingsRequestEnum
 from frigate.comms.inter_process import InterProcessRequestor
 from frigate.config import FrigateConfig
-from frigate.config.camera.review import GenAIReviewConfig
+from frigate.config.camera.review import GenAIReviewConfig, ImageSourceEnum
 from frigate.const import CACHE_DIR, CLIPS_DIR, UPDATE_REVIEW_DESCRIPTION
 from frigate.data_processing.types import PostProcessDataEnum
 from frigate.genai import GenAIClient
-from frigate.models import ReviewSegment
+from frigate.models import Recordings, ReviewSegment
 from frigate.util.builtin import EventsPerSecond, InferenceSpeed
+from frigate.util.image import get_image_from_recording

 from ..post.api import PostProcessorApi
 from ..types import DataProcessorMetrics
@@ -43,20 +46,35 @@ class ReviewDescriptionProcessor(PostProcessorApi):
        self.review_descs_dps = EventsPerSecond()
        self.review_descs_dps.start()

-    def calculate_frame_count(self) -> int:
-        """Calculate optimal number of frames based on context size."""
-        # With our preview images (height of 180px) each image should be ~100 tokens per image
-        # We want to be conservative to not have too long of query times with too many images
+    def calculate_frame_count(
+        self, image_source: ImageSourceEnum = ImageSourceEnum.preview
+    ) -> int:
+        """Calculate optimal number of frames based on context size and image source."""
        context_size = self.genai_client.get_context_size()

-        if context_size > 10000:
-            return 20
-        elif context_size > 6000:
-            return 16
-        elif context_size > 4000:
-            return 12
+        if image_source == ImageSourceEnum.recordings:
+            # With recordings at 480p resolution (480px height), each image uses ~200-300 tokens
+            # This is ~2-3x more than preview images, so we reduce frame count accordingly
+            # to avoid exceeding context limits and maintain reasonable inference times
+            if context_size > 10000:
+                return 12
+            elif context_size > 6000:
+                return 10
+            elif context_size > 4000:
+                return 8
+            else:
+                return 6
        else:
-            return 8
+            # With preview images (180px height), each image uses ~100 tokens
+            # We can send more frames since they're lower resolution
+            if context_size > 10000:
+                return 20
+            elif context_size > 6000:
+                return 16
+            elif context_size > 4000:
+                return 12
+            else:
+                return 8

    def process_data(self, data, data_type):
        self.metrics.review_desc_dps.value = self.review_descs_dps.eps()
@@ -88,36 +106,50 @@ class ReviewDescriptionProcessor(PostProcessorApi):
            ):
                return

-            frames = self.get_cache_frames(
-                camera, final_data["start_time"], final_data["end_time"]
-            )
+            image_source = camera_config.review.genai.image_source

-            if not frames:
-                frames = [final_data["thumb_path"]]
-
-            thumbs = []
-
-            for idx, thumb_path in enumerate(frames):
-                thumb_data = cv2.imread(thumb_path)
-                ret, jpg = cv2.imencode(
-                    ".jpg", thumb_data, [int(cv2.IMWRITE_JPEG_QUALITY), 100]
+            if image_source == ImageSourceEnum.recordings:
+                thumbs = self.get_recording_frames(
+                    camera,
+                    final_data["start_time"],
+                    final_data["end_time"],
+                    height=480,  # Use 480p for good balance between quality and token usage
                )

-                if ret:
-                    thumbs.append(jpg.tobytes())
-
-                if camera_config.review.genai.debug_save_thumbnails:
-                    id = data["after"]["id"]
-                    Path(os.path.join(CLIPS_DIR, "genai-requests", f"{id}")).mkdir(
+                if not thumbs:
+                    # Fallback to preview frames if no recordings available
+                    logger.warning(
+                        f"No recording frames found for {camera}, falling back to preview frames"
+                    )
+                    thumbs = self.get_preview_frames_as_bytes(
+                        camera,
+                        final_data["start_time"],
+                        final_data["end_time"],
+                        final_data["thumb_path"],
+                        id,
+                        camera_config.review.genai.debug_save_thumbnails,
+                    )
+                elif camera_config.review.genai.debug_save_thumbnails:
+                    # Save debug thumbnails for recordings
+                    Path(os.path.join(CLIPS_DIR, "genai-requests", id)).mkdir(
                        parents=True, exist_ok=True
                    )
-                    shutil.copy(
-                        thumb_path,
-                        os.path.join(
-                            CLIPS_DIR,
-                            f"genai-requests/{id}/{idx}.webp",
-                        ),
-                    )
+                    for idx, frame_bytes in enumerate(thumbs):
+                        with open(
+                            os.path.join(CLIPS_DIR, f"genai-requests/{id}/{idx}.jpg"),
+                            "wb",
+                        ) as f:
+                            f.write(frame_bytes)
+            else:
+                # Use preview frames
+                thumbs = self.get_preview_frames_as_bytes(
+                    camera,
+                    final_data["start_time"],
+                    final_data["end_time"],
+                    final_data["thumb_path"],
+                    id,
+                    camera_config.review.genai.debug_save_thumbnails,
+                )

            # kickoff analysis
            self.review_descs_dps.update()
@@ -231,6 +263,122 @@ class ReviewDescriptionProcessor(PostProcessorApi):

        return selected_frames

+    def get_recording_frames(
+        self,
+        camera: str,
+        start_time: float,
+        end_time: float,
+        height: int = 480,
+    ) -> list[bytes]:
+        """Get frames from recordings at specified timestamps."""
+        duration = end_time - start_time
+        desired_frame_count = self.calculate_frame_count(ImageSourceEnum.recordings)
+
+        # Calculate evenly spaced timestamps throughout the duration
+        if desired_frame_count == 1:
+            timestamps = [start_time + duration / 2]
+        else:
+            step = duration / (desired_frame_count - 1)
+            timestamps = [start_time + (i * step) for i in range(desired_frame_count)]
+
+        def extract_frame_from_recording(ts: float) -> bytes | None:
+            """Extract a single frame from recording at given timestamp."""
+            try:
+                recording = (
+                    Recordings.select(
+                        Recordings.path,
+                        Recordings.start_time,
+                    )
+                    .where((ts >= Recordings.start_time) & (ts <= Recordings.end_time))
+                    .where(Recordings.camera == camera)
+                    .order_by(Recordings.start_time.desc())
+                    .limit(1)
+                    .get()
+                )
+
+                time_in_segment = ts - recording.start_time
+                return get_image_from_recording(
+                    self.config.ffmpeg,
+                    recording.path,
+                    time_in_segment,
+                    "mjpeg",
+                    height=height,
+                )
+            except DoesNotExist:
+                return None
+
+        frames = []
+
+        for timestamp in timestamps:
+            try:
+                # Try to extract frame at exact timestamp
+                image_data = extract_frame_from_recording(timestamp)
+
+                if not image_data:
+                    # Fall back to the timestamp rounded up to the next integer (math.ceil)
+                    rounded_timestamp = math.ceil(timestamp)
+                    image_data = extract_frame_from_recording(rounded_timestamp)
+
+                if image_data:
+                    frames.append(image_data)
+                else:
+                    logger.warning(
+                        f"No recording found for {camera} at timestamp {timestamp}"
+                    )
+            except Exception as e:
+                logger.error(
+                    f"Error extracting frame from recording for {camera} at {timestamp}: {e}"
+                )
+                continue
+
+        return frames
+
+    def get_preview_frames_as_bytes(
+        self,
+        camera: str,
+        start_time: float,
+        end_time: float,
+        thumb_path_fallback: str,
+        review_id: str,
+        save_debug: bool,
+    ) -> list[bytes]:
+        """Get preview frames and convert them to JPEG bytes.
+
+        Args:
+            camera: Camera name
+            start_time: Start timestamp
+            end_time: End timestamp
+            thumb_path_fallback: Fallback thumbnail path if no preview frames found
+            review_id: Review item ID for debug saving
+            save_debug: Whether to save debug thumbnails
+
+        Returns:
+            List of JPEG image bytes
+        """
+        frame_paths = self.get_cache_frames(camera, start_time, end_time)
+        if not frame_paths:
+            frame_paths = [thumb_path_fallback]
+
+        thumbs = []
+        for idx, thumb_path in enumerate(frame_paths):
+            thumb_data = cv2.imread(thumb_path)
+            ret, jpg = cv2.imencode(
+                ".jpg", thumb_data, [int(cv2.IMWRITE_JPEG_QUALITY), 100]
+            )
+            if ret:
+                thumbs.append(jpg.tobytes())
+
+            if save_debug:
+                Path(os.path.join(CLIPS_DIR, "genai-requests", review_id)).mkdir(
+                    parents=True, exist_ok=True
+                )
+                shutil.copy(
+                    thumb_path,
+                    os.path.join(CLIPS_DIR, f"genai-requests/{review_id}/{idx}.webp"),
+                )
+
+        return thumbs
+

@staticmethod
 def run_analysis(