Review description updates (#20723)

* Update docs for review descriptions * Add logging for context tokens used * Increase number of images due to lower than expected context usage * Re-balance the suspicious activity checks * Adjustments to context sizing * optimize context usage * Adjust context usage * Make title more direct * Update docs
2026-04-19 23:08:08 +02:00 · 2025-10-30 08:52:55 -06:00
parent fd1eb64921
commit 971521cd8e
6 changed files with 131 additions and 75 deletions
--- a/frigate/data_processing/post/review_descriptions.py
+++ b/frigate/data_processing/post/review_descriptions.py
@@ -49,38 +49,51 @@ class ReviewDescriptionProcessor(PostProcessorApi):
        self.review_descs_dps.start()

    def calculate_frame_count(
-        self, image_source: ImageSourceEnum = ImageSourceEnum.preview
+        self,
+        camera: str,
+        image_source: ImageSourceEnum = ImageSourceEnum.preview,
+        height: int = 480,
    ) -> int:
-        """Calculate optimal number of frames based on context size and image source."""
+        """Calculate optimal number of frames based on context size, image source, and resolution.
+
+        Token usage varies by resolution: larger images (ultrawide aspect ratios) use more tokens.
+        Estimates ~1 token per 1250 pixels. Targets 98% context utilization with safety margin.
+        Capped at 20 frames.
+        """
        context_size = self.genai_client.get_context_size()
+        camera_config = self.config.cameras[camera]
+
+        detect_width = camera_config.detect.width
+        detect_height = camera_config.detect.height
+        aspect_ratio = detect_width / detect_height

        if image_source == ImageSourceEnum.recordings:
-            # With recordings at 480p resolution (480px height), each image uses ~200-300 tokens
-            # This is ~2-3x more than preview images, so we reduce frame count accordingly
-            # to avoid exceeding context limits and maintain reasonable inference times
-            if context_size > 14000:
-                return 16
-            elif context_size > 12000:
-                return 14
-            elif context_size > 10000:
-                return 12
-            elif context_size > 6000:
-                return 10
-            elif context_size > 4000:
-                return 8
+            if aspect_ratio >= 1:
+                # Landscape or square: constrain height
+                width = int(height * aspect_ratio)
            else:
-                return 6
+                # Portrait: constrain width
+                width = height
+                height = int(width / aspect_ratio)
        else:
-            # With preview images (180px height), each image uses ~100 tokens
-            # We can send more frames since they're lower resolution
-            if context_size > 10000:
-                return 20
-            elif context_size > 6000:
-                return 16
-            elif context_size > 4000:
-                return 12
+            if aspect_ratio >= 1:
+                # Landscape or square: constrain height
+                target_height = 180
+                width = int(target_height * aspect_ratio)
+                height = target_height
            else:
-                return 8
+                # Portrait: constrain width
+                target_width = 180
+                width = target_width
+                height = int(target_width / aspect_ratio)
+
+        pixels_per_image = width * height
+        tokens_per_image = pixels_per_image / 1250
+        prompt_tokens = 3500
+        available_tokens = context_size * 0.98 - prompt_tokens
+        max_frames = int(available_tokens / tokens_per_image)
+
+        return min(max(max_frames, 3), 20)

    def process_data(self, data, data_type):
        self.metrics.review_desc_dps.value = self.review_descs_dps.eps()
@@ -258,7 +271,7 @@ class ReviewDescriptionProcessor(PostProcessorApi):
            all_frames.append(os.path.join(preview_dir, file))

        frame_count = len(all_frames)
-        desired_frame_count = self.calculate_frame_count()
+        desired_frame_count = self.calculate_frame_count(camera)

        if frame_count <= desired_frame_count:
            return all_frames
@@ -281,7 +294,9 @@ class ReviewDescriptionProcessor(PostProcessorApi):
    ) -> list[bytes]:
        """Get frames from recordings at specified timestamps."""
        duration = end_time - start_time
-        desired_frame_count = self.calculate_frame_count(ImageSourceEnum.recordings)
+        desired_frame_count = self.calculate_frame_count(
+            camera, ImageSourceEnum.recordings, height
+        )

        # Calculate evenly spaced timestamps throughout the duration
        if desired_frame_count == 1: