From ccbaa74a8bc6bc877cbdfc7c44285e5acb593c73 Mon Sep 17 00:00:00 2001
From: Nicolas Mowen <nickmowen213@gmail.com>
Date: Fri, 15 Aug 2025 07:25:49 -0600
Subject: [PATCH] Improve natural language of prompt (#19515)

* Make sequence details human-readable so they are used in natural language response

* Cleanup

* Improve prompt and image selection

* Adjust

* Adjust slightly

* Format time

* Adjust frame selection logic

* Debug save response

* Ignore extra fields

* Adjust docs
---
 docs/docs/configuration/genai/config.md       | 11 ++--
 .../post/review_descriptions.py               | 56 ++++++++++++++-----
 frigate/data_processing/post/types.py         |  4 +-
 frigate/genai/__init__.py                     | 26 +++++++--
 4 files changed, 70 insertions(+), 27 deletions(-)

diff --git a/docs/docs/configuration/genai/config.md b/docs/docs/configuration/genai/config.md
index f7ceb804d..bb4d213e1 100644
--- a/docs/docs/configuration/genai/config.md
+++ b/docs/docs/configuration/genai/config.md
@@ -29,12 +29,11 @@ You must use a vision capable model with Frigate. Current model variants can be
 
 The following models are recommended:
 
-| Model             | Size   | Recommended Features |
-| ----------------- | ------ | -------------------- |
-| `minicpm-v:8b`    | 5.5 GB | Review Summary       |
-| `qwen2.5vl:3b`    | 3.2 GB | Review Summary       |
-| `gemma3:4b`       | 3.3 GB | All Features         |
-| `llava-phi3:3.8b` | 2.9 GB | All Features         |
+| Model             | Size   | Notes                                                       |
+| ----------------- | ------ | ----------------------------------------------------------- |
+| `gemma3:4b`       | 3.3 GB | Strong frame-to-frame understanding, slower inference times |
+| `qwen2.5vl:3b`    | 3.2 GB | Fast but capable model with good vision comprehension       |
+| `llava-phi3:3.8b` | 2.9 GB | Lightweight and fast model with vision comprehension        |
 
 :::note
 
diff --git a/frigate/data_processing/post/review_descriptions.py b/frigate/data_processing/post/review_descriptions.py
index 68496fc18..6293241b8 100644
--- a/frigate/data_processing/post/review_descriptions.py
+++ b/frigate/data_processing/post/review_descriptions.py
@@ -116,6 +116,7 @@ class ReviewDescriptionProcessor(PostProcessorApi):
                     final_data,
                     thumbs,
                     camera_config.review.genai,
+                    list(self.config.model.merged_labelmap.values()),
                 ),
             ).start()
 
@@ -160,7 +161,11 @@ class ReviewDescriptionProcessor(PostProcessorApi):
             return None
 
     def get_cache_frames(
-        self, camera: str, start_time: float, end_time: float
+        self,
+        camera: str,
+        start_time: float,
+        end_time: float,
+        desired_frame_count: int = 12,
     ) -> list[str]:
         preview_dir = os.path.join(CACHE_DIR, "preview_frames")
         file_start = f"preview_{camera}"
@@ -173,21 +178,27 @@ class ReviewDescriptionProcessor(PostProcessorApi):
                 continue
 
             if file < start_file:
+                if len(all_frames):
+                    all_frames[0] = os.path.join(preview_dir, file)
+                else:
+                    all_frames.append(os.path.join(preview_dir, file))
+
                 continue
 
             if file > end_file:
+                all_frames.append(os.path.join(preview_dir, file))
                 break
 
             all_frames.append(os.path.join(preview_dir, file))
 
         frame_count = len(all_frames)
-        if frame_count <= 10:
+        if frame_count <= desired_frame_count:
             return all_frames
 
         selected_frames = []
-        step_size = (frame_count - 1) / 9
+        step_size = (frame_count - 1) / (desired_frame_count - 1)
 
-        for i in range(10):
+        for i in range(desired_frame_count):
             index = round(i * step_size)
             selected_frames.append(all_frames[index])
 
@@ -203,19 +214,36 @@ def run_analysis(
     final_data: dict[str, str],
     thumbs: list[bytes],
     genai_config: GenAIReviewConfig,
+    labelmap_objects: list[str],
 ) -> None:
     start = datetime.datetime.now().timestamp()
+    analytics_data = {
+        "id": final_data["id"],
+        "camera": camera,
+        "zones": final_data["data"]["zones"],
+        "start": datetime.datetime.fromtimestamp(final_data["start_time"]).strftime(
+            "%A, %I:%M %p"
+        ),
+        "duration": final_data["end_time"] - final_data["start_time"],
+    }
+
+    objects = []
+    verified_objects = []
+
+    for label in set(final_data["data"]["objects"] + final_data["data"]["sub_labels"]):
+        if "-verified" in label:
+            continue
+
+        if label in labelmap_objects:
+            objects.append(label.replace("_", " ").title())
+        else:
+            verified_objects.append(label.replace("_", " ").title())
+
+    analytics_data["objects"] = objects
+    analytics_data["recognized_objects"] = verified_objects
+
     metadata = genai_client.generate_review_description(
-        {
-            "id": final_data["id"],
-            "camera": camera,
-            "objects": list(
-                filter(lambda o: "-verified" not in o, final_data["data"]["objects"])
-            ),
-            "recognized_objects": final_data["data"]["sub_labels"],
-            "zones": final_data["data"]["zones"],
-            "timestamp": datetime.datetime.fromtimestamp(final_data["end_time"]),
-        },
+        analytics_data,
         thumbs,
         genai_config.additional_concerns,
         genai_config.preferred_language,
diff --git a/frigate/data_processing/post/types.py b/frigate/data_processing/post/types.py
index 4e0534a8c..9810d1947 100644
--- a/frigate/data_processing/post/types.py
+++ b/frigate/data_processing/post/types.py
@@ -1,7 +1,9 @@
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 
 class ReviewMetadata(BaseModel):
+    model_config = ConfigDict(extra="ignore", protected_namespaces=())
+
     scene: str = Field(
         description="A comprehensive description of the setting and entities, including relevant context and plausible inferences if supported by visual evidence."
     )
diff --git a/frigate/genai/__init__.py b/frigate/genai/__init__.py
index ba936db3c..4a9789097 100644
--- a/frigate/genai/__init__.py
+++ b/frigate/genai/__init__.py
@@ -90,11 +90,11 @@ Threat-level definitions:
 - 2 — Active or immediate threat: Breaking in, vandalism, aggression, weapon display.
 
 Sequence details:
-- Frame 1 = earliest, Frame 10 = latest
-- Activity occurred at {review_data["timestamp"].strftime("%I:%M %p")}
-- Detected objects: {list(set(review_data["objects"]))}
-- Recognized objects: {list(set(review_data["recognized_objects"])) or "None"}
-- Zones involved: {review_data["zones"]}
+- Frame 1 = earliest, Frame {len(thumbnails)} = latest
+- Activity started at {review_data["start"]} and lasted {review_data["duration"]} seconds
+- Detected objects: {", ".join(review_data["objects"])}
+- Verified recognized objects: {", ".join(review_data["recognized_objects"]) or "None"}
+- Zones involved: {", ".join(z.replace("_", " ").title() for z in review_data["zones"]) or "None"}
 
 **IMPORTANT:**
 - Values must be plain strings, floats, or integers — no nested objects, no extra commentary.
@@ -115,13 +115,27 @@ Sequence details:
 
         response = self._send(context_prompt, thumbnails)
 
+        if debug_save:
+            with open(
+                os.path.join(
+                    CLIPS_DIR, "genai-requests", review_data["id"], "response.txt"
+                ),
+                "w",
+            ) as f:
+                f.write(response)
+
         if response:
             clean_json = re.sub(
                 r"\n?```$", "", re.sub(r"^```[a-zA-Z0-9]*\n?", "", response)
             )
 
             try:
-                return ReviewMetadata.model_validate_json(clean_json)
+                metadata = ReviewMetadata.model_validate_json(clean_json)
+
+                if review_data["recognized_objects"]:
+                    metadata.potential_threat_level = 0
+
+                return metadata
             except Exception as e:
                 # rarely LLMs can fail to follow directions on output format
                 logger.warning(