From ccbaa74a8bc6bc877cbdfc7c44285e5acb593c73 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Fri, 15 Aug 2025 07:25:49 -0600 Subject: [PATCH] Improve natural language of prompt (#19515) * Make sequence details human-readable so they are used in natural language response * Cleanup * Improve prompt and image selection * Adjust * Adjust slightly * Format time * Adjust frame selection logic * Debug save response * Ignore extra fields * Adjust docs --- docs/docs/configuration/genai/config.md | 11 ++-- .../post/review_descriptions.py | 56 ++++++++++++++----- frigate/data_processing/post/types.py | 4 +- frigate/genai/__init__.py | 26 +++++++-- 4 files changed, 70 insertions(+), 27 deletions(-) diff --git a/docs/docs/configuration/genai/config.md b/docs/docs/configuration/genai/config.md index f7ceb804d..bb4d213e1 100644 --- a/docs/docs/configuration/genai/config.md +++ b/docs/docs/configuration/genai/config.md @@ -29,12 +29,11 @@ You must use a vision capable model with Frigate. Current model variants can be The following models are recommended: -| Model | Size | Recommended Features | -| ----------------- | ------ | -------------------- | -| `minicpm-v:8b` | 5.5 GB | Review Summary | -| `qwen2.5vl:3b` | 3.2 GB | Review Summary | -| `gemma3:4b` | 3.3 GB | All Features | -| `llava-phi3:3.8b` | 2.9 GB | All Features | +| Model | Size | Notes | +| ----------------- | ------ | ----------------------------------------------------------- | +| `gemma3:4b` | 3.3 GB | Strong frame-to-frame understanding, slower inference times | +| `qwen2.5vl:3b` | 3.2 GB | Fast but capable model with good vision comprehension | +| `llava-phi3:3.8b` | 2.9 GB | Lightweight and fast model with vision comprehension | :::note diff --git a/frigate/data_processing/post/review_descriptions.py b/frigate/data_processing/post/review_descriptions.py index 68496fc18..6293241b8 100644 --- a/frigate/data_processing/post/review_descriptions.py +++ b/frigate/data_processing/post/review_descriptions.py @@ 
-116,6 +116,7 @@ class ReviewDescriptionProcessor(PostProcessorApi): final_data, thumbs, camera_config.review.genai, + list(self.config.model.merged_labelmap.values()), ), ).start() @@ -160,7 +161,11 @@ class ReviewDescriptionProcessor(PostProcessorApi): return None def get_cache_frames( - self, camera: str, start_time: float, end_time: float + self, + camera: str, + start_time: float, + end_time: float, + desired_frame_count: int = 12, ) -> list[str]: preview_dir = os.path.join(CACHE_DIR, "preview_frames") file_start = f"preview_{camera}" @@ -173,21 +178,27 @@ class ReviewDescriptionProcessor(PostProcessorApi): continue if file < start_file: + if len(all_frames): + all_frames[0] = os.path.join(preview_dir, file) + else: + all_frames.append(os.path.join(preview_dir, file)) + continue if file > end_file: + all_frames.append(os.path.join(preview_dir, file)) break all_frames.append(os.path.join(preview_dir, file)) frame_count = len(all_frames) - if frame_count <= 10: + if frame_count <= desired_frame_count: return all_frames selected_frames = [] - step_size = (frame_count - 1) / 9 + step_size = (frame_count - 1) / (desired_frame_count - 1) - for i in range(10): + for i in range(desired_frame_count): index = round(i * step_size) selected_frames.append(all_frames[index]) @@ -203,19 +214,36 @@ def run_analysis( final_data: dict[str, str], thumbs: list[bytes], genai_config: GenAIReviewConfig, + labelmap_objects: list[str], ) -> None: start = datetime.datetime.now().timestamp() + analytics_data = { + "id": final_data["id"], + "camera": camera, + "zones": final_data["data"]["zones"], + "start": datetime.datetime.fromtimestamp(final_data["start_time"]).strftime( + "%A, %I:%M %p" + ), + "duration": final_data["end_time"] - final_data["start_time"], + } + + objects = [] + verified_objects = [] + + for label in set(final_data["data"]["objects"] + final_data["data"]["sub_labels"]): + if "-verified" in label: + continue + + if label in labelmap_objects: + 
objects.append(label.replace("_", " ").title()) + else: + verified_objects.append(label.replace("_", " ").title()) + + analytics_data["objects"] = objects + analytics_data["recognized_objects"] = verified_objects + metadata = genai_client.generate_review_description( - { - "id": final_data["id"], - "camera": camera, - "objects": list( - filter(lambda o: "-verified" not in o, final_data["data"]["objects"]) - ), - "recognized_objects": final_data["data"]["sub_labels"], - "zones": final_data["data"]["zones"], - "timestamp": datetime.datetime.fromtimestamp(final_data["end_time"]), - }, + analytics_data, thumbs, genai_config.additional_concerns, genai_config.preferred_language, diff --git a/frigate/data_processing/post/types.py b/frigate/data_processing/post/types.py index 4e0534a8c..9810d1947 100644 --- a/frigate/data_processing/post/types.py +++ b/frigate/data_processing/post/types.py @@ -1,7 +1,9 @@ -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field class ReviewMetadata(BaseModel): + model_config = ConfigDict(extra="ignore", protected_namespaces=()) + scene: str = Field( description="A comprehensive description of the setting and entities, including relevant context and plausible inferences if supported by visual evidence." ) diff --git a/frigate/genai/__init__.py b/frigate/genai/__init__.py index ba936db3c..4a9789097 100644 --- a/frigate/genai/__init__.py +++ b/frigate/genai/__init__.py @@ -90,11 +90,11 @@ Threat-level definitions: - 2 — Active or immediate threat: Breaking in, vandalism, aggression, weapon display. 
Sequence details: -- Frame 1 = earliest, Frame 10 = latest -- Activity occurred at {review_data["timestamp"].strftime("%I:%M %p")} -- Detected objects: {list(set(review_data["objects"]))} -- Recognized objects: {list(set(review_data["recognized_objects"])) or "None"} -- Zones involved: {review_data["zones"]} +- Frame 1 = earliest, Frame {len(thumbnails)} = latest +- Activity started at {review_data["start"]} and lasted {review_data["duration"]} seconds +- Detected objects: {", ".join(review_data["objects"])} +- Verified recognized objects: {", ".join(review_data["recognized_objects"]) or "None"} +- Zones involved: {", ".join(z.replace("_", " ").title() for z in review_data["zones"]) or "None"} **IMPORTANT:** - Values must be plain strings, floats, or integers — no nested objects, no extra commentary. @@ -115,13 +115,27 @@ Sequence details: response = self._send(context_prompt, thumbnails) + if debug_save: + with open( + os.path.join( + CLIPS_DIR, "genai-requests", review_data["id"], "response.txt" + ), + "w", + ) as f: + f.write(response) + if response: clean_json = re.sub( r"\n?```$", "", re.sub(r"^```[a-zA-Z0-9]*\n?", "", response) ) try: - return ReviewMetadata.model_validate_json(clean_json) + metadata = ReviewMetadata.model_validate_json(clean_json) + + if review_data["recognized_objects"]: + metadata.potential_threat_level = 0 + + return metadata except Exception as e: # rarely LLMs can fail to follow directions on output format logger.warning(