Improve natural language of prompt (#19515)

* Make sequence details human-readable so they are used in the natural language response

* Cleanup

* Improve prompt and image selection

* Adjust

* Adjust slightly

* Format time

* Adjust frame selection logic

* Debug save response

* Ignore extra fields

* Adjust docs
This commit is contained in:
Nicolas Mowen 2025-08-15 07:25:49 -06:00 committed by Blake Blackshear
parent 6671984e5a
commit ccbaa74a8b
4 changed files with 70 additions and 27 deletions

View File

@ -29,12 +29,11 @@ You must use a vision capable model with Frigate. Current model variants can be
The following models are recommended:
| Model | Size | Recommended Features |
| ----------------- | ------ | -------------------- |
| `minicpm-v:8b` | 5.5 GB | Review Summary |
| `qwen2.5vl:3b` | 3.2 GB | Review Summary |
| `gemma3:4b` | 3.3 GB | All Features |
| `llava-phi3:3.8b` | 2.9 GB | All Features |
| Model | Size | Notes |
| ----------------- | ------ | ----------------------------------------------------------- |
| `gemma3:4b` | 3.3 GB | Strong frame-to-frame understanding, slower inference times |
| `qwen2.5vl:3b` | 3.2 GB | Fast but capable model with good vision comprehension |
| `llava-phi3:3.8b` | 2.9 GB | Lightweight and fast model with vision comprehension |
:::note

View File

@ -116,6 +116,7 @@ class ReviewDescriptionProcessor(PostProcessorApi):
final_data,
thumbs,
camera_config.review.genai,
list(self.config.model.merged_labelmap.values()),
),
).start()
@ -160,7 +161,11 @@ class ReviewDescriptionProcessor(PostProcessorApi):
return None
def get_cache_frames(
self, camera: str, start_time: float, end_time: float
self,
camera: str,
start_time: float,
end_time: float,
desired_frame_count: int = 12,
) -> list[str]:
preview_dir = os.path.join(CACHE_DIR, "preview_frames")
file_start = f"preview_{camera}"
@ -173,21 +178,27 @@ class ReviewDescriptionProcessor(PostProcessorApi):
continue
if file < start_file:
if len(all_frames):
all_frames[0] = os.path.join(preview_dir, file)
else:
all_frames.append(os.path.join(preview_dir, file))
continue
if file > end_file:
all_frames.append(os.path.join(preview_dir, file))
break
all_frames.append(os.path.join(preview_dir, file))
frame_count = len(all_frames)
if frame_count <= 10:
if frame_count <= desired_frame_count:
return all_frames
selected_frames = []
step_size = (frame_count - 1) / 9
step_size = (frame_count - 1) / (desired_frame_count - 1)
for i in range(10):
for i in range(desired_frame_count):
index = round(i * step_size)
selected_frames.append(all_frames[index])
@ -203,19 +214,36 @@ def run_analysis(
final_data: dict[str, str],
thumbs: list[bytes],
genai_config: GenAIReviewConfig,
labelmap_objects: list[str],
) -> None:
start = datetime.datetime.now().timestamp()
analytics_data = {
"id": final_data["id"],
"camera": camera,
"zones": final_data["data"]["zones"],
"start": datetime.datetime.fromtimestamp(final_data["start_time"]).strftime(
"%A, %I:%M %p"
),
"duration": final_data["end_time"] - final_data["start_time"],
}
objects = []
verified_objects = []
for label in set(final_data["data"]["objects"] + final_data["data"]["sub_labels"]):
if "-verified" in label:
continue
if label in labelmap_objects:
objects.append(label.replace("_", " ").title())
else:
verified_objects.append(label.replace("_", " ").title())
analytics_data["objects"] = objects
analytics_data["recognized_objects"] = verified_objects
metadata = genai_client.generate_review_description(
{
"id": final_data["id"],
"camera": camera,
"objects": list(
filter(lambda o: "-verified" not in o, final_data["data"]["objects"])
),
"recognized_objects": final_data["data"]["sub_labels"],
"zones": final_data["data"]["zones"],
"timestamp": datetime.datetime.fromtimestamp(final_data["end_time"]),
},
analytics_data,
thumbs,
genai_config.additional_concerns,
genai_config.preferred_language,

View File

@ -1,7 +1,9 @@
from pydantic import BaseModel, Field
from pydantic import BaseModel, ConfigDict, Field
class ReviewMetadata(BaseModel):
model_config = ConfigDict(extra="ignore", protected_namespaces=())
scene: str = Field(
description="A comprehensive description of the setting and entities, including relevant context and plausible inferences if supported by visual evidence."
)

View File

@ -90,11 +90,11 @@ Threat-level definitions:
- 2 Active or immediate threat: Breaking in, vandalism, aggression, weapon display.
Sequence details:
- Frame 1 = earliest, Frame 10 = latest
- Activity occurred at {review_data["timestamp"].strftime("%I:%M %p")}
- Detected objects: {list(set(review_data["objects"]))}
- Recognized objects: {list(set(review_data["recognized_objects"])) or "None"}
- Zones involved: {review_data["zones"]}
- Frame 1 = earliest, Frame {len(thumbnails)} = latest
- Activity started at {review_data["start"]} and lasted {review_data["duration"]} seconds
- Detected objects: {", ".join(review_data["objects"])}
- Verified recognized objects: {", ".join(review_data["recognized_objects"]) or "None"}
- Zones involved: {", ".join(z.replace("_", " ").title() for z in review_data["zones"]) or "None"}
**IMPORTANT:**
- Values must be plain strings, floats, or integers no nested objects, no extra commentary.
@ -115,13 +115,27 @@ Sequence details:
response = self._send(context_prompt, thumbnails)
if debug_save:
with open(
os.path.join(
CLIPS_DIR, "genai-requests", review_data["id"], "response.txt"
),
"w",
) as f:
f.write(response)
if response:
clean_json = re.sub(
r"\n?```$", "", re.sub(r"^```[a-zA-Z0-9]*\n?", "", response)
)
try:
return ReviewMetadata.model_validate_json(clean_json)
metadata = ReviewMetadata.model_validate_json(clean_json)
if review_data["recognized_objects"]:
metadata.potential_threat_level = 0
return metadata
except Exception as e:
# rarely LLMs can fail to follow directions on output format
logger.warning(