mirror of
https://github.com/blakeblackshear/frigate.git
synced 2025-08-27 13:47:50 +02:00
Improve natural language of prompt (#19515)
* Make sequence details human-readable so they are used in natural language response * Cleanup * Improve prompt and image selection * Adjust * Adjust slightly * Format time * Adjust frame selection logic * Debug save response * Ignore extra fields * Adjust docs
This commit is contained in:
parent
6671984e5a
commit
ccbaa74a8b
@ -29,12 +29,11 @@ You must use a vision capable model with Frigate. Current model variants can be
|
||||
|
||||
The following models are recommended:
|
||||
|
||||
| Model | Size | Recommended Features |
|
||||
| ----------------- | ------ | -------------------- |
|
||||
| `minicpm-v:8b` | 5.5 GB | Review Summary |
|
||||
| `qwen2.5vl:3b` | 3.2 GB | Review Summary |
|
||||
| `gemma3:4b` | 3.3 GB | All Features |
|
||||
| `llava-phi3:3.8b` | 2.9 GB | All Features |
|
||||
| Model | Size | Notes |
|
||||
| ----------------- | ------ | ----------------------------------------------------------- |
|
||||
| `gemma3:4b` | 3.3 GB | Strong frame-to-frame understanding, slower inference times |
|
||||
| `qwen2.5vl:3b` | 3.2 GB | Fast but capable model with good vision comprehension |
|
||||
| `llava-phi3:3.8b` | 2.9 GB | Lightweight and fast model with vision comprehension |
|
||||
|
||||
:::note
|
||||
|
||||
|
@ -116,6 +116,7 @@ class ReviewDescriptionProcessor(PostProcessorApi):
|
||||
final_data,
|
||||
thumbs,
|
||||
camera_config.review.genai,
|
||||
list(self.config.model.merged_labelmap.values()),
|
||||
),
|
||||
).start()
|
||||
|
||||
@ -160,7 +161,11 @@ class ReviewDescriptionProcessor(PostProcessorApi):
|
||||
return None
|
||||
|
||||
def get_cache_frames(
|
||||
self, camera: str, start_time: float, end_time: float
|
||||
self,
|
||||
camera: str,
|
||||
start_time: float,
|
||||
end_time: float,
|
||||
desired_frame_count: int = 12,
|
||||
) -> list[str]:
|
||||
preview_dir = os.path.join(CACHE_DIR, "preview_frames")
|
||||
file_start = f"preview_{camera}"
|
||||
@ -173,21 +178,27 @@ class ReviewDescriptionProcessor(PostProcessorApi):
|
||||
continue
|
||||
|
||||
if file < start_file:
|
||||
if len(all_frames):
|
||||
all_frames[0] = os.path.join(preview_dir, file)
|
||||
else:
|
||||
all_frames.append(os.path.join(preview_dir, file))
|
||||
|
||||
continue
|
||||
|
||||
if file > end_file:
|
||||
all_frames.append(os.path.join(preview_dir, file))
|
||||
break
|
||||
|
||||
all_frames.append(os.path.join(preview_dir, file))
|
||||
|
||||
frame_count = len(all_frames)
|
||||
if frame_count <= 10:
|
||||
if frame_count <= desired_frame_count:
|
||||
return all_frames
|
||||
|
||||
selected_frames = []
|
||||
step_size = (frame_count - 1) / 9
|
||||
step_size = (frame_count - 1) / (desired_frame_count - 1)
|
||||
|
||||
for i in range(10):
|
||||
for i in range(desired_frame_count):
|
||||
index = round(i * step_size)
|
||||
selected_frames.append(all_frames[index])
|
||||
|
||||
@ -203,19 +214,36 @@ def run_analysis(
|
||||
final_data: dict[str, str],
|
||||
thumbs: list[bytes],
|
||||
genai_config: GenAIReviewConfig,
|
||||
labelmap_objects: list[str],
|
||||
) -> None:
|
||||
start = datetime.datetime.now().timestamp()
|
||||
analytics_data = {
|
||||
"id": final_data["id"],
|
||||
"camera": camera,
|
||||
"zones": final_data["data"]["zones"],
|
||||
"start": datetime.datetime.fromtimestamp(final_data["start_time"]).strftime(
|
||||
"%A, %I:%M %p"
|
||||
),
|
||||
"duration": final_data["end_time"] - final_data["start_time"],
|
||||
}
|
||||
|
||||
objects = []
|
||||
verified_objects = []
|
||||
|
||||
for label in set(final_data["data"]["objects"] + final_data["data"]["sub_labels"]):
|
||||
if "-verified" in label:
|
||||
continue
|
||||
|
||||
if label in labelmap_objects:
|
||||
objects.append(label.replace("_", " ").title())
|
||||
else:
|
||||
verified_objects.append(label.replace("_", " ").title())
|
||||
|
||||
analytics_data["objects"] = objects
|
||||
analytics_data["recognized_objects"] = verified_objects
|
||||
|
||||
metadata = genai_client.generate_review_description(
|
||||
{
|
||||
"id": final_data["id"],
|
||||
"camera": camera,
|
||||
"objects": list(
|
||||
filter(lambda o: "-verified" not in o, final_data["data"]["objects"])
|
||||
),
|
||||
"recognized_objects": final_data["data"]["sub_labels"],
|
||||
"zones": final_data["data"]["zones"],
|
||||
"timestamp": datetime.datetime.fromtimestamp(final_data["end_time"]),
|
||||
},
|
||||
analytics_data,
|
||||
thumbs,
|
||||
genai_config.additional_concerns,
|
||||
genai_config.preferred_language,
|
||||
|
@ -1,7 +1,9 @@
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
|
||||
class ReviewMetadata(BaseModel):
|
||||
model_config = ConfigDict(extra="ignore", protected_namespaces=())
|
||||
|
||||
scene: str = Field(
|
||||
description="A comprehensive description of the setting and entities, including relevant context and plausible inferences if supported by visual evidence."
|
||||
)
|
||||
|
@ -90,11 +90,11 @@ Threat-level definitions:
|
||||
- 2 — Active or immediate threat: Breaking in, vandalism, aggression, weapon display.
|
||||
|
||||
Sequence details:
|
||||
- Frame 1 = earliest, Frame 10 = latest
|
||||
- Activity occurred at {review_data["timestamp"].strftime("%I:%M %p")}
|
||||
- Detected objects: {list(set(review_data["objects"]))}
|
||||
- Recognized objects: {list(set(review_data["recognized_objects"])) or "None"}
|
||||
- Zones involved: {review_data["zones"]}
|
||||
- Frame 1 = earliest, Frame {len(thumbnails)} = latest
|
||||
- Activity started at {review_data["start"]} and lasted {review_data["duration"]} seconds
|
||||
- Detected objects: {", ".join(review_data["objects"])}
|
||||
- Verified recognized objects: {", ".join(review_data["recognized_objects"]) or "None"}
|
||||
- Zones involved: {", ".join(z.replace("_", " ").title() for z in review_data["zones"]) or "None"}
|
||||
|
||||
**IMPORTANT:**
|
||||
- Values must be plain strings, floats, or integers — no nested objects, no extra commentary.
|
||||
@ -115,13 +115,27 @@ Sequence details:
|
||||
|
||||
response = self._send(context_prompt, thumbnails)
|
||||
|
||||
if debug_save:
|
||||
with open(
|
||||
os.path.join(
|
||||
CLIPS_DIR, "genai-requests", review_data["id"], "response.txt"
|
||||
),
|
||||
"w",
|
||||
) as f:
|
||||
f.write(response)
|
||||
|
||||
if response:
|
||||
clean_json = re.sub(
|
||||
r"\n?```$", "", re.sub(r"^```[a-zA-Z0-9]*\n?", "", response)
|
||||
)
|
||||
|
||||
try:
|
||||
return ReviewMetadata.model_validate_json(clean_json)
|
||||
metadata = ReviewMetadata.model_validate_json(clean_json)
|
||||
|
||||
if review_data["recognized_objects"]:
|
||||
metadata.potential_threat_level = 0
|
||||
|
||||
return metadata
|
||||
except Exception as e:
|
||||
# rarely LLMs can fail to follow directions on output format
|
||||
logger.warning(
|
||||
|
Loading…
Reference in New Issue
Block a user