Improve natural language of prompt (#19515)

* Make sequence details human-readable so they are used in natural language response

* Cleanup

* Improve prompt and image selection

* Adjust

* Adjust slightly

* Format time

* Adjust frame selection logic

* Debug save response

* Ignore extra fields

* Adjust docs
This commit is contained in:
Nicolas Mowen
2025-08-15 07:25:49 -06:00
committed by Blake Blackshear
parent 6671984e5a
commit ccbaa74a8b
4 changed files with 70 additions and 27 deletions

View File

@@ -116,6 +116,7 @@ class ReviewDescriptionProcessor(PostProcessorApi):
final_data,
thumbs,
camera_config.review.genai,
list(self.config.model.merged_labelmap.values()),
),
).start()
@@ -160,7 +161,11 @@ class ReviewDescriptionProcessor(PostProcessorApi):
return None
def get_cache_frames(
self, camera: str, start_time: float, end_time: float
self,
camera: str,
start_time: float,
end_time: float,
desired_frame_count: int = 12,
) -> list[str]:
preview_dir = os.path.join(CACHE_DIR, "preview_frames")
file_start = f"preview_{camera}"
@@ -173,21 +178,27 @@ class ReviewDescriptionProcessor(PostProcessorApi):
continue
if file < start_file:
if len(all_frames):
all_frames[0] = os.path.join(preview_dir, file)
else:
all_frames.append(os.path.join(preview_dir, file))
continue
if file > end_file:
all_frames.append(os.path.join(preview_dir, file))
break
all_frames.append(os.path.join(preview_dir, file))
frame_count = len(all_frames)
if frame_count <= 10:
if frame_count <= desired_frame_count:
return all_frames
selected_frames = []
step_size = (frame_count - 1) / 9
step_size = (frame_count - 1) / (desired_frame_count - 1)
for i in range(10):
for i in range(desired_frame_count):
index = round(i * step_size)
selected_frames.append(all_frames[index])
@@ -203,19 +214,36 @@ def run_analysis(
final_data: dict[str, str],
thumbs: list[bytes],
genai_config: GenAIReviewConfig,
labelmap_objects: list[str],
) -> None:
start = datetime.datetime.now().timestamp()
analytics_data = {
"id": final_data["id"],
"camera": camera,
"zones": final_data["data"]["zones"],
"start": datetime.datetime.fromtimestamp(final_data["start_time"]).strftime(
"%A, %I:%M %p"
),
"duration": final_data["end_time"] - final_data["start_time"],
}
objects = []
verified_objects = []
for label in set(final_data["data"]["objects"] + final_data["data"]["sub_labels"]):
if "-verified" in label:
continue
if label in labelmap_objects:
objects.append(label.replace("_", " ").title())
else:
verified_objects.append(label.replace("_", " ").title())
analytics_data["objects"] = objects
analytics_data["recognized_objects"] = verified_objects
metadata = genai_client.generate_review_description(
{
"id": final_data["id"],
"camera": camera,
"objects": list(
filter(lambda o: "-verified" not in o, final_data["data"]["objects"])
),
"recognized_objects": final_data["data"]["sub_labels"],
"zones": final_data["data"]["zones"],
"timestamp": datetime.datetime.fromtimestamp(final_data["end_time"]),
},
analytics_data,
thumbs,
genai_config.additional_concerns,
genai_config.preferred_language,

View File

@@ -1,7 +1,9 @@
from pydantic import BaseModel, Field
from pydantic import BaseModel, ConfigDict, Field
class ReviewMetadata(BaseModel):
model_config = ConfigDict(extra="ignore", protected_namespaces=())
scene: str = Field(
description="A comprehensive description of the setting and entities, including relevant context and plausible inferences if supported by visual evidence."
)