Refactor genai (#22752)

* Switch to feature-based roles so it is easier to choose models for different tasks

* Fall back and try llama-swap format

* List models supported by provider

* Cleanup

* Add frontend

* Improve model loading

* Make it possible to update genai without restarting

* Cleanup

* Cleanup

* Mypy
This commit is contained in:
Nicolas Mowen
2026-04-03 17:13:52 -06:00
committed by GitHub
parent bb77a01779
commit 9cb76d0bd9
20 changed files with 363 additions and 140 deletions

View File

@@ -16,7 +16,7 @@ from frigate.config import CameraConfig, FrigateConfig
from frigate.const import CLIPS_DIR, UPDATE_EVENT_DESCRIPTION
from frigate.data_processing.post.semantic_trigger import SemanticTriggerProcessor
from frigate.data_processing.types import PostProcessDataEnum
from frigate.genai import GenAIClient
from frigate.genai.manager import GenAIClientManager
from frigate.models import Event
from frigate.types import TrackedObjectUpdateTypesEnum
from frigate.util.builtin import EventsPerSecond, InferenceSpeed
@@ -41,7 +41,7 @@ class ObjectDescriptionProcessor(PostProcessorApi):
embeddings: "Embeddings",
requestor: InterProcessRequestor,
metrics: DataProcessorMetrics,
client: GenAIClient,
genai_manager: GenAIClientManager,
semantic_trigger_processor: SemanticTriggerProcessor | None,
):
super().__init__(config, metrics, None)
@@ -49,7 +49,7 @@ class ObjectDescriptionProcessor(PostProcessorApi):
self.embeddings = embeddings
self.requestor = requestor
self.metrics = metrics
self.genai_client = client
self.genai_manager = genai_manager
self.semantic_trigger_processor = semantic_trigger_processor
self.tracked_events: dict[str, list[Any]] = {}
self.early_request_sent: dict[str, bool] = {}
@@ -198,6 +198,9 @@ class ObjectDescriptionProcessor(PostProcessorApi):
if data_type != PostProcessDataEnum.tracked_object:
return
if self.genai_manager.description_client is None:
return
state: str | None = frame_data.get("state", None)
if state is not None:
@@ -329,7 +332,12 @@ class ObjectDescriptionProcessor(PostProcessorApi):
"""Embed the description for an event."""
start = datetime.datetime.now().timestamp()
camera_config = self.config.cameras[str(event.camera)]
description = self.genai_client.generate_object_description(
client = self.genai_manager.description_client
if client is None:
return
description = client.generate_object_description(
camera_config, thumbnails, event
)

View File

@@ -22,6 +22,7 @@ from frigate.config.camera.review import GenAIReviewConfig, ImageSourceEnum
from frigate.const import CACHE_DIR, CLIPS_DIR, UPDATE_REVIEW_DESCRIPTION
from frigate.data_processing.types import PostProcessDataEnum
from frigate.genai import GenAIClient
from frigate.genai.manager import GenAIClientManager
from frigate.models import Recordings, ReviewSegment
from frigate.util.builtin import EventsPerSecond, InferenceSpeed
from frigate.util.image import get_image_from_recording
@@ -41,12 +42,12 @@ class ReviewDescriptionProcessor(PostProcessorApi):
config: FrigateConfig,
requestor: InterProcessRequestor,
metrics: DataProcessorMetrics,
client: GenAIClient,
genai_manager: GenAIClientManager,
):
super().__init__(config, metrics, None)
self.requestor = requestor
self.metrics = metrics
self.genai_client = client
self.genai_manager = genai_manager
self.review_desc_speed = InferenceSpeed(self.metrics.review_desc_speed)
self.review_desc_dps = EventsPerSecond()
self.review_desc_dps.start()
@@ -63,7 +64,12 @@ class ReviewDescriptionProcessor(PostProcessorApi):
Estimates ~1 token per 1250 pixels. Targets 98% context utilization with safety margin.
Capped at 20 frames.
"""
context_size = self.genai_client.get_context_size()
client = self.genai_manager.description_client
if client is None:
return 3
context_size = client.get_context_size()
camera_config = self.config.cameras[camera]
detect_width = camera_config.detect.width
@@ -111,6 +117,9 @@ class ReviewDescriptionProcessor(PostProcessorApi):
if data_type != PostProcessDataEnum.review:
return
if self.genai_manager.description_client is None:
return
camera = data["after"]["camera"]
camera_config = self.config.cameras[camera]
@@ -200,7 +209,7 @@ class ReviewDescriptionProcessor(PostProcessorApi):
target=run_analysis,
args=(
self.requestor,
self.genai_client,
self.genai_manager.description_client,
self.review_desc_speed,
camera_config,
final_data,
@@ -316,7 +325,12 @@ class ReviewDescriptionProcessor(PostProcessorApi):
os.path.join(CLIPS_DIR, "genai-requests", f"{start_ts}-{end_ts}")
).mkdir(parents=True, exist_ok=True)
return self.genai_client.generate_review_summary(
client = self.genai_manager.description_client
if client is None:
return None
return client.generate_review_summary(
start_ts,
end_ts,
events_with_context,