mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-03-07 02:18:07 +01:00
* Update version * Create scaffolding for case management (#21293) * implement case management for export apis (#21295) * refactor vainfo to search for first GPU (#21296) use existing LibvaGpuSelector to pick appropritate libva device * Case management UI (#21299) * Refactor export cards to match existing cards in other UI pages * Show cases separately from exports * Add proper filtering and display of cases * Add ability to edit and select cases for exports * Cleanup typing * Hide if no unassigned * Cleanup hiding logic * fix scrolling * Improve layout * Camera connection quality indicator (#21297) * add camera connection quality metrics and indicator * formatting * move stall calcs to watchdog * clean up * change watchdog to 1s and separately track time for ffmpeg retry_interval * implement status caching to reduce message volume * Export filter UI (#21322) * Get started on export filters * implement basic filter * Implement filtering and adjust api * Improve filter handling * Improve navigation * Cleanup * handle scrolling * Refactor temperature reporting for detectors and implement Hailo temp reading (#21395) * Add Hailo temperature retrieval * Refactor `get_hailo_temps()` to use ctxmanager * Show Hailo temps in system UI * Move hailo_platform import to get_hailo_temps * Refactor temperatures calculations to use within detector block * Adjust webUI to handle new location --------- Co-authored-by: tigattack <10629864+tigattack@users.noreply.github.com> * Camera-specific hwaccel settings for timelapse exports (correct base) (#21386) * added hwaccel_args to camera.record.export config struct * populate camera.record.export.hwaccel_args with a cascade up to camera then global if 'auto' * use new hwaccel args in export * added documentation for camera-specific hwaccel export * fix c/p error * missed an import * fleshed out the docs and comments a bit * ruff lint * separated out the tips in the doc * fix documentation * fix and simplify reference config doc * Add 
support for GPU and NPU temperatures (#21495) * Add rockchip temps * Add support for GPU and NPU temperatures in the frontend * Add support for Nvidia temperature * Improve separation * Adjust graph scaling * Exports Improvements (#21521) * Add images to case folder view * Add ability to select case in export dialog * Add to mobile review too * Add API to handle deleting recordings (#21520) * Add recording delete API * Re-organize recordings apis * Fix import * Consolidate query types * Add media sync API endpoint (#21526) * add media cleanup functions * add endpoint * remove scheduled sync recordings from cleanup * move to utils dir * tweak import * remove sync_recordings and add config migrator * remove sync_recordings * docs * remove key * clean up docs * docs fix * docs tweak * Media sync API refactor and UI (#21542) * generic job infrastructure * types and dispatcher changes for jobs * save data in memory only for completed jobs * implement media sync job and endpoints * change logs to debug * websocket hook and types * frontend * i18n * docs tweaks * endpoint descriptions * tweak docs * use same logging pattern in sync_recordings as the other sync functions (#21625) * Fix incorrect counting in sync_recordings (#21626) * Update go2rtc to v1.9.13 (#21648) Co-authored-by: Eugeny Tulupov <eugeny.tulupov@spirent.com> * Refactor Time-Lapse Export (#21668) * refactor time lapse creation to be a separate API call with ability to pass arbitrary ffmpeg args * Add CPU fallback * Optimize empty directory cleanup for recordings (#21695) The previous empty directory cleanup did a full recursive directory walk, which can be extremely slow. This new implementation only removes directories which have a chance of being empty due to a recent file deletion. 
* Implement llama.cpp GenAI Provider (#21690) * Implement llama.cpp GenAI Provider * Add docs * Update links * Fix broken mqtt links * Fix more broken anchors * Remove parents in remove_empty_directories (#21726) The original implementation did a full directory tree walk to find and remove empty directories, so this implementation should remove the parents as well, like the original did. * Implement LLM Chat API with tool calling support (#21731) * Implement initial tools definiton APIs * Add initial chat completion API with tool support * Implement other providers * Cleanup * Offline preview image (#21752) * use latest preview frame for latest image when camera is offline * remove frame extraction logic * tests * frontend * add description to api endpoint * Update to ROCm 7.2.0 (#21753) * Update to ROCm 7.2.0 * ROCm now works properly with JinaV1 * Arcface has compilation error * Add live context tool to LLM (#21754) * Add live context tool * Improve handling of images in request * Improve prompt caching * Add networking options for configuring listening ports (#21779) * feat: add X-Frame-Time when returning snapshot (#21932) Co-authored-by: Florent MORICONI <170678386+fmcloudconsulting@users.noreply.github.com> * Improve jsmpeg player websocket handling (#21943) * improve jsmpeg player websocket handling prevent websocket console messages from appearing when player is destroyed * reformat files after ruff upgrade * Allow API Events to be Detections or Alerts, depending on the Event Label (#21923) * - API created events will be alerts OR detections, depending on the event label, defaulting to alerts - Indefinite API events will extend the recording segment until those events are ended - API event start time is the actual start time, instead of having a pre-buffer of record.event_pre_capture * Instead of checking for indefinite events on a camera before deciding if we should end the segment, only update last_detection_time and last_alert_time if frame_time is 
greater, which should have the same effect * Add the ability to set a pre_capture number of seconds when creating a manual event via the API. Default behavior unchanged * Remove unnecessary _publish_segment_start() call * Formatting * handle last_alert_time or last_detection_time being None when checking them against the frame_time * comment manual_info["label"].split(": ")[0] for clarity * ffmpeg Preview Segment Optimization for "high" and "very_high" (#21996) * Introduce qmax parameter for ffmpeg preview encoding Added PREVIEW_QMAX_PARAM to control ffmpeg encoding quality. * formatting * Fix spacing in qmax parameters for preview quality * Adapt to new Gemini format * Fix frame time access * Remove exceptions * Cleanup --------- Co-authored-by: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Co-authored-by: tigattack <10629864+tigattack@users.noreply.github.com> Co-authored-by: Andrew Roberts <adroberts@gmail.com> Co-authored-by: Eugeny Tulupov <zhekka3@gmail.com> Co-authored-by: Eugeny Tulupov <eugeny.tulupov@spirent.com> Co-authored-by: John Shaw <1753078+johnshaw@users.noreply.github.com> Co-authored-by: Eric Work <work.eric@gmail.com> Co-authored-by: FL42 <46161216+fl42@users.noreply.github.com> Co-authored-by: Florent MORICONI <170678386+fmcloudconsulting@users.noreply.github.com> Co-authored-by: nulledy <254504350+nulledy@users.noreply.github.com>
239 lines
7.9 KiB
Python
239 lines
7.9 KiB
Python
"""llama.cpp Provider for Frigate AI."""
|
|
|
|
import base64
|
|
import json
|
|
import logging
|
|
from typing import Any, Optional
|
|
|
|
import requests
|
|
|
|
from frigate.config import GenAIProviderEnum
|
|
from frigate.genai import GenAIClient, register_genai_provider
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@register_genai_provider(GenAIProviderEnum.llamacpp)
class LlamaCppClient(GenAIClient):
    """Generative AI client for Frigate using llama.cpp server."""

    # Defaults tuned for local inference; user-configured provider_options
    # override these key-by-key (see _init_provider).
    LOCAL_OPTIMIZED_OPTIONS = {
        "temperature": 0.7,
        "repeat_penalty": 1.05,
        "top_p": 0.8,
    }

    # base_url of the llama.cpp server (no trailing slash), or None when
    # no base_url is configured.
    provider: Optional[str]
    provider_options: dict[str, Any]

    @staticmethod
    def _error_result() -> dict[str, Any]:
        """Return a fresh chat result dict representing a failed request."""
        return {
            "content": None,
            "tool_calls": None,
            "finish_reason": "error",
        }

    def _init_provider(self) -> Optional[str]:
        """Initialize the client.

        Merges the local-inference defaults with any user-configured
        provider options, then returns the normalized base URL (trailing
        slash stripped), or None when no base_url is configured.
        """
        self.provider_options = {
            **self.LOCAL_OPTIMIZED_OPTIONS,
            **self.genai_config.provider_options,
        }

        if not self.genai_config.base_url:
            return None

        return self.genai_config.base_url.rstrip("/")

    def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
        """Submit a request to llama.cpp server.

        Images are inlined as base64 JPEG data URLs ahead of the text
        prompt, using the OpenAI-compatible chat completions payload.
        Returns the stripped response text, or None on any failure.
        """
        if self.provider is None:
            logger.warning(
                "llama.cpp provider has not been initialized, a description will not be generated. Check your llama.cpp configuration."
            )
            return None

        try:
            content: list[dict[str, Any]] = [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64.b64encode(image).decode('utf-8')}",
                    },
                }
                for image in images
            ]
            content.append(
                {
                    "type": "text",
                    "text": prompt,
                }
            )

            # Build request payload with llama.cpp native options
            payload = {
                "messages": [
                    {
                        "role": "user",
                        "content": content,
                    },
                ],
                **self.provider_options,
            }

            response = requests.post(
                f"{self.provider}/v1/chat/completions",
                json=payload,
                timeout=self.timeout,
            )
            response.raise_for_status()
            result = response.json()

            if result is not None and result.get("choices"):
                message = result["choices"][0].get("message", {})
                text = message.get("content")
                # Guard against a null content field so we do not log a
                # spurious error from calling .strip() on None.
                if isinstance(text, str):
                    return text.strip()
            return None
        except Exception as e:
            logger.warning("llama.cpp returned an error: %s", str(e))
            return None

    def get_context_size(self) -> int:
        """Get the context window size for llama.cpp (tokens, default 4096)."""
        return self.genai_config.provider_options.get("context_size", 4096)

    def chat_with_tools(
        self,
        messages: list[dict[str, Any]],
        tools: Optional[list[dict[str, Any]]] = None,
        tool_choice: Optional[str] = "auto",
    ) -> dict[str, Any]:
        """
        Send chat messages to llama.cpp server with optional tool definitions.

        Uses the OpenAI-compatible endpoint but passes through all native llama.cpp
        parameters (like slot_id, temperature, etc.) via provider_options.

        Returns a dict with keys "content", "tool_calls", and "finish_reason".
        On any failure, content and tool_calls are None and finish_reason is
        "error".
        """
        if self.provider is None:
            logger.warning(
                "llama.cpp provider has not been initialized. Check your llama.cpp configuration."
            )
            return self._error_result()

        try:
            # Only forward tool_choice values the OpenAI-compatible API
            # defines; anything else is silently dropped.
            openai_tool_choice = (
                tool_choice
                if tool_choice in ("none", "auto", "required")
                else None
            )

            payload: dict[str, Any] = {
                "messages": messages,
            }

            if tools:
                payload["tools"] = tools
                # tool_choice is only valid alongside tools in the
                # OpenAI-compatible API, so keep it inside this guard.
                if openai_tool_choice is not None:
                    payload["tool_choice"] = openai_tool_choice

            # context_size is a Frigate-only option (see get_context_size)
            # and must not be forwarded to the server.
            payload.update(
                {
                    k: v
                    for k, v in self.provider_options.items()
                    if k != "context_size"
                }
            )

            response = requests.post(
                f"{self.provider}/v1/chat/completions",
                json=payload,
                timeout=self.timeout,
            )
            response.raise_for_status()
            result = response.json()

            if result is None or not result.get("choices"):
                return self._error_result()

            choice = result["choices"][0]
            message = choice.get("message", {})

            content = message.get("content")
            content = content.strip() if content else None

            tool_calls = None
            if message.get("tool_calls"):
                tool_calls = []
                for tool_call in message["tool_calls"]:
                    # Bind function_data before the try so the except
                    # clause can always reference it for logging.
                    function_data = tool_call.get("function", {})
                    try:
                        raw_args = function_data.get("arguments", "{}")
                        # Some servers return arguments as an object rather
                        # than a JSON-encoded string; accept both.
                        arguments = (
                            raw_args
                            if isinstance(raw_args, dict)
                            else json.loads(raw_args)
                        )
                    except (json.JSONDecodeError, KeyError, TypeError) as e:
                        logger.warning(
                            "Failed to parse tool call arguments: %s, tool: %s",
                            e,
                            function_data.get("name", "unknown"),
                        )
                        arguments = {}

                    tool_calls.append(
                        {
                            "id": tool_call.get("id", ""),
                            "name": function_data.get("name", ""),
                            "arguments": arguments,
                        }
                    )

            # Prefer the server-reported finish_reason; otherwise infer it
            # from what the response actually contained.
            if choice.get("finish_reason"):
                finish_reason = choice["finish_reason"]
            elif tool_calls:
                finish_reason = "tool_calls"
            elif content:
                finish_reason = "stop"
            else:
                finish_reason = "error"

            return {
                "content": content,
                "tool_calls": tool_calls,
                "finish_reason": finish_reason,
            }

        except requests.exceptions.Timeout as e:
            logger.warning("llama.cpp request timed out: %s", str(e))
            return self._error_result()
        except requests.exceptions.RequestException as e:
            error_detail = str(e)
            # Include the start of the server's error body when available;
            # llama.cpp often returns a useful message there.
            if getattr(e, "response", None) is not None:
                try:
                    error_detail = f"{str(e)} - Response: {e.response.text[:500]}"
                except Exception:
                    pass
            logger.warning("llama.cpp returned an error: %s", error_detail)
            return self._error_result()
        except Exception as e:
            logger.warning("Unexpected error in llama.cpp chat_with_tools: %s", str(e))
            return self._error_result()